From aea13244b28b72cdf9dddfd20feaa565d43bfe3c Mon Sep 17 00:00:00 2001 From: "jiyong.min" Date: Wed, 2 Jan 2019 09:44:09 +0900 Subject: [PATCH] Imported Upstream version 2.0.1 Change-Id: I4f1f873f6161c53b4b6b71b001ef5147017c04ef --- .gitignore | 14 - .travis.yml | 131 - BUILDING.md | 933 +-- Brewfile | 4 + CMakeLists.txt | 1433 ++-- ChangeLog.md | 226 +- LICENSE.md | 16 +- Makefile.am | 794 -- README.ijg | 64 +- README.md | 13 +- acinclude.m4 | 287 - appveyor.yml | 57 - bmp.c | 341 - bmp.h | 42 - cderror.h | 11 +- cdjpeg.c | 21 +- cdjpeg.h | 56 +- ci/keys.enc | Bin 4624 -> 0 bytes cjpeg.1 | 5 +- cjpeg.c | 97 +- cmakescripts/BuildPackages.cmake | 177 + cmakescripts/GNUInstallDirs.cmake | 416 + cmakescripts/cmake_uninstall.cmake.in | 8 +- cmakescripts/testclean.cmake | 4 +- cmyk.h | 61 + configure.ac | 616 -- djpeg.1 | 3 + djpeg.c | 102 +- doc/html/annotated.html | 2 +- doc/html/classes.html | 2 +- doc/html/doxygen-extra.css | 2 +- doc/html/functions.html | 2 +- doc/html/functions_vars.html | 2 +- doc/html/group___turbo_j_p_e_g.html | 667 +- doc/html/index.html | 2 +- doc/html/modules.html | 2 +- doc/html/search/all_74.js | 59 +- doc/html/search/enums_74.js | 1 + doc/html/search/enumvalues_74.js | 3 + doc/html/search/functions_74.js | 49 +- doc/html/search/variables_74.js | 1 + doc/html/structtjregion.html | 2 +- doc/html/structtjscalingfactor.html | 2 +- doc/html/structtjtransform.html | 4 +- doxygen.config | 2 +- example.c => example.txt | 38 +- jaricom.c | 13 +- java/CMakeLists.txt | 123 +- java/Makefile.am | 75 - java/TJBench.java | 328 +- java/TJExample.java | 403 +- java/TJUnitTest.java | 289 +- java/doc/constant-values.html | 101 +- java/doc/deprecated-list.html | 8 +- java/doc/help-doc.html | 8 +- java/doc/index-all.html | 59 +- java/doc/index.html | 11 +- java/doc/org/libjpegturbo/turbojpeg/TJ.html | 224 +- .../org/libjpegturbo/turbojpeg/TJCompressor.html | 12 +- .../org/libjpegturbo/turbojpeg/TJCustomFilter.html | 8 +- 
.../org/libjpegturbo/turbojpeg/TJDecompressor.html | 32 +- .../org/libjpegturbo/turbojpeg/TJException.html | 67 +- .../libjpegturbo/turbojpeg/TJScalingFactor.html | 20 +- .../org/libjpegturbo/turbojpeg/TJTransform.html | 55 +- .../org/libjpegturbo/turbojpeg/TJTransformer.html | 8 +- java/doc/org/libjpegturbo/turbojpeg/YUVImage.html | 8 +- .../libjpegturbo/turbojpeg/package-summary.html | 8 +- .../org/libjpegturbo/turbojpeg/package-tree.html | 8 +- java/doc/overview-tree.html | 8 +- java/doc/script.js | 30 + java/doc/serialized-form.html | 21 +- java/org/libjpegturbo/turbojpeg/TJ.java | 125 +- java/org/libjpegturbo/turbojpeg/TJCompressor.java | 113 +- .../org/libjpegturbo/turbojpeg/TJDecompressor.java | 112 +- java/org/libjpegturbo/turbojpeg/TJException.java | 25 + .../{TJLoader.java.tmpl => TJLoader-unix.java.in} | 16 +- .../{TJLoader.java.in => TJLoader-win.java.in} | 0 .../libjpegturbo/turbojpeg/TJScalingFactor.java | 13 +- java/org/libjpegturbo/turbojpeg/TJTransform.java | 31 +- java/org/libjpegturbo/turbojpeg/TJTransformer.java | 6 +- java/org/libjpegturbo/turbojpeg/YUVImage.java | 16 +- jcapimin.c | 44 +- jcapistd.c | 26 +- jcarith.c | 104 +- jccoefct.c | 102 +- jccolext.c | 46 +- jccolor.c | 377 +- jcdctmgr.c | 118 +- jchuff.c | 255 +- jchuff.h | 13 +- jcicc.c | 105 + jcinit.c | 10 +- jcmainct.c | 40 +- jcmarker.c | 93 +- jcmaster.c | 135 +- jcomapi.c | 12 +- jconfig.h.in | 58 +- jconfig.txt | 2 +- jconfigint.h.in | 24 +- jcparam.c | 87 +- jcphuff.c | 675 +- jcprepct.c | 92 +- jcsample.c | 120 +- jcstest.c | 24 +- jctrans.c | 70 +- jdapimin.c | 42 +- jdapistd.c | 114 +- jdarith.c | 122 +- jdatadst-tj.c | 33 +- jdatadst.c | 50 +- jdatasrc-tj.c | 30 +- jdatasrc.c | 52 +- jdcoefct.c | 247 +- jdcoefct.h | 6 +- jdcol565.c | 88 +- jdcolext.c | 34 +- jdcolor.c | 478 +- jdct.h | 148 +- jddctmgr.c | 32 +- jdhuff.c | 167 +- jdhuff.h | 98 +- jdicc.c | 171 + jdinput.c | 99 +- jdmainct.c | 118 +- jdmainct.h | 12 +- jdmarker.c | 302 +- jdmaster.c | 173 +- jdmerge.c | 368 
+- jdmrg565.c | 134 +- jdmrgext.c | 50 +- jdphuff.c | 141 +- jdpostct.c | 122 +- jdsample.c | 129 +- jdtrans.c | 14 +- jerror.c | 16 +- jerror.h | 99 +- jfdctflt.c | 58 +- jfdctfst.c | 50 +- jfdctint.c | 116 +- jidctflt.c | 112 +- jidctfst.c | 156 +- jidctint.c | 1722 ++-- jidctred.c | 254 +- jinclude.h | 20 +- jmemmgr.c | 304 +- jmemnobs.c | 32 +- jmemsys.h | 28 +- jmorecfg.h | 84 +- jpegcomp.h | 32 +- jpegint.h | 132 +- jpeglib.h | 210 +- jpegtran.1 | 5 + jpegtran.c | 100 +- jquant1.c | 192 +- jquant2.c | 357 +- jsimd.h | 182 +- jsimd_none.c | 244 +- jsimddct.h | 104 +- jstdhuff.c | 126 +- jutils.c | 20 +- jversion.h | 29 +- libjpeg.txt | 80 +- md5/Makefile.am | 4 - md5/md5.c | 535 +- md5/md5.h | 88 +- md5/md5cmp.c | 39 +- md5/md5hl.c | 164 +- rdbmp.c | 452 +- rdcolmap.c | 34 +- rdgif.c | 2 +- rdjpgcom.c | 79 +- rdppm.c | 445 +- rdrle.c | 72 +- rdswitch.c | 74 +- rdtarga.c | 128 +- release/{Distribution.xml => Distribution.xml.in} | 10 +- release/ReadMe.txt | 2 +- release/{deb-control.tmpl => deb-control.in} | 11 +- release/installer.nsi.in | 191 + release/libjpeg-turbo.nsi.in | 162 - release/libjpeg-turbo.spec.in | 164 - release/libjpeg.pc.in | 10 +- release/libturbojpeg.pc.in | 10 +- release/makecygwinpkg.in | 48 +- release/makedpkg.in | 77 +- release/makemacpkg.in | 424 +- release/makerpm.in | 30 + release/makesrpm.in | 48 + release/maketarball.in | 51 + release/rpm.spec.in | 221 + release/uninstall.in | 39 +- sharedlib/CMakeLists.txt | 68 +- simd/CMakeLists.txt | 403 +- simd/Makefile.am | 102 - simd/{jsimd_arm.c => arm/jsimd.c} | 410 +- simd/{jsimd_arm_neon.S => arm/jsimd_neon.S} | 300 +- simd/{jsimd_arm64.c => arm64/jsimd.c} | 449 +- simd/{jsimd_arm64_neon.S => arm64/jsimd_neon.S} | 436 +- simd/gas-preprocessor.in | 1 + simd/i386/jccolext-avx2.asm | 580 ++ simd/i386/jccolext-mmx.asm | 478 ++ simd/i386/jccolext-sse2.asm | 505 ++ simd/i386/jccolor-avx2.asm | 123 + simd/i386/jccolor-mmx.asm | 123 + simd/i386/jccolor-sse2.asm | 122 + simd/i386/jcgray-avx2.asm | 
115 + simd/i386/jcgray-mmx.asm | 115 + simd/i386/jcgray-sse2.asm | 114 + simd/i386/jcgryext-avx2.asm | 459 ++ simd/i386/jcgryext-mmx.asm | 357 + simd/i386/jcgryext-sse2.asm | 384 + simd/i386/jchuff-sse2.asm | 426 + simd/i386/jcphuff-sse2.asm | 660 ++ simd/i386/jcsample-avx2.asm | 390 + simd/i386/jcsample-mmx.asm | 326 + simd/i386/jcsample-sse2.asm | 353 + simd/i386/jdcolext-avx2.asm | 517 ++ simd/i386/jdcolext-mmx.asm | 406 + simd/i386/jdcolext-sse2.asm | 460 ++ simd/i386/jdcolor-avx2.asm | 120 + simd/i386/jdcolor-mmx.asm | 119 + simd/i386/jdcolor-sse2.asm | 119 + simd/i386/jdmerge-avx2.asm | 138 + simd/i386/jdmerge-mmx.asm | 125 + simd/i386/jdmerge-sse2.asm | 137 + simd/i386/jdmrgext-avx2.asm | 577 ++ simd/i386/jdmrgext-mmx.asm | 462 ++ simd/i386/jdmrgext-sse2.asm | 519 ++ simd/i386/jdsample-avx2.asm | 762 ++ simd/i386/jdsample-mmx.asm | 733 ++ simd/i386/jdsample-sse2.asm | 726 ++ simd/i386/jfdctflt-3dn.asm | 320 + simd/i386/jfdctflt-sse.asm | 371 + simd/i386/jfdctfst-mmx.asm | 397 + simd/i386/jfdctfst-sse2.asm | 405 + simd/i386/jfdctint-avx2.asm | 333 + simd/i386/jfdctint-mmx.asm | 622 ++ simd/i386/jfdctint-sse2.asm | 635 ++ simd/i386/jidctflt-3dn.asm | 453 ++ simd/i386/jidctflt-sse.asm | 573 ++ simd/i386/jidctflt-sse2.asm | 499 ++ simd/i386/jidctfst-mmx.asm | 501 ++ simd/i386/jidctfst-sse2.asm | 503 ++ simd/i386/jidctint-avx2.asm | 455 ++ simd/i386/jidctint-mmx.asm | 853 ++ simd/i386/jidctint-sse2.asm | 860 ++ simd/i386/jidctred-mmx.asm | 706 ++ simd/i386/jidctred-sse2.asm | 594 ++ simd/i386/jquant-3dn.asm | 232 + simd/i386/jquant-mmx.asm | 278 + simd/i386/jquant-sse.asm | 210 + simd/i386/jquantf-sse2.asm | 170 + simd/i386/jquanti-avx2.asm | 190 + simd/i386/jquanti-sse2.asm | 203 + simd/i386/jsimd.c | 1253 +++ simd/i386/jsimdcpu.asm | 132 + simd/jccolext-mmx.asm | 476 -- simd/jccolext-sse2-64.asm | 486 -- simd/jccolext-sse2.asm | 503 -- simd/jccolor-altivec.c | 104 - simd/jccolor-mmx.asm | 122 - simd/jccolor-sse2-64.asm | 121 - simd/jccolor-sse2.asm | 121 - 
simd/jcgray-altivec.c | 99 - simd/jcgray-mmx.asm | 115 - simd/jcgray-sse2-64.asm | 114 - simd/jcgray-sse2.asm | 114 - simd/jcgryext-mmx.asm | 356 - simd/jcgryext-sse2-64.asm | 365 - simd/jcgryext-sse2.asm | 384 - simd/jchuff-sse2-64.asm | 360 - simd/jchuff-sse2.asm | 426 - simd/jcolsamp.inc | 104 - simd/jcsample-mmx.asm | 323 - simd/jcsample-sse2-64.asm | 329 - simd/jcsample-sse2.asm | 350 - simd/jdcolext-mmx.asm | 404 - simd/jdcolext-sse2-64.asm | 440 -- simd/jdcolext-sse2.asm | 459 -- simd/jdcolor-altivec.c | 96 - simd/jdcolor-mmx.asm | 119 - simd/jdcolor-sse2-64.asm | 119 - simd/jdcolor-sse2.asm | 119 - simd/jdmerge-altivec.c | 108 - simd/jdmerge-mmx.asm | 125 - simd/jdmerge-sse2-64.asm | 125 - simd/jdmerge-sse2.asm | 125 - simd/jdmrgext-mmx.asm | 463 -- simd/jdmrgext-sse2-64.asm | 537 -- simd/jdmrgext-sse2.asm | 518 -- simd/jdsample-mmx.asm | 736 -- simd/jdsample-sse2-64.asm | 670 -- simd/jdsample-sse2.asm | 728 -- simd/jfdctflt-3dn.asm | 319 - simd/jfdctflt-sse-64.asm | 357 - simd/jfdctflt-sse.asm | 369 - simd/jfdctfst-mmx.asm | 396 - simd/jfdctfst-sse2-64.asm | 391 - simd/jfdctfst-sse2.asm | 403 - simd/jfdctint-altivec.c | 262 - simd/jfdctint-mmx.asm | 621 -- simd/jfdctint-sse2-64.asm | 621 -- simd/jfdctint-sse2.asm | 633 -- simd/jidctflt-3dn.asm | 451 -- simd/jidctflt-sse.asm | 571 -- simd/jidctflt-sse2-64.asm | 482 -- simd/jidctflt-sse2.asm | 497 -- simd/jidctfst-mmx.asm | 499 -- simd/jidctfst-sse2-64.asm | 491 -- simd/jidctfst-sse2.asm | 501 -- simd/jidctint-mmx.asm | 851 -- simd/jidctint-sse2-64.asm | 847 -- simd/jidctint-sse2.asm | 858 -- simd/jidctred-mmx.asm | 705 -- simd/jidctred-sse2-64.asm | 575 -- simd/jidctred-sse2.asm | 593 -- simd/jquant-3dn.asm | 232 - simd/jquant-mmx.asm | 273 - simd/jquant-sse.asm | 210 - simd/jquantf-sse2-64.asm | 157 - simd/jquantf-sse2.asm | 170 - simd/jquanti-sse2-64.asm | 186 - simd/jquanti-sse2.asm | 199 - simd/jsimd.h | 1322 ++-- simd/jsimd_i386.c | 1091 --- simd/jsimd_mips.c | 1140 --- simd/jsimd_mips_dspr2.S | 4486 
----------- simd/jsimd_mips_dspr2_asm.h | 283 - simd/jsimd_x86_64.c | 887 --- simd/jsimdcfg.inc.h | 130 - simd/jsimdcpu.asm | 104 - simd/jsimdext.inc | 375 - simd/loongson/jccolext-mmi.c | 469 ++ simd/loongson/jccolor-mmi.c | 148 + simd/loongson/jcsample-mmi.c | 100 + simd/{ => loongson}/jcsample.h | 6 +- simd/loongson/jdcolext-mmi.c | 424 + simd/loongson/jdcolor-mmi.c | 139 + simd/loongson/jdsample-mmi.c | 245 + simd/loongson/jfdctint-mmi.c | 398 + simd/loongson/jidctint-mmi.c | 571 ++ simd/loongson/jquanti-mmi.c | 130 + simd/loongson/jsimd.c | 610 ++ simd/loongson/jsimd_mmi.h | 57 + simd/loongson/loongson-mmintrin.h | 1307 ++++ simd/mips/jsimd.c | 1123 +++ simd/mips/jsimd_dspr2.S | 4479 +++++++++++ simd/mips/jsimd_dspr2_asm.h | 292 + simd/nasm/jcolsamp.inc | 137 + simd/{ => nasm}/jdct.inc | 16 +- simd/{ => nasm}/jpeg_nbits_table.inc | 8192 ++++++++++---------- {win => simd/nasm}/jsimdcfg.inc | 3 +- simd/nasm/jsimdcfg.inc.h | 131 + simd/nasm/jsimdext.inc | 476 ++ simd/nasm_lt.sh | 60 - simd/{ => powerpc}/jccolext-altivec.c | 18 +- simd/powerpc/jccolor-altivec.c | 116 + simd/powerpc/jcgray-altivec.c | 111 + simd/{ => powerpc}/jcgryext-altivec.c | 19 +- simd/{ => powerpc}/jcsample-altivec.c | 27 +- simd/powerpc/jcsample.h | 28 + simd/{ => powerpc}/jdcolext-altivec.c | 12 +- simd/powerpc/jdcolor-altivec.c | 106 + simd/powerpc/jdmerge-altivec.c | 130 + simd/{ => powerpc}/jdmrgext-altivec.c | 34 +- simd/{ => powerpc}/jdsample-altivec.c | 78 +- simd/{ => powerpc}/jfdctfst-altivec.c | 80 +- simd/powerpc/jfdctint-altivec.c | 258 + simd/{ => powerpc}/jidctfst-altivec.c | 124 +- simd/{ => powerpc}/jidctint-altivec.c | 298 +- simd/{ => powerpc}/jquanti-altivec.c | 48 +- simd/{jsimd_powerpc.c => powerpc/jsimd.c} | 534 +- simd/{ => powerpc}/jsimd_altivec.h | 63 +- simd/x86_64/jccolext-avx2.asm | 560 ++ simd/x86_64/jccolext-sse2.asm | 485 ++ simd/x86_64/jccolor-avx2.asm | 123 + simd/x86_64/jccolor-sse2.asm | 122 + simd/x86_64/jcgray-avx2.asm | 115 + simd/x86_64/jcgray-sse2.asm 
| 114 + simd/x86_64/jcgryext-avx2.asm | 439 ++ simd/x86_64/jcgryext-sse2.asm | 364 + simd/x86_64/jchuff-sse2.asm | 348 + simd/x86_64/jcphuff-sse2.asm | 637 ++ simd/x86_64/jcsample-avx2.asm | 368 + simd/x86_64/jcsample-sse2.asm | 331 + simd/x86_64/jdcolext-avx2.asm | 497 ++ simd/x86_64/jdcolext-sse2.asm | 440 ++ simd/x86_64/jdcolor-avx2.asm | 120 + simd/x86_64/jdcolor-sse2.asm | 119 + simd/x86_64/jdmerge-avx2.asm | 138 + simd/x86_64/jdmerge-sse2.asm | 137 + simd/x86_64/jdmrgext-avx2.asm | 595 ++ simd/x86_64/jdmrgext-sse2.asm | 537 ++ simd/x86_64/jdsample-avx2.asm | 697 ++ simd/x86_64/jdsample-sse2.asm | 666 ++ simd/x86_64/jfdctflt-sse.asm | 357 + simd/x86_64/jfdctfst-sse2.asm | 391 + simd/x86_64/jfdctint-avx2.asm | 322 + simd/x86_64/jfdctint-sse2.asm | 621 ++ simd/x86_64/jidctflt-sse2.asm | 483 ++ simd/x86_64/jidctfst-sse2.asm | 492 ++ simd/x86_64/jidctint-avx2.asm | 419 + simd/x86_64/jidctint-sse2.asm | 848 ++ simd/x86_64/jidctred-sse2.asm | 575 ++ simd/x86_64/jquantf-sse2.asm | 156 + simd/x86_64/jquanti-avx2.asm | 164 + simd/x86_64/jquanti-sse2.asm | 189 + simd/x86_64/jsimd.c | 1076 +++ simd/x86_64/jsimdcpu.asm | 79 + structure.txt | 44 +- testimages/test1.icc | Bin 0 -> 557536 bytes testimages/test1.icc.txt | 20 + testimages/test2.icc | Bin 0 -> 654496 bytes testimages/test2.icc.txt | 20 + tjbench.c | 1861 ++--- tjbenchtest.in | 50 +- tjbenchtest.java.in | 66 +- tjexample.c | 395 + tjexampletest.in | 29 +- tjexampletest.java.in | 151 + tjunittest.c | 1410 ++-- tjutil.c | 38 +- tjutil.h | 20 +- transupp.c | 412 +- transupp.h | 45 +- turbojpeg-jni.c | 1807 ++--- turbojpeg-mapfile | 91 +- turbojpeg-mapfile.jni | 155 +- turbojpeg.c | 3752 +++++---- turbojpeg.h | 551 +- usage.txt | 2 +- win/jconfig.h.in | 41 +- win/jconfigint.h.in | 13 - win/jpeg62-memsrcdst.def | 212 +- win/jpeg62.def | 208 +- win/jpeg7-memsrcdst.def | 216 +- win/jpeg7.def | 212 +- win/jpeg8.def | 218 +- wizard.txt | 3 +- wrbmp.c | 293 +- wrgif.c | 82 +- wrjpgcom.1 | 2 +- wrjpgcom.c | 117 +- wrppm.c 
| 184 +- wrrle.c | 83 +- wrtarga.c | 76 +- 446 files changed, 75727 insertions(+), 56983 deletions(-) delete mode 100644 .gitignore delete mode 100644 .travis.yml create mode 100644 Brewfile delete mode 100644 Makefile.am mode change 100755 => 100644 README.md delete mode 100644 acinclude.m4 delete mode 100644 appveyor.yml delete mode 100644 bmp.c delete mode 100644 bmp.h delete mode 100644 ci/keys.enc create mode 100644 cmakescripts/BuildPackages.cmake create mode 100644 cmakescripts/GNUInstallDirs.cmake create mode 100644 cmyk.h delete mode 100644 configure.ac rename example.c => example.txt (92%) delete mode 100644 java/Makefile.am create mode 100644 java/doc/script.js rename java/org/libjpegturbo/turbojpeg/{TJLoader.java.tmpl => TJLoader-unix.java.in} (78%) rename java/org/libjpegturbo/turbojpeg/{TJLoader.java.in => TJLoader-win.java.in} (100%) create mode 100644 jcicc.c create mode 100644 jdicc.c delete mode 100644 md5/Makefile.am rename release/{Distribution.xml => Distribution.xml.in} (63%) rename release/{deb-control.tmpl => deb-control.in} (82%) create mode 100755 release/installer.nsi.in delete mode 100755 release/libjpeg-turbo.nsi.in delete mode 100644 release/libjpeg-turbo.spec.in create mode 100644 release/makerpm.in create mode 100644 release/makesrpm.in create mode 100644 release/maketarball.in create mode 100644 release/rpm.spec.in delete mode 100644 simd/Makefile.am rename simd/{jsimd_arm.c => arm/jsimd.c} (52%) rename simd/{jsimd_arm_neon.S => arm/jsimd_neon.S} (91%) rename simd/{jsimd_arm64.c => arm64/jsimd.c} (54%) rename simd/{jsimd_arm64_neon.S => arm64/jsimd_neon.S} (92%) create mode 100755 simd/gas-preprocessor.in create mode 100644 simd/i386/jccolext-avx2.asm create mode 100644 simd/i386/jccolext-mmx.asm create mode 100644 simd/i386/jccolext-sse2.asm create mode 100644 simd/i386/jccolor-avx2.asm create mode 100644 simd/i386/jccolor-mmx.asm create mode 100644 simd/i386/jccolor-sse2.asm create mode 100644 simd/i386/jcgray-avx2.asm create mode 
100644 simd/i386/jcgray-mmx.asm create mode 100644 simd/i386/jcgray-sse2.asm create mode 100644 simd/i386/jcgryext-avx2.asm create mode 100644 simd/i386/jcgryext-mmx.asm create mode 100644 simd/i386/jcgryext-sse2.asm create mode 100644 simd/i386/jchuff-sse2.asm create mode 100644 simd/i386/jcphuff-sse2.asm create mode 100644 simd/i386/jcsample-avx2.asm create mode 100644 simd/i386/jcsample-mmx.asm create mode 100644 simd/i386/jcsample-sse2.asm create mode 100644 simd/i386/jdcolext-avx2.asm create mode 100644 simd/i386/jdcolext-mmx.asm create mode 100644 simd/i386/jdcolext-sse2.asm create mode 100644 simd/i386/jdcolor-avx2.asm create mode 100644 simd/i386/jdcolor-mmx.asm create mode 100644 simd/i386/jdcolor-sse2.asm create mode 100644 simd/i386/jdmerge-avx2.asm create mode 100644 simd/i386/jdmerge-mmx.asm create mode 100644 simd/i386/jdmerge-sse2.asm create mode 100644 simd/i386/jdmrgext-avx2.asm create mode 100644 simd/i386/jdmrgext-mmx.asm create mode 100644 simd/i386/jdmrgext-sse2.asm create mode 100644 simd/i386/jdsample-avx2.asm create mode 100644 simd/i386/jdsample-mmx.asm create mode 100644 simd/i386/jdsample-sse2.asm create mode 100644 simd/i386/jfdctflt-3dn.asm create mode 100644 simd/i386/jfdctflt-sse.asm create mode 100644 simd/i386/jfdctfst-mmx.asm create mode 100644 simd/i386/jfdctfst-sse2.asm create mode 100644 simd/i386/jfdctint-avx2.asm create mode 100644 simd/i386/jfdctint-mmx.asm create mode 100644 simd/i386/jfdctint-sse2.asm create mode 100644 simd/i386/jidctflt-3dn.asm create mode 100644 simd/i386/jidctflt-sse.asm create mode 100644 simd/i386/jidctflt-sse2.asm create mode 100644 simd/i386/jidctfst-mmx.asm create mode 100644 simd/i386/jidctfst-sse2.asm create mode 100644 simd/i386/jidctint-avx2.asm create mode 100644 simd/i386/jidctint-mmx.asm create mode 100644 simd/i386/jidctint-sse2.asm create mode 100644 simd/i386/jidctred-mmx.asm create mode 100644 simd/i386/jidctred-sse2.asm create mode 100644 simd/i386/jquant-3dn.asm create mode 100644 
simd/i386/jquant-mmx.asm create mode 100644 simd/i386/jquant-sse.asm create mode 100644 simd/i386/jquantf-sse2.asm create mode 100644 simd/i386/jquanti-avx2.asm create mode 100644 simd/i386/jquanti-sse2.asm create mode 100644 simd/i386/jsimd.c create mode 100644 simd/i386/jsimdcpu.asm delete mode 100644 simd/jccolext-mmx.asm delete mode 100644 simd/jccolext-sse2-64.asm delete mode 100644 simd/jccolext-sse2.asm delete mode 100644 simd/jccolor-altivec.c delete mode 100644 simd/jccolor-mmx.asm delete mode 100644 simd/jccolor-sse2-64.asm delete mode 100644 simd/jccolor-sse2.asm delete mode 100644 simd/jcgray-altivec.c delete mode 100644 simd/jcgray-mmx.asm delete mode 100644 simd/jcgray-sse2-64.asm delete mode 100644 simd/jcgray-sse2.asm delete mode 100644 simd/jcgryext-mmx.asm delete mode 100644 simd/jcgryext-sse2-64.asm delete mode 100644 simd/jcgryext-sse2.asm delete mode 100644 simd/jchuff-sse2-64.asm delete mode 100644 simd/jchuff-sse2.asm delete mode 100644 simd/jcolsamp.inc delete mode 100644 simd/jcsample-mmx.asm delete mode 100644 simd/jcsample-sse2-64.asm delete mode 100644 simd/jcsample-sse2.asm delete mode 100644 simd/jdcolext-mmx.asm delete mode 100644 simd/jdcolext-sse2-64.asm delete mode 100644 simd/jdcolext-sse2.asm delete mode 100644 simd/jdcolor-altivec.c delete mode 100644 simd/jdcolor-mmx.asm delete mode 100644 simd/jdcolor-sse2-64.asm delete mode 100644 simd/jdcolor-sse2.asm delete mode 100644 simd/jdmerge-altivec.c delete mode 100644 simd/jdmerge-mmx.asm delete mode 100644 simd/jdmerge-sse2-64.asm delete mode 100644 simd/jdmerge-sse2.asm delete mode 100644 simd/jdmrgext-mmx.asm delete mode 100644 simd/jdmrgext-sse2-64.asm delete mode 100644 simd/jdmrgext-sse2.asm delete mode 100644 simd/jdsample-mmx.asm delete mode 100644 simd/jdsample-sse2-64.asm delete mode 100644 simd/jdsample-sse2.asm delete mode 100644 simd/jfdctflt-3dn.asm delete mode 100644 simd/jfdctflt-sse-64.asm delete mode 100644 simd/jfdctflt-sse.asm delete mode 100644 
simd/jfdctfst-mmx.asm delete mode 100644 simd/jfdctfst-sse2-64.asm delete mode 100644 simd/jfdctfst-sse2.asm delete mode 100644 simd/jfdctint-altivec.c delete mode 100644 simd/jfdctint-mmx.asm delete mode 100644 simd/jfdctint-sse2-64.asm delete mode 100644 simd/jfdctint-sse2.asm delete mode 100644 simd/jidctflt-3dn.asm delete mode 100644 simd/jidctflt-sse.asm delete mode 100644 simd/jidctflt-sse2-64.asm delete mode 100644 simd/jidctflt-sse2.asm delete mode 100644 simd/jidctfst-mmx.asm delete mode 100644 simd/jidctfst-sse2-64.asm delete mode 100644 simd/jidctfst-sse2.asm delete mode 100644 simd/jidctint-mmx.asm delete mode 100644 simd/jidctint-sse2-64.asm delete mode 100644 simd/jidctint-sse2.asm delete mode 100644 simd/jidctred-mmx.asm delete mode 100644 simd/jidctred-sse2-64.asm delete mode 100644 simd/jidctred-sse2.asm delete mode 100644 simd/jquant-3dn.asm delete mode 100644 simd/jquant-mmx.asm delete mode 100644 simd/jquant-sse.asm delete mode 100644 simd/jquantf-sse2-64.asm delete mode 100644 simd/jquantf-sse2.asm delete mode 100644 simd/jquanti-sse2-64.asm delete mode 100644 simd/jquanti-sse2.asm delete mode 100644 simd/jsimd_i386.c delete mode 100644 simd/jsimd_mips.c delete mode 100644 simd/jsimd_mips_dspr2.S delete mode 100644 simd/jsimd_mips_dspr2_asm.h delete mode 100644 simd/jsimd_x86_64.c delete mode 100644 simd/jsimdcfg.inc.h delete mode 100644 simd/jsimdcpu.asm delete mode 100644 simd/jsimdext.inc create mode 100644 simd/loongson/jccolext-mmi.c create mode 100644 simd/loongson/jccolor-mmi.c create mode 100644 simd/loongson/jcsample-mmi.c rename simd/{ => loongson}/jcsample.h (76%) create mode 100644 simd/loongson/jdcolext-mmi.c create mode 100644 simd/loongson/jdcolor-mmi.c create mode 100644 simd/loongson/jdsample-mmi.c create mode 100644 simd/loongson/jfdctint-mmi.c create mode 100644 simd/loongson/jidctint-mmi.c create mode 100644 simd/loongson/jquanti-mmi.c create mode 100644 simd/loongson/jsimd.c create mode 100644 simd/loongson/jsimd_mmi.h 
create mode 100644 simd/loongson/loongson-mmintrin.h create mode 100644 simd/mips/jsimd.c create mode 100644 simd/mips/jsimd_dspr2.S create mode 100644 simd/mips/jsimd_dspr2_asm.h create mode 100644 simd/nasm/jcolsamp.inc rename simd/{ => nasm}/jdct.inc (64%) rename simd/{ => nasm}/jpeg_nbits_table.inc (92%) rename {win => simd/nasm}/jsimdcfg.inc (97%) mode change 100755 => 100644 create mode 100644 simd/nasm/jsimdcfg.inc.h create mode 100644 simd/nasm/jsimdext.inc delete mode 100755 simd/nasm_lt.sh rename simd/{ => powerpc}/jccolext-altivec.c (95%) create mode 100644 simd/powerpc/jccolor-altivec.c create mode 100644 simd/powerpc/jcgray-altivec.c rename simd/{ => powerpc}/jcgryext-altivec.c (93%) rename simd/{ => powerpc}/jcsample-altivec.c (84%) create mode 100644 simd/powerpc/jcsample.h rename simd/{ => powerpc}/jdcolext-altivec.c (96%) create mode 100644 simd/powerpc/jdcolor-altivec.c create mode 100644 simd/powerpc/jdmerge-altivec.c rename simd/{ => powerpc}/jdmrgext-altivec.c (91%) rename simd/{ => powerpc}/jdsample-altivec.c (83%) rename simd/{ => powerpc}/jfdctfst-altivec.c (68%) create mode 100644 simd/powerpc/jfdctint-altivec.c rename simd/{ => powerpc}/jidctfst-altivec.c (71%) rename simd/{ => powerpc}/jidctint-altivec.c (53%) rename simd/{ => powerpc}/jquanti-altivec.c (88%) rename simd/{jsimd_powerpc.c => powerpc/jsimd.c} (51%) rename simd/{ => powerpc}/jsimd_altivec.h (75%) create mode 100644 simd/x86_64/jccolext-avx2.asm create mode 100644 simd/x86_64/jccolext-sse2.asm create mode 100644 simd/x86_64/jccolor-avx2.asm create mode 100644 simd/x86_64/jccolor-sse2.asm create mode 100644 simd/x86_64/jcgray-avx2.asm create mode 100644 simd/x86_64/jcgray-sse2.asm create mode 100644 simd/x86_64/jcgryext-avx2.asm create mode 100644 simd/x86_64/jcgryext-sse2.asm create mode 100644 simd/x86_64/jchuff-sse2.asm create mode 100644 simd/x86_64/jcphuff-sse2.asm create mode 100644 simd/x86_64/jcsample-avx2.asm create mode 100644 simd/x86_64/jcsample-sse2.asm create 
mode 100644 simd/x86_64/jdcolext-avx2.asm create mode 100644 simd/x86_64/jdcolext-sse2.asm create mode 100644 simd/x86_64/jdcolor-avx2.asm create mode 100644 simd/x86_64/jdcolor-sse2.asm create mode 100644 simd/x86_64/jdmerge-avx2.asm create mode 100644 simd/x86_64/jdmerge-sse2.asm create mode 100644 simd/x86_64/jdmrgext-avx2.asm create mode 100644 simd/x86_64/jdmrgext-sse2.asm create mode 100644 simd/x86_64/jdsample-avx2.asm create mode 100644 simd/x86_64/jdsample-sse2.asm create mode 100644 simd/x86_64/jfdctflt-sse.asm create mode 100644 simd/x86_64/jfdctfst-sse2.asm create mode 100644 simd/x86_64/jfdctint-avx2.asm create mode 100644 simd/x86_64/jfdctint-sse2.asm create mode 100644 simd/x86_64/jidctflt-sse2.asm create mode 100644 simd/x86_64/jidctfst-sse2.asm create mode 100644 simd/x86_64/jidctint-avx2.asm create mode 100644 simd/x86_64/jidctint-sse2.asm create mode 100644 simd/x86_64/jidctred-sse2.asm create mode 100644 simd/x86_64/jquantf-sse2.asm create mode 100644 simd/x86_64/jquanti-avx2.asm create mode 100644 simd/x86_64/jquanti-sse2.asm create mode 100644 simd/x86_64/jsimd.c create mode 100644 simd/x86_64/jsimdcpu.asm create mode 100644 testimages/test1.icc create mode 100644 testimages/test1.icc.txt create mode 100644 testimages/test2.icc create mode 100644 testimages/test2.icc.txt create mode 100644 tjexample.c create mode 100755 tjexampletest.java.in delete mode 100644 win/jconfigint.h.in diff --git a/.gitignore b/.gitignore deleted file mode 100644 index db7a0c4..0000000 --- a/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -.DS_Store -Makefile.in -aclocal.m4 -ar-lib -autom4te.cache -compile -config.guess -config.h.in -config.sub -configure -depcomp -install-sh -ltmain.sh -missing diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f273012..0000000 --- a/.travis.yml +++ /dev/null @@ -1,131 +0,0 @@ -language: c - -matrix: - include: - - os: linux - env: BUILD_OFFICIAL=1 - sudo: required - services: - - docker - - os: osx - env: 
BUILD_OFFICIAL=1 - osx_image: xcode7.3 - - os: linux - compiler: clang - env: - CFLAGS="-O1 -g -fsanitize=address -fno-omit-frame-pointer" - CONFIGURE_FLAGS="--disable-shared" - ASAN_OPTIONS="detect_leaks=1 symbolize=1" - addons: - apt: - packages: - - nasm - - os: linux - compiler: gcc - env: CONFIGURE_FLAGS="--with-12bit" - - os: linux - compiler: gcc - env: CONFIGURE_FLAGS="--with-jpeg7" - addons: - apt: - packages: - - nasm - - os: linux - compiler: gcc - env: CONFIGURE_FLAGS="--with-jpeg8" - addons: - apt: - packages: - - nasm - - os: linux - compiler: gcc - env: CONFIGURE_FLAGS="--without-simd" - -before_install: - - if [ "$TRAVIS_OS_NAME" = "osx" ]; then - brew update && - brew install nasm homebrew/versions/gcc5 md5sha1sum Caskroom/versions/java6 && - ln -fs /usr/local/bin/gpg1 /usr/local/bin/gpg && - git clone --depth=1 https://github.com/libjpeg-turbo/gas-preprocessor.git ~/src/gas-preprocessor && - ln -fs /Applications/Xcode.app /Applications/Xcode72.app; - fi - - if [ "${BUILD_OFFICIAL:-}" != "" ]; then - if [ "$TRAVIS_OS_NAME" = "linux" ]; then - docker pull dcommander/buildljt; - fi && - git clone --depth=1 https://github.com/libjpeg-turbo/buildscripts.git -b $TRAVIS_BRANCH ~/src/buildscripts && - openssl aes-256-cbc -K $encrypted_f92e8533f6f1_key -iv $encrypted_f92e8533f6f1_iv -in ci/keys.enc -out ci/keys -d && - tar xf ci/keys && - rm ci/keys && - mv ci/gpgsign ~/src/buildscripts && - gpg --import ci/sign_ljt && - rm ci/sign_ljt; - fi - -script: - - if [ "${BUILD_OFFICIAL:-}" != "" ]; then - mkdir -p ~/src/ljt.nightly && - if [ -f .git/shallow ]; then - mv .git/shallow .git/shallow.bak; - fi && - if [ "$TRAVIS_OS_NAME" = "linux" ]; then - docker run -v $HOME/src/ljt.nightly:/root/src/ljt.nightly -v $HOME/src/buildscripts:/root/src/buildscripts -v $TRAVIS_BUILD_DIR:/root/src/libjpeg-turbo -v $HOME/.gnupg:/root/.gnupg -t dcommander/buildljt:latest bash -c "rpm --import http://pgp.mit.edu/pks/lookup?op=get\&search=0x0575F26BD5B3FDB1 && 
~/src/buildscripts/buildljt -r file:///root/src/libjpeg-turbo $TRAVIS_BRANCH -v" && - sudo chown -R travis:travis ~/src/ljt.nightly && - mv ~/src/ljt.nightly/latest/log-$TRAVIS_OS_NAME.txt ~/src/ljt.nightly/latest/files/; - else - PATH=$PATH:~/src/gas-preprocessor ~/src/buildscripts/buildljt -r file://$TRAVIS_BUILD_DIR $TRAVIS_BRANCH -v && - mv ~/src/ljt.nightly/latest/log-$TRAVIS_OS_NAME.txt ~/src/ljt.nightly/latest/files/; - fi && - if [ -f .git/shallow.bak ]; then - mv .git/shallow.bak .git/shallow; - fi - fi - - if [ "${BUILD_OFFICIAL:-}" == "" ]; then - autoreconf -fiv && - mkdir build && - pushd build && - ../configure ${CONFIGURE_FLAGS} && - export NUMCPUS=`grep -c '^processor' /proc/cpuinfo` && - make -j$NUMCPUS --load-average=$NUMCPUS && - if [[ "${CONFIGURE_FLAGS}" =~ "with-12bit" || - "${CONFIGURE_FLAGS}" =~ "without-simd" ]]; then - make test FLOATTEST=32bit; - else - make test FLOATTEST=sse && - JSIMD_FORCENONE=1 make test FLOATTEST=32bit; - fi && - popd; - fi - -after_failure: - - if [ "${BUILD_OFFICIAL:-}" == "" ]; then - if [ -f $TRAVIS_BUILD_DIR/build/config.log ]; then - cat $TRAVIS_BUILD_DIR/build/config.log; - fi - fi - -deploy: - - provider: s3 - bucket: libjpeg-turbo-pr - access_key_id: - secure: bmFEt4H90/oR/LiN9XI+G26Pd6hiyrTw3+Vg3lS4ynwAYk33weApaVM8CyzQTgIhGSPzFStqVm9fTrb3RmrYP/PnNS+/surOeWLkH2DMRxvc0qmetBuNx1+vAN7FUkY8MO/u5uE9WXHAdp4e64pXcLXEbKmh+wgDm72b35WmMxErtHsGbpqy+j47rQkY4BJGi7XQzjjafaamfm4PzitsjkYYsgX8KLI16jyJEIirvyDHCPTn9wKR/jSjelDl+xTlgZGuCqmLCBW8f6JgycIspWjcYfO4WpWvkbnnI2sl3rCMPvOYc4wHe8SwzG0l4tM1PblZZDRcU7vjE15PmNf1Xfq9Vx3RpgBJv+UBNL/Vn0rKdpUCeEcfC12hxrske8DWpV6waBiDivjQJreE+YRXqa5YBhV/EdkoKYCqafnJvRASlOko9evje8F9KXTNsIGTT1HPmU9QM9WoJwLs/Xa3t09EmA2IjhcuAvvUmwCTuBBQVAlDjExiTT3Zhc9IYZDD92JgpAYLgridtzR87ElOxKhTkR4PowdI6UiLYArPjMFTjoz5Rivb9qNpbLaQC8HCYgLWxpWtUTzlW/9rM8izHpF8ySFHjO6E2aA9OJFc0tcbEGwAs2jLGD01OduU+DbBfsIkW0EgfXCPbD3FVgHsn3tkuzgO/bg20SM7uuCEYKQ= - secret_access_key: - secure: 
mrkOpEtqd2dEmi/qNJyX9vkME+6xgVBnXaRETKF7jT+flcQCQ0ayQkRkMV7lzGqq44XFg+n6Cpfn6oW0gH9RNdcC8YQvFP+kgzPx6nw6V/M31Vz6ySapJf59HBzVevf0NJkr0/1JoWsp1iq4IoN10WPzsCXZB55Io3Cf7DgpR+yiyBlWOctDfNdjJ97Juw3ENE80MHDf0fVqdUOIknQka1p68yAGkjar9kc2Oe7o94RzzmoqEn8tuFumiBQjIcuVRALsKqz+eIxBNgkL3BF9shVyRjOWLAeBhMPVFxZs5Dgd4ECbvU0i33gfmje3d6qqcw78N2lZaLefoVvWol3pOzVO133ewOSY9/lmpqEiRUU2ohEe8T4aSoS7posBW42itUTO4Y5w+eVOnHsm4sRQaI+/AXWTe7GPel+P8Qbe8Ya10A5gnpoag7o3raRDcHx+/qaZw1Af/u4XiAOYz3be3U90Qc+YMc/kS5i8BH0GXBbSfaWQ00CwRFlZQ3n1xUqmjC2CmjZTki3W/p7mEt0DjhcH9ZIXscK603sCC+mF6pEd9019k5fG/8fr2Y4Ptai9kd3BxZJCX9/jSoMfWOBbgkA5bRgHU0xrAj+p49qD6Ej9Xr8GE3+uebz3sEuhSFRnCKwKoOHOemfgevfO2y/jQXP677WPf3xQX7bVDfTFSHU= - acl: public_read - local-dir: $HOME/src/ljt.nightly/latest/files - upload-dir: $TRAVIS_BRANCH/$TRAVIS_OS_NAME - on: - branch: master - condition: -n "$BUILD_OFFICIAL" - - provider: s3 - bucket: libjpeg-turbo-pr - access_key_id: - secure: bmFEt4H90/oR/LiN9XI+G26Pd6hiyrTw3+Vg3lS4ynwAYk33weApaVM8CyzQTgIhGSPzFStqVm9fTrb3RmrYP/PnNS+/surOeWLkH2DMRxvc0qmetBuNx1+vAN7FUkY8MO/u5uE9WXHAdp4e64pXcLXEbKmh+wgDm72b35WmMxErtHsGbpqy+j47rQkY4BJGi7XQzjjafaamfm4PzitsjkYYsgX8KLI16jyJEIirvyDHCPTn9wKR/jSjelDl+xTlgZGuCqmLCBW8f6JgycIspWjcYfO4WpWvkbnnI2sl3rCMPvOYc4wHe8SwzG0l4tM1PblZZDRcU7vjE15PmNf1Xfq9Vx3RpgBJv+UBNL/Vn0rKdpUCeEcfC12hxrske8DWpV6waBiDivjQJreE+YRXqa5YBhV/EdkoKYCqafnJvRASlOko9evje8F9KXTNsIGTT1HPmU9QM9WoJwLs/Xa3t09EmA2IjhcuAvvUmwCTuBBQVAlDjExiTT3Zhc9IYZDD92JgpAYLgridtzR87ElOxKhTkR4PowdI6UiLYArPjMFTjoz5Rivb9qNpbLaQC8HCYgLWxpWtUTzlW/9rM8izHpF8ySFHjO6E2aA9OJFc0tcbEGwAs2jLGD01OduU+DbBfsIkW0EgfXCPbD3FVgHsn3tkuzgO/bg20SM7uuCEYKQ= - secret_access_key: - secure: 
mrkOpEtqd2dEmi/qNJyX9vkME+6xgVBnXaRETKF7jT+flcQCQ0ayQkRkMV7lzGqq44XFg+n6Cpfn6oW0gH9RNdcC8YQvFP+kgzPx6nw6V/M31Vz6ySapJf59HBzVevf0NJkr0/1JoWsp1iq4IoN10WPzsCXZB55Io3Cf7DgpR+yiyBlWOctDfNdjJ97Juw3ENE80MHDf0fVqdUOIknQka1p68yAGkjar9kc2Oe7o94RzzmoqEn8tuFumiBQjIcuVRALsKqz+eIxBNgkL3BF9shVyRjOWLAeBhMPVFxZs5Dgd4ECbvU0i33gfmje3d6qqcw78N2lZaLefoVvWol3pOzVO133ewOSY9/lmpqEiRUU2ohEe8T4aSoS7posBW42itUTO4Y5w+eVOnHsm4sRQaI+/AXWTe7GPel+P8Qbe8Ya10A5gnpoag7o3raRDcHx+/qaZw1Af/u4XiAOYz3be3U90Qc+YMc/kS5i8BH0GXBbSfaWQ00CwRFlZQ3n1xUqmjC2CmjZTki3W/p7mEt0DjhcH9ZIXscK603sCC+mF6pEd9019k5fG/8fr2Y4Ptai9kd3BxZJCX9/jSoMfWOBbgkA5bRgHU0xrAj+p49qD6Ej9Xr8GE3+uebz3sEuhSFRnCKwKoOHOemfgevfO2y/jQXP677WPf3xQX7bVDfTFSHU= - acl: public_read - local-dir: $HOME/src/ljt.nightly/latest/files - upload-dir: $TRAVIS_BRANCH/$TRAVIS_OS_NAME - on: - branch: dev - condition: -n "$BUILD_OFFICIAL" diff --git a/BUILDING.md b/BUILDING.md index 2725f30..5dbafd8 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -1,26 +1,27 @@ -Un*x Platforms (including Mac and Cygwin) -========================================= +Building libjpeg-turbo +====================== Build Requirements ------------------ -- autoconf 2.56 or later -- automake 1.7 or later -- libtool 1.4 or later - * If using Xcode 4.3 or later on OS X, autoconf and automake are no longer - provided. The easiest way to obtain them is from - [MacPorts](http://www.MacPorts.org) or [Homebrew](http://brew.sh/). + +### All Systems + +- [CMake](http://www.cmake.org) v2.8.12 or later - [NASM](http://www.nasm.us) or [YASM](http://yasm.tortall.net) (if building x86 or x86-64 SIMD extensions) - * If using NASM, 0.98, or 2.01 or later is required for an x86 build (0.99 - and 2.00 do not work properly with libjpeg-turbo's x86 SIMD code.) - * If using NASM, 2.00 or later is required for an x86-64 build. 
- * If using NASM, 2.07 or later (except 2.11.08) is required for an x86-64 - Mac build (2.11.08 does not work properly with libjpeg-turbo's x86-64 SIMD - code when building macho64 objects.) NASM or YASM can be obtained from + * If using NASM, 2.10 or later is required. + * If using NASM, 2.10 or later (except 2.11.08) is required for an x86-64 Mac + build (2.11.08 does not work properly with libjpeg-turbo's x86-64 SIMD code + when building macho64 objects.) NASM or YASM can be obtained from [MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/). + * If using YASM, 1.2.0 or later is required. + - NOTE: Currently, if it is desirable to hide the SIMD function symbols in + Mac executables or shared libraries that statically link with + libjpeg-turbo, then YASM must be used when building libjpeg-turbo. + * If building on Windows, **nasm.exe**/**yasm.exe** should be in your `PATH`. The binary RPMs released by the NASM project do not work on older Linux systems, such as Red Hat Enterprise Linux 5. On such systems, you can easily @@ -36,6 +37,9 @@ Build Requirements NOTE: the NASM build will fail if texinfo is not installed. + +### Un*x Platforms (including Linux, Mac, FreeBSD, Solaris, and Cygwin) + - GCC v4.1 (or later) or Clang recommended for best performance - If building the TurboJPEG Java wrapper, JDK or OpenJDK 1.5 or later is @@ -44,16 +48,55 @@ Build Requirements install the Java Developer Package, which can be downloaded from (Apple ID required.) For other systems, you can obtain the Oracle Java Development Kit from - . + . + + * If using JDK 11 or later, CMake 3.10.x or later must also be used. + +### Windows + +- Microsoft Visual C++ 2005 or later + + If you don't already have Visual C++, then the easiest way to get it is by + installing the + [Windows SDK](http://msdn.microsoft.com/en-us/windows/bb980924.aspx). + The Windows SDK includes both 32-bit and 64-bit Visual C++ compilers and + everything necessary to build libjpeg-turbo. 
+ + * You can also use Microsoft Visual Studio Express/Community Edition, which + is a free download. (NOTE: versions prior to 2012 can only be used to + build 32-bit code.) + * If you intend to build libjpeg-turbo from the command line, then add the + appropriate compiler and SDK directories to the `INCLUDE`, `LIB`, and + `PATH` environment variables. This is generally accomplished by + executing `vcvars32.bat` or `vcvars64.bat` and `SetEnv.cmd`. + `vcvars32.bat` and `vcvars64.bat` are part of Visual C++ and are located in + the same directory as the compiler. `SetEnv.cmd` is part of the Windows + SDK. You can pass optional arguments to `SetEnv.cmd` to specify a 32-bit + or 64-bit build environment. + + ... OR ... + +- MinGW + + [MSYS2](http://msys2.github.io/) or [tdm-gcc](http://tdm-gcc.tdragon.net/) + recommended if building on a Windows machine. Both distributions install a + Start Menu link that can be used to launch a command prompt with the + appropriate compiler paths automatically set. + +- If building the TurboJPEG Java wrapper, JDK 1.5 or later is required. This + can be downloaded from + . + + * If using JDK 11 or later, CMake 3.10.x or later must also be used. Out-of-Tree Builds ------------------ Binary objects, libraries, and executables are generated in the directory from -which `configure` is executed (the "binary directory"), and this directory need -not necessarily be the same as the libjpeg-turbo source directory. You can -create multiple independent binary directories, in which different versions of +which CMake is executed (the "binary directory"), and this directory need not +necessarily be the same as the libjpeg-turbo source directory. You can create +multiple independent binary directories, in which different versions of libjpeg-turbo can be built from the same source tree using different compilers or settings. 
In the sections below, *{build_directory}* refers to the binary directory, whereas *{source_directory}* refers to the libjpeg-turbo source @@ -63,20 +106,22 @@ directory. For in-tree builds, these directories are the same. Build Procedure --------------- +NOTE: The build procedures below assume that CMake is invoked from the command +line, but all of these procedures can be adapted to the CMake GUI as +well. + + +### Un*x + The following procedure will build libjpeg-turbo on Unix and Unix-like systems. (On Solaris, this generates a 32-bit build. See "Build Recipes" below for 64-bit build instructions.) - cd {source_directory} - autoreconf -fiv cd {build_directory} - sh {source_directory}/configure [additional configure flags] + cmake -G"Unix Makefiles" [additional CMake flags] {source_directory} make -NOTE: Running autoreconf in the source directory is not necessary if building -libjpeg-turbo from one of the official release tarballs. - -This will generate the following files under **.libs/**: +This will generate the following files under *{build_directory}*: **libjpeg.a**
Static link library for the libjpeg API @@ -100,8 +145,8 @@ Import library for the libjpeg API **libturbojpeg.a**
Static link library for the TurboJPEG API -**libturbojpeg.so.0.1.0** (Linux, Unix)
-**libturbojpeg.0.1.0.dylib** (Mac)
+**libturbojpeg.so.0.2.0** (Linux, Unix)
+**libturbojpeg.0.2.0.dylib** (Mac)
**cygturbojpeg-0.dll** (Cygwin)
Shared library for the TurboJPEG API @@ -113,146 +158,236 @@ Development symlink for the TurboJPEG API Import library for the TurboJPEG API -### libjpeg v7 or v8 API/ABI Emulation +### Visual C++ (Command Line) -Add `--with-jpeg7` to the `configure` command line to build a version of -libjpeg-turbo that is API/ABI-compatible with libjpeg v7. Add `--with-jpeg8` -to the `configure` command to build a version of libjpeg-turbo that is -API/ABI-compatible with libjpeg v8. See [README.md](README.md) for more -information about libjpeg v7 and v8 emulation. + cd {build_directory} + cmake -G"NMake Makefiles" -DCMAKE_BUILD_TYPE=Release [additional CMake flags] {source_directory} + nmake +This will build either a 32-bit or a 64-bit version of libjpeg-turbo, depending +on which version of **cl.exe** is in the `PATH`. -### In-Memory Source/Destination Managers +The following files will be generated under *{build_directory}*: -When using libjpeg v6b or v7 API/ABI emulation, add `--without-mem-srcdst` to -the `configure` command line to build a version of libjpeg-turbo that lacks the -`jpeg_mem_src()` and `jpeg_mem_dest()` functions. These functions were not -part of the original libjpeg v6b and v7 APIs, so removing them ensures strict -conformance with those APIs. See [README.md](README.md) for more information. +**jpeg-static.lib**
+Static link library for the libjpeg API +**jpeg{version}.dll**
+DLL for the libjpeg API -### Arithmetic Coding Support +**jpeg.lib**
+Import library for the libjpeg API -Since the patent on arithmetic coding has expired, this functionality has been -included in this release of libjpeg-turbo. libjpeg-turbo's implementation is -based on the implementation in libjpeg v8, but it works when emulating libjpeg -v7 or v6b as well. The default is to enable both arithmetic encoding and -decoding, but those who have philosophical objections to arithmetic coding can -add `--without-arith-enc` or `--without-arith-dec` to the `configure` command -line to disable encoding or decoding (respectively.) +**turbojpeg-static.lib**
+Static link library for the TurboJPEG API +**turbojpeg.dll**
+DLL for the TurboJPEG API -### TurboJPEG Java Wrapper +**turbojpeg.lib**
+Import library for the TurboJPEG API -Add `--with-java` to the `configure` command line to incorporate an optional -Java Native Interface (JNI) wrapper into the TurboJPEG shared library and build -the Java front-end classes to support it. This allows the TurboJPEG shared -library to be used directly from Java applications. See -[java/README](java/README) for more details. +*{version}* is 62, 7, or 8, depending on whether libjpeg v6b (default), v7, or +v8 emulation is enabled. -You can set the `JAVAC`, `JAR`, and `JAVA` configure variables to specify -alternate commands for javac, jar, and java (respectively.) You can also -set the `JAVACFLAGS` configure variable to specify arguments that should be -passed to the Java compiler when building the TurboJPEG classes, and -`JNI_CFLAGS` to specify arguments that should be passed to the C compiler when -building the JNI wrapper. Run `configure --help` for more details. +### Visual C++ (IDE) -Build Recipes -------------- +Choose the appropriate CMake generator option for your version of Visual Studio +(run `cmake` with no arguments for a list of available generators.) For +instance: + cd {build_directory} + cmake -G"Visual Studio 10" [additional CMake flags] {source_directory} -### 32-bit Build on 64-bit Linux +NOTE: Add "Win64" to the generator name (for example, "Visual Studio 10 Win64") +to build a 64-bit version of libjpeg-turbo. A separate build directory must be +used for 32-bit and 64-bit builds. -Add +You can then open **ALL_BUILD.vcproj** in Visual Studio and build one of the +configurations in that project ("Debug", "Release", etc.) to generate a full +build of libjpeg-turbo. - --host i686-pc-linux-gnu CFLAGS='-O3 -m32' LDFLAGS=-m32 +This will generate the following files under *{build_directory}*: -to the `configure` command line. +**{configuration}/jpeg-static.lib**
+Static link library for the libjpeg API +**{configuration}/jpeg{version}.dll**
+DLL for the libjpeg API -### 64-bit Build on 64-bit OS X +**{configuration}/jpeg.lib**
+Import library for the libjpeg API -Add +**{configuration}/turbojpeg-static.lib**
+Static link library for the TurboJPEG API - --host x86_64-apple-darwin NASM=/opt/local/bin/nasm +**{configuration}/turbojpeg.dll**
+DLL for the TurboJPEG API -to the `configure` command line. NASM 2.07 or later from MacPorts or Homebrew -must be installed. If using Homebrew, then replace `/opt/local` with -`/usr/local`. +**{configuration}/turbojpeg.lib**
+Import library for the TurboJPEG API +*{configuration}* is Debug, Release, RelWithDebInfo, or MinSizeRel, depending +on the configuration you built in the IDE, and *{version}* is 62, 7, or 8, +depending on whether libjpeg v6b (default), v7, or v8 emulation is enabled. -### 32-bit Build on 64-bit OS X -Add +### MinGW - --host i686-apple-darwin CFLAGS='-O3 -m32' LDFLAGS=-m32 +NOTE: This assumes that you are building on a Windows machine using the MSYS +environment. If you are cross-compiling on a Un*x platform (including Mac and +Cygwin), then see "Build Recipes" below. -to the `configure` command line. + cd {build_directory} + cmake -G"MSYS Makefiles" [additional CMake flags] {source_directory} + make +This will generate the following files under *{build_directory}*: -### 64-bit Backward-Compatible Build on 64-bit OS X +**libjpeg.a**
+Static link library for the libjpeg API -Add +**libjpeg-{version}.dll**
+DLL for the libjpeg API - --host x86_64-apple-darwin NASM=/opt/local/bin/nasm \ - CFLAGS='-mmacosx-version-min=10.5 -O3' \ - LDFLAGS='-mmacosx-version-min=10.5' +**libjpeg.dll.a**
+Import library for the libjpeg API -to the `configure` command line. NASM 2.07 or later from MacPorts or Homebrew -must be installed. If using Homebrew, then replace `/opt/local` with -`/usr/local`. +**libturbojpeg.a**
+Static link library for the TurboJPEG API +**libturbojpeg.dll**
+DLL for the TurboJPEG API -### 32-bit Backward-Compatible Build on OS X +**libturbojpeg.dll.a**
+Import library for the TurboJPEG API -Add +*{version}* is 62, 7, or 8, depending on whether libjpeg v6b (default), v7, or +v8 emulation is enabled. - --host i686-apple-darwin \ - CFLAGS='-mmacosx-version-min=10.5 -O3 -m32' \ - LDFLAGS='-mmacosx-version-min=10.5 -m32' -to the `configure` command line. +### Debug Build + +Add `-DCMAKE_BUILD_TYPE=Debug` to the CMake command line. Or, if building +with NMake, remove `-DCMAKE_BUILD_TYPE=Release` (Debug builds are the default +with NMake.) -### 64-bit Build on 64-bit Solaris +### libjpeg v7 or v8 API/ABI Emulation -Add +Add `-DWITH_JPEG7=1` to the CMake command line to build a version of +libjpeg-turbo that is API/ABI-compatible with libjpeg v7. Add `-DWITH_JPEG8=1` +to the CMake command line to build a version of libjpeg-turbo that is +API/ABI-compatible with libjpeg v8. See [README.md](README.md) for more +information about libjpeg v7 and v8 emulation. - --host x86_64-pc-solaris CFLAGS='-O3 -m64' LDFLAGS=-m64 -to the `configure` command line. +### In-Memory Source/Destination Managers +When using libjpeg v6b or v7 API/ABI emulation, add `-DWITH_MEM_SRCDST=0` to +the CMake command line to build a version of libjpeg-turbo that lacks the +`jpeg_mem_src()` and `jpeg_mem_dest()` functions. These functions were not +part of the original libjpeg v6b and v7 APIs, so removing them ensures strict +conformance with those APIs. See [README.md](README.md) for more information. -### 32-bit Build on 64-bit FreeBSD -Add +### Arithmetic Coding Support - --host i386-unknown-freebsd CFLAGS='-O3 -m32' LDFLAGS=-m32 +Since the patent on arithmetic coding has expired, this functionality has been +included in this release of libjpeg-turbo. libjpeg-turbo's implementation is +based on the implementation in libjpeg v8, but it works when emulating libjpeg +v7 or v6b as well. 
 The default is to enable both arithmetic encoding and +decoding, but those who have philosophical objections to arithmetic coding can +add `-DWITH_ARITH_ENC=0` or `-DWITH_ARITH_DEC=0` to the CMake command line to +disable encoding or decoding (respectively.) -to the `configure` command line. NASM 2.07 or later from FreeBSD ports must be -installed. +### TurboJPEG Java Wrapper -### Oracle Solaris Studio +Add `-DWITH_JAVA=1` to the CMake command line to incorporate an optional Java +Native Interface (JNI) wrapper into the TurboJPEG shared library and build the +Java front-end classes to support it. This allows the TurboJPEG shared library +to be used directly from Java applications. See [java/README](java/README) for +more details. -Add +If Java is not in your `PATH`, or if you wish to use an alternate JDK to +build/test libjpeg-turbo, then (prior to running CMake) set the `JAVA_HOME` +environment variable to the location of the JDK that you wish to use. The +`Java_JAVAC_EXECUTABLE`, `Java_JAVA_EXECUTABLE`, and `Java_JAR_EXECUTABLE` +CMake variables can also be used to specify alternate commands or locations for +javac, java, and jar (respectively.) You can also set the +`CMAKE_JAVA_COMPILE_FLAGS` CMake variable or the `JAVAFLAGS` environment +variable to specify arguments that should be passed to the Java compiler when +building the TurboJPEG classes, and the `JAVAARGS` CMake variable to specify +arguments that should be passed to the JRE when running the TurboJPEG Java unit +tests. 
+ + +Build Recipes +------------- + + +### 32-bit Build on 64-bit Linux/Unix/Mac + +Use export/setenv to set the following environment variables before running +CMake: + + CFLAGS=-m32 + LDFLAGS=-m32 + + +### 64-bit Build on Solaris + +Use export/setenv to set the following environment variables before running +CMake: + + CFLAGS=-m64 + LDFLAGS=-m64 + + +### Other Compilers + +On Un*x systems, prior to running CMake, you can set the `CC` environment +variable to the command used to invoke the C compiler. + + +### 32-bit MinGW Build on Un*x (including Mac and Cygwin) + +Create a file called **toolchain.cmake** under *{build_directory}*, with the +following contents: + + set(CMAKE_SYSTEM_NAME Windows) + set(CMAKE_SYSTEM_PROCESSOR X86) + set(CMAKE_C_COMPILER {mingw_binary_path}/i686-w64-mingw32-gcc) + set(CMAKE_RC_COMPILER {mingw_binary_path}/i686-w64-mingw32-windres) - CC=cc +*{mingw\_binary\_path}* is the directory under which the MinGW binaries are +located (usually **/usr/bin**.) Next, execute the following commands: -to the `configure` command line. libjpeg-turbo will automatically be built -with the maximum optimization level (-xO5) unless you override `CFLAGS`. + cd {build_directory} + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ + [additional CMake flags] {source_directory} + make -To build a 64-bit version of libjpeg-turbo using Oracle Solaris Studio, add - --host x86_64-pc-solaris CC=cc CFLAGS='-xO5 -m64' LDFLAGS=-m64 +### 64-bit MinGW Build on Un*x (including Mac and Cygwin) -to the `configure` command line. 
+Create a file called **toolchain.cmake** under *{build_directory}*, with the +following contents: + set(CMAKE_SYSTEM_NAME Windows) + set(CMAKE_SYSTEM_PROCESSOR AMD64) + set(CMAKE_C_COMPILER {mingw_binary_path}/x86_64-w64-mingw32-gcc) + set(CMAKE_RC_COMPILER {mingw_binary_path}/x86_64-w64-mingw32-windres) -### MinGW Build on Cygwin +*{mingw\_binary\_path}* is the directory under which the MinGW binaries are +located (usually **/usr/bin**.) Next, execute the following commands: -Use CMake (see recipes below) + cd {build_directory} + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ + [additional CMake flags] {source_directory} + make Building libjpeg-turbo for iOS @@ -283,13 +418,19 @@ iPhone 3GS-4S/iPad 1st-3rd Generation and newer: IOS_PLATFORMDIR=/Developer/Platforms/iPhoneOS.platform IOS_SYSROOT=($IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk) - - export host_alias=arm-apple-darwin10 - export CC=${IOS_PLATFORMDIR}/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2 - export CFLAGS="-mfloat-abi=softfp -isysroot ${IOS_SYSROOT[0]} -O3 -march=armv7 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -miphoneos-version-min=3.0" + export CFLAGS="-mfloat-abi=softfp -march=armv7 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -miphoneos-version-min=3.0" cd {build_directory} - sh {source_directory}/configure [additional configure flags] + + cat <<EOF >toolchain.cmake + set(CMAKE_SYSTEM_NAME Darwin) + set(CMAKE_SYSTEM_PROCESSOR arm) + set(CMAKE_C_COMPILER ${IOS_PLATFORMDIR}/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2) + EOF + + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ + -DCMAKE_OSX_SYSROOT=${IOS_SYSROOT[0]} \ + [additional CMake flags] {source_directory} make #### Xcode 4.3-4.6 (LLVM-GCC) @@ -302,14 +443,20 @@ Same as above, but replace the first line with: IOS_PLATFORMDIR=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform IOS_SYSROOT=($IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk) - - export host_alias=arm-apple-darwin10 - 
export CC=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang - export CFLAGS="-mfloat-abi=softfp -isysroot ${IOS_SYSROOT[0]} -O3 -arch armv7 -miphoneos-version-min=3.0" - export CCASFLAGS="$CFLAGS -no-integrated-as" + export CFLAGS="-mfloat-abi=softfp -arch armv7 -miphoneos-version-min=3.0" + export ASMFLAGS="-no-integrated-as" cd {build_directory} - sh {source_directory}/configure [additional configure flags] + + cat <<EOF >toolchain.cmake + set(CMAKE_SYSTEM_NAME Darwin) + set(CMAKE_SYSTEM_PROCESSOR arm) + set(CMAKE_C_COMPILER /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang) + EOF + + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ + -DCMAKE_OSX_SYSROOT=${IOS_SYSROOT[0]} \ + [additional CMake flags] {source_directory} make @@ -324,13 +471,19 @@ iPhone 5/iPad 4th Generation and newer: IOS_PLATFORMDIR=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform IOS_SYSROOT=($IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk) - - export host_alias=arm-apple-darwin10 - export CC=${IOS_PLATFORMDIR}/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2 - export CFLAGS="-mfloat-abi=softfp -isysroot ${IOS_SYSROOT[0]} -O3 -march=armv7s -mcpu=swift -mtune=swift -mfpu=neon -miphoneos-version-min=6.0" + export CFLAGS="-Wall -mfloat-abi=softfp -march=armv7s -mcpu=swift -mtune=swift -mfpu=neon -miphoneos-version-min=6.0" cd {build_directory} - sh {source_directory}/configure [additional configure flags] + + cat <<EOF >toolchain.cmake + set(CMAKE_SYSTEM_NAME Darwin) + set(CMAKE_SYSTEM_PROCESSOR arm) + set(CMAKE_C_COMPILER ${IOS_PLATFORMDIR}/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2) + EOF + + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ + -DCMAKE_OSX_SYSROOT=${IOS_SYSROOT[0]} \ + [additional CMake flags] {source_directory} make #### Xcode 5 and later (Clang) @@ -338,7 +491,7 @@ iPhone 5/iPad 4th Generation and newer: Same as the ARMv7 build procedure for 
Xcode 5 and later, except replace the compiler flags as follows: - export CFLAGS="-mfloat-abi=softfp -isysroot ${IOS_SYSROOT[0]} -O3 -arch armv7s -miphoneos-version-min=6.0" + export CFLAGS="-Wall -mfloat-abi=softfp -arch armv7s -miphoneos-version-min=6.0" ### ARMv8 (64-bit) @@ -350,13 +503,19 @@ iPhone 5S/iPad Mini 2/iPad Air and newer. IOS_PLATFORMDIR=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform IOS_SYSROOT=($IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk) - - export host_alias=aarch64-apple-darwin - export CC=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang - export CFLAGS="-isysroot ${IOS_SYSROOT[0]} -O3 -arch arm64 -miphoneos-version-min=7.0 -funwind-tables" + export CFLAGS="-Wall -arch arm64 -miphoneos-version-min=7.0 -funwind-tables" cd {build_directory} - sh {source_directory}/configure [additional configure flags] + + cat <<EOF >toolchain.cmake + set(CMAKE_SYSTEM_NAME Darwin) + set(CMAKE_SYSTEM_PROCESSOR aarch64) + set(CMAKE_C_COMPILER /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang) + EOF + + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ + -DCMAKE_OSX_SYSROOT=${IOS_SYSROOT[0]} \ + [additional CMake flags] {source_directory} make Once built, lipo can be used to combine the ARMv7, v7s, and/or v8 variants into @@ -367,7 +526,7 @@ Building libjpeg-turbo for Android ---------------------------------- Building libjpeg-turbo for Android platforms requires the -[Android NDK](https://developer.android.com/tools/sdk/ndk) and autotools. +[Android NDK](https://developer.android.com/tools/sdk/ndk). ### ARMv7 (32-bit) @@ -387,23 +546,25 @@ needs. 
 # It should not be necessary to modify the rest HOST=arm-linux-androideabi SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-arm - ANDROID_CFLAGS="-march=armv7-a -mfloat-abi=softfp -fprefetch-loop-arrays \ - --sysroot=${SYSROOT}" - + export CFLAGS="-march=armv7-a -mfloat-abi=softfp -fprefetch-loop-arrays \ + -D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \ + -isystem ${NDK_PATH}/sysroot/usr/include \ + -isystem ${NDK_PATH}/sysroot/usr/include/${HOST}" + export LDFLAGS=-pie TOOLCHAIN=${NDK_PATH}/toolchains/${HOST}-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM} - export CPP=${TOOLCHAIN}/bin/${HOST}-cpp - export AR=${TOOLCHAIN}/bin/${HOST}-ar - export NM=${TOOLCHAIN}/bin/${HOST}-nm - export CC=${TOOLCHAIN}/bin/${HOST}-gcc - export LD=${TOOLCHAIN}/bin/${HOST}-ld - export RANLIB=${TOOLCHAIN}/bin/${HOST}-ranlib - export OBJDUMP=${TOOLCHAIN}/bin/${HOST}-objdump - export STRIP=${TOOLCHAIN}/bin/${HOST}-strip + cd {build_directory} - sh {source_directory}/configure --host=${HOST} \ - CFLAGS="${ANDROID_CFLAGS} -O3 -fPIE" \ - CPPFLAGS="${ANDROID_CFLAGS}" \ - LDFLAGS="${ANDROID_CFLAGS} -pie" --with-simd ${1+"$@"} + + cat <<EOF >toolchain.cmake + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR arm) + set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc) + set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST}) + EOF + + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ + -DCMAKE_POSITION_INDEPENDENT_CODE=1 \ + [additional CMake flags] {source_directory} make @@ -424,22 +585,24 @@ needs. 
 # It should not be necessary to modify the rest HOST=aarch64-linux-android SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-arm64 - ANDROID_CFLAGS="--sysroot=${SYSROOT}" - + export CFLAGS="-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \ + -isystem ${NDK_PATH}/sysroot/usr/include \ + -isystem ${NDK_PATH}/sysroot/usr/include/${HOST}" + export LDFLAGS=-pie TOOLCHAIN=${NDK_PATH}/toolchains/${HOST}-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM} - export CPP=${TOOLCHAIN}/bin/${HOST}-cpp - export AR=${TOOLCHAIN}/bin/${HOST}-ar - export NM=${TOOLCHAIN}/bin/${HOST}-nm - export CC=${TOOLCHAIN}/bin/${HOST}-gcc - export LD=${TOOLCHAIN}/bin/${HOST}-ld - export RANLIB=${TOOLCHAIN}/bin/${HOST}-ranlib - export OBJDUMP=${TOOLCHAIN}/bin/${HOST}-objdump - export STRIP=${TOOLCHAIN}/bin/${HOST}-strip + cd {build_directory} - sh {source_directory}/configure --host=${HOST} \ - CFLAGS="${ANDROID_CFLAGS} -O3 -fPIE" \ - CPPFLAGS="${ANDROID_CFLAGS}" \ - LDFLAGS="${ANDROID_CFLAGS} -pie" --with-simd ${1+"$@"} + + cat <<EOF >toolchain.cmake + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR aarch64) + set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc) + set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST}) + EOF + + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ + -DCMAKE_POSITION_INDEPENDENT_CODE=1 \ + [additional CMake flags] {source_directory} make @@ -460,22 +623,24 @@ needs. 
 # It should not be necessary to modify the rest HOST=i686-linux-android SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-x86 - ANDROID_CFLAGS="--sysroot=${SYSROOT}" - + export CFLAGS="-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \ + -isystem ${NDK_PATH}/sysroot/usr/include \ + -isystem ${NDK_PATH}/sysroot/usr/include/${HOST}" + export LDFLAGS=-pie TOOLCHAIN=${NDK_PATH}/toolchains/x86-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM} - export CPP=${TOOLCHAIN}/bin/${HOST}-cpp - export AR=${TOOLCHAIN}/bin/${HOST}-ar - export NM=${TOOLCHAIN}/bin/${HOST}-nm - export CC=${TOOLCHAIN}/bin/${HOST}-gcc - export LD=${TOOLCHAIN}/bin/${HOST}-ld - export RANLIB=${TOOLCHAIN}/bin/${HOST}-ranlib - export OBJDUMP=${TOOLCHAIN}/bin/${HOST}-objdump - export STRIP=${TOOLCHAIN}/bin/${HOST}-strip + cd {build_directory} - sh {source_directory}/configure --host=${HOST} \ - CFLAGS="${ANDROID_CFLAGS} -O3 -fPIE" \ - CPPFLAGS="${ANDROID_CFLAGS}" \ - LDFLAGS="${ANDROID_CFLAGS} -pie" --with-simd ${1+"$@"} + + cat <<EOF >toolchain.cmake + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR i386) + set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc) + set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST}) + EOF + + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ + -DCMAKE_POSITION_INDEPENDENT_CODE=1 \ + [additional CMake flags] {source_directory} make @@ -496,322 +661,54 @@ needs. 
# It should not be necessary to modify the rest HOST=x86_64-linux-android SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-x86_64 - ANDROID_CFLAGS="--sysroot=${SYSROOT}" - + export CFLAGS="-D__ANDROID_API__=${ANDROID_VERSION} --sysroot=${SYSROOT} \ + -isystem ${NDK_PATH}/sysroot/usr/include \ + -isystem ${NDK_PATH}/sysroot/usr/include/${HOST}" + export LDFLAGS=-pie TOOLCHAIN=${NDK_PATH}/toolchains/x86_64-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM} - export CPP=${TOOLCHAIN}/bin/${HOST}-cpp - export AR=${TOOLCHAIN}/bin/${HOST}-ar - export NM=${TOOLCHAIN}/bin/${HOST}-nm - export CC=${TOOLCHAIN}/bin/${HOST}-gcc - export LD=${TOOLCHAIN}/bin/${HOST}-ld - export RANLIB=${TOOLCHAIN}/bin/${HOST}-ranlib - export OBJDUMP=${TOOLCHAIN}/bin/${HOST}-objdump - export STRIP=${TOOLCHAIN}/bin/${HOST}-strip - cd {build_directory} - sh {source_directory}/configure --host=${HOST} \ - CFLAGS="${ANDROID_CFLAGS} -O3 -fPIE" \ - CPPFLAGS="${ANDROID_CFLAGS}" \ - LDFLAGS="${ANDROID_CFLAGS} -pie" --with-simd ${1+"$@"} - make - - -If building for Android 4.0.x (API level < 16) or earlier, remove `-fPIE` from -`CFLAGS` and `-pie` from `LDFLAGS`. - - -Installing libjpeg-turbo ------------------------- - -To install libjpeg-turbo after it is built, replace `make` in the build -instructions with `make install`. - -The `--prefix` argument to configure (or the `prefix` configure variable) can -be used to specify an installation directory of your choosing. If you don't -specify an installation directory, then the default is to install libjpeg-turbo -under **/opt/libjpeg-turbo** and to place the libraries in -**/opt/libjpeg-turbo/lib32** (32-bit) or **/opt/libjpeg-turbo/lib64** (64-bit.) - -The `bindir`, `datadir`, `docdir`, `includedir`, `libdir`, and `mandir` -configure variables allow a finer degree of control over where specific files in -the libjpeg-turbo distribution should be installed. 
These variables can either -be specified at configure time or passed as arguments to `make install`. - - -Windows (Visual C++ or MinGW) -============================= - - -Build Requirements ------------------- - -- [CMake](http://www.cmake.org) v2.8.11 or later - -- [NASM](http://www.nasm.us) or [YASM](http://yasm.tortall.net) - * If using NASM, 0.98 or later is required for an x86 build. - * If using NASM, 2.05 or later is required for an x86-64 build. - * **nasm.exe**/**yasm.exe** should be in your `PATH`. - -- Microsoft Visual C++ 2005 or later - - If you don't already have Visual C++, then the easiest way to get it is by - installing the - [Windows SDK](http://msdn.microsoft.com/en-us/windows/bb980924.aspx). - The Windows SDK includes both 32-bit and 64-bit Visual C++ compilers and - everything necessary to build libjpeg-turbo. - - * You can also use Microsoft Visual Studio Express/Community Edition, which - is a free download. (NOTE: versions prior to 2012 can only be used to - build 32-bit code.) - * If you intend to build libjpeg-turbo from the command line, then add the - appropriate compiler and SDK directories to the `INCLUDE`, `LIB`, and - `PATH` environment variables. This is generally accomplished by - executing `vcvars32.bat` or `vcvars64.bat` and `SetEnv.cmd`. - `vcvars32.bat` and `vcvars64.bat` are part of Visual C++ and are located in - the same directory as the compiler. `SetEnv.cmd` is part of the Windows - SDK. You can pass optional arguments to `SetEnv.cmd` to specify a 32-bit - or 64-bit build environment. - - ... OR ... - -- MinGW - - [MSYS2](http://msys2.github.io/) or [tdm-gcc](http://tdm-gcc.tdragon.net/) - recommended if building on a Windows machine. Both distributions install a - Start Menu link that can be used to launch a command prompt with the - appropriate compiler paths automatically set. - -- If building the TurboJPEG Java wrapper, JDK 1.5 or later is required. This - can be downloaded from . 
- - -Out-of-Tree Builds ------------------- - -Binary objects, libraries, and executables are generated in the directory from -which CMake is executed (the "binary directory"), and this directory need not -necessarily be the same as the libjpeg-turbo source directory. You can create -multiple independent binary directories, in which different versions of -libjpeg-turbo can be built from the same source tree using different compilers -or settings. In the sections below, *{build_directory}* refers to the binary -directory, whereas *{source_directory}* refers to the libjpeg-turbo source -directory. For in-tree builds, these directories are the same. - - -Build Procedure ---------------- - -NOTE: The build procedures below assume that CMake is invoked from the command -line, but all of these procedures can be adapted to the CMake GUI as -well. - - -### Visual C++ (Command Line) cd {build_directory} - cmake -G"NMake Makefiles" -DCMAKE_BUILD_TYPE=Release [additional CMake flags] {source_directory} - nmake - -This will build either a 32-bit or a 64-bit version of libjpeg-turbo, depending -on which version of **cl.exe** is in the `PATH`. - -The following files will be generated under *{build_directory}*: - -**jpeg-static.lib**
-Static link library for the libjpeg API - -**sharedlib/jpeg{version}.dll**
-DLL for the libjpeg API - -**sharedlib/jpeg.lib**
-Import library for the libjpeg API - -**turbojpeg-static.lib**
-Static link library for the TurboJPEG API - -**turbojpeg.dll**
-DLL for the TurboJPEG API - -**turbojpeg.lib**
-Import library for the TurboJPEG API - -*{version}* is 62, 7, or 8, depending on whether libjpeg v6b (default), v7, or -v8 emulation is enabled. - -### Visual C++ (IDE) - -Choose the appropriate CMake generator option for your version of Visual Studio -(run `cmake` with no arguments for a list of available generators.) For -instance: - - cd {build_directory} - cmake -G"Visual Studio 10" [additional CMake flags] {source_directory} - -NOTE: Add "Win64" to the generator name (for example, "Visual Studio 10 -Win64") to build a 64-bit version of libjpeg-turbo. A separate build directory -must be used for 32-bit and 64-bit builds. - -You can then open **ALL_BUILD.vcproj** in Visual Studio and build one of the -configurations in that project ("Debug", "Release", etc.) to generate a full -build of libjpeg-turbo. - -This will generate the following files under *{build_directory}*: - -**{configuration}/jpeg-static.lib**
-Static link library for the libjpeg API - -**sharedlib/{configuration}/jpeg{version}.dll**
-DLL for the libjpeg API - -**sharedlib/{configuration}/jpeg.lib**
-Import library for the libjpeg API - -**{configuration}/turbojpeg-static.lib**
-Static link library for the TurboJPEG API - -**{configuration}/turbojpeg.dll**
-DLL for the TurboJPEG API - -**{configuration}/turbojpeg.lib**
-Import library for the TurboJPEG API + cat <<EOF >toolchain.cmake + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR x86_64) + set(CMAKE_C_COMPILER ${TOOLCHAIN}/bin/${HOST}-gcc) + set(CMAKE_FIND_ROOT_PATH ${TOOLCHAIN}/${HOST}) + EOF -*{configuration}* is Debug, Release, RelWithDebInfo, or MinSizeRel, depending -on the configuration you built in the IDE, and *{version}* is 62, 7, or 8, -depending on whether libjpeg v6b (default), v7, or v8 emulation is enabled. - - -### MinGW - -NOTE: This assumes that you are building on a Windows machine using the MSYS -environment. If you are cross-compiling on a Un*x platform (including Mac and -Cygwin), then see "Build Recipes" below. - - cd {build_directory} - cmake -G"MSYS Makefiles" [additional CMake flags] {source_directory} + cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ + -DCMAKE_POSITION_INDEPENDENT_CODE=1 \ + [additional CMake flags] {source_directory} make -This will generate the following files under *{build_directory}*: - -**libjpeg.a**
-Static link library for the libjpeg API - -**sharedlib/libjpeg-{version}.dll**
-DLL for the libjpeg API - -**sharedlib/libjpeg.dll.a**
-Import library for the libjpeg API - -**libturbojpeg.a**
-Static link library for the TurboJPEG API - -**libturbojpeg.dll**
-DLL for the TurboJPEG API - -**libturbojpeg.dll.a**
-Import library for the TurboJPEG API - -*{version}* is 62, 7, or 8, depending on whether libjpeg v6b (default), v7, or -v8 emulation is enabled. - - -### Debug Build - -Add `-DCMAKE_BUILD_TYPE=Debug` to the CMake command line. Or, if building -with NMake, remove `-DCMAKE_BUILD_TYPE=Release` (Debug builds are the default -with NMake.) - - -### libjpeg v7 or v8 API/ABI Emulation - -Add `-DWITH_JPEG7=1` to the CMake command line to build a version of -libjpeg-turbo that is API/ABI-compatible with libjpeg v7. Add `-DWITH_JPEG8=1` -to the CMake command line to build a version of libjpeg-turbo that is -API/ABI-compatible with libjpeg v8. See [README.md](README.md) for more -information about libjpeg v7 and v8 emulation. - - -### In-Memory Source/Destination Managers - -When using libjpeg v6b or v7 API/ABI emulation, add `-DWITH_MEM_SRCDST=0` to -the CMake command line to build a version of libjpeg-turbo that lacks the -`jpeg_mem_src()` and `jpeg_mem_dest()` functions. These functions were not -part of the original libjpeg v6b and v7 APIs, so removing them ensures strict -conformance with those APIs. See [README.md](README.md) for more information. - - -### Arithmetic Coding Support - -Since the patent on arithmetic coding has expired, this functionality has been -included in this release of libjpeg-turbo. libjpeg-turbo's implementation is -based on the implementation in libjpeg v8, but it works when emulating libjpeg -v7 or v6b as well. The default is to enable both arithmetic encoding and -decoding, but those who have philosophical objections to arithmetic coding can -add `-DWITH_ARITH_ENC=0` or `-DWITH_ARITH_DEC=0` to the CMake command line to -disable encoding or decoding (respectively.) - -### TurboJPEG Java Wrapper +If building for Android 4.0.x (API level < 16) or earlier, remove +`-DCMAKE_POSITION_INDEPENDENT_CODE=1` from the CMake arguments and `-pie` from +`LDFLAGS`. 
-Add `-DWITH_JAVA=1` to the CMake command line to incorporate an optional Java -Native Interface (JNI) wrapper into the TurboJPEG shared library and build the -Java front-end classes to support it. This allows the TurboJPEG shared library -to be used directly from Java applications. See [java/README](java/README) for -more details. +If building on Windows, add `.exe` to the end of `CMAKE_C_COMPILER`. -If Java is not in your `PATH`, or if you wish to use an alternate JDK to -build/test libjpeg-turbo, then (prior to running CMake) set the `JAVA_HOME` -environment variable to the location of the JDK that you wish to use. The -`Java_JAVAC_EXECUTABLE`, `Java_JAVA_EXECUTABLE`, and `Java_JAR_EXECUTABLE` -CMake variables can also be used to specify alternate commands or locations for -javac, jar, and java (respectively.) You can also set the `JAVACFLAGS` CMake -variable to specify arguments that should be passed to the Java compiler when -building the TurboJPEG classes. +Advanced CMake Options +---------------------- -Build Recipes -------------- +To list and configure other CMake options not specifically mentioned in this +guide, run + ccmake {source_directory} -### 32-bit MinGW Build on Un*x (including Mac and Cygwin) +or -Create a file called **toolchain.cmake** under *{build_directory}*, with the -following contents: + cmake-gui {source_directory} - set(CMAKE_SYSTEM_NAME Windows) - set(CMAKE_SYSTEM_PROCESSOR X86) - set(CMAKE_C_COMPILER {mingw_binary_path}/i686-w64-mingw32-gcc) - set(CMAKE_RC_COMPILER {mingw_binary_path}/i686-w64-mingw32-windres) - -*{mingw\_binary\_path}* is the directory under which the MinGW binaries are -located (usually **/usr/bin**.) 
Next, execute the following commands: - - cd {build_directory} - cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ - [additional CMake flags] {source_directory} - make - - -### 64-bit MinGW Build on Un*x (including Mac and Cygwin) - -Create a file called **toolchain.cmake** under *{build_directory}*, with the -following contents: - - set(CMAKE_SYSTEM_NAME Windows) - set(CMAKE_SYSTEM_PROCESSOR AMD64) - set(CMAKE_C_COMPILER {mingw_binary_path}/x86_64-w64-mingw32-gcc) - set(CMAKE_RC_COMPILER {mingw_binary_path}/x86_64-w64-mingw32-windres) - -*{mingw\_binary\_path}* is the directory under which the MinGW binaries are -located (usually **/usr/bin**.) Next, execute the following commands: - - cd {build_directory} - cmake -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake \ - [additional CMake flags] {source_directory} - make +from the build directory after initially configuring the build. CCMake is a +text-based interactive version of CMake, and CMake-GUI is a GUI version. Both +will display all variables that are relevant to the libjpeg-turbo build, their +current values, and a help string describing what they do. Installing libjpeg-turbo ------------------------- +======================== You can use the build system to install libjpeg-turbo (as opposed to creating an installer package.) To do this, run `make install` or `nmake install` @@ -835,6 +732,37 @@ MinGW 32-bit build **c:\libjpeg-turbo-gcc64**
MinGW 64-bit build +**/opt/libjpeg-turbo**
+Un*x + +The default value of `CMAKE_INSTALL_PREFIX` causes the libjpeg-turbo files to +be installed with a directory structure resembling that of the official +libjpeg-turbo binary packages. Changing the value of `CMAKE_INSTALL_PREFIX` +(for instance, to **/usr/local**) causes the libjpeg-turbo files to be +installed with a directory structure that conforms to GNU standards. + +The `CMAKE_INSTALL_BINDIR`, `CMAKE_INSTALL_DATAROOTDIR`, +`CMAKE_INSTALL_DOCDIR`, `CMAKE_INSTALL_INCLUDEDIR`, `CMAKE_INSTALL_JAVADIR`, +`CMAKE_INSTALL_LIBDIR`, and `CMAKE_INSTALL_MANDIR` CMake variables allow a +finer degree of control over where specific files in the libjpeg-turbo +distribution should be installed. These directory variables can either be +specified as absolute paths or as paths relative to `CMAKE_INSTALL_PREFIX` (for +instance, setting `CMAKE_INSTALL_DOCDIR` to **doc** would cause the +documentation to be installed in **${CMAKE\_INSTALL\_PREFIX}/doc**.) If a +directory variable contains the name of another directory variable in angle +brackets, then its final value will depend on the final value of that other +variable. For instance, the default value of `CMAKE_INSTALL_MANDIR` is +**\<CMAKE\_INSTALL\_DATAROOTDIR\>/man**. + +NOTE: If setting one of these directory variables to a relative path using the +CMake command line, you must specify that the variable is of type `PATH`. +For example: + + cmake -G"{generator type}" -DCMAKE_INSTALL_LIBDIR:PATH=lib {source_directory} + +Otherwise, CMake will assume that the path is relative to the build directory +rather than the install directory. + Creating Distribution Packages ============================== @@ -871,40 +799,35 @@ installing Xcode 3.2.6 (with the "Unix Development" option) on OS X 10.6. Packages built in this manner can be installed on OS X 10.5 and later, but they must be built on OS X 10.6 or later. 
- make udmg [BUILDDIR32={32-bit build directory}] - -On 64-bit OS X systems, this creates a Mac package/disk image that contains -universal i386/x86-64 binaries. You should first configure a 32-bit -out-of-tree build of libjpeg-turbo, then configure a 64-bit out-of-tree build, -then run `make udmg` from the 64-bit build directory. The build system will -look for the 32-bit build under *{source_directory}*/osxx86 by default, but you -can override this by setting the `BUILDDIR32` variable on the make command line -as shown above. - - make iosdmg [BUILDDIR32={32-bit build directory}] \ - [BUILDDIRARMV7={ARMv7 build directory}] \ - [BUILDDIRARMV7S={ARMv7s build directory}] \ - [BUILDDIRARMV8={ARMv8 build directory}] - -This creates a Mac package/disk image in which the libjpeg-turbo libraries -contain ARM architectures necessary to build iOS applications. If building on -an x86-64 system, the binaries will also contain the i386 architecture, as with -`make udmg` above. You should first configure ARMv7, ARMv7s, and/or ARMv8 -out-of-tree builds of libjpeg-turbo (see "Building libjpeg-turbo for iOS" -above.) If you are building an x86-64 version of libjpeg-turbo, you should -configure a 32-bit out-of-tree build as well. Next, build libjpeg-turbo as you -would normally, using an out-of-tree build. When it is built, run `make -iosdmg` from the build directory. The build system will look for the ARMv7 -build under *{source_directory}*/iosarmv7 by default, the ARMv7s build under -*{source_directory}*/iosarmv7s by default, the ARMv8 build under -*{source_directory}*/iosarmv8 by default, and (if applicable) the 32-bit build -under *{source_directory}*/osxx86 by default, but you can override this by -setting the `BUILDDIR32`, `BUILDDIRARMV7`, `BUILDDIRARMV7S`, and/or -`BUILDDIRARMV8` variables on the `make` command line as shown above. - -NOTE: If including an ARMv8 build in the package, then you may need to use -Xcode's version of lipo instead of the operating system's. 
To do this, pass -an argument of `LIPO="xcrun lipo"` on the make command line. + make udmg + +This creates a Mac package/disk image that contains universal x86-64/i386/ARM +binaries. The following CMake variables control which architectures are +included in the universal binaries. Setting any of these variables to an empty +string excludes that architecture from the package. + +* `OSX_32BIT_BUILD`: Directory containing an i386 (32-bit) Mac build of + libjpeg-turbo (default: *{source_directory}*/osxx86) +* `IOS_ARMV7_BUILD`: Directory containing an ARMv7 (32-bit) iOS build of + libjpeg-turbo (default: *{source_directory}*/iosarmv7) +* `IOS_ARMV7S_BUILD`: Directory containing an ARMv7s (32-bit) iOS build of + libjpeg-turbo (default: *{source_directory}*/iosarmv7s) +* `IOS_ARMV8_BUILD`: Directory containing an ARMv8 (64-bit) iOS build of + libjpeg-turbo (default: *{source_directory}*/iosarmv8) + +You should first use CMake to configure i386, ARMv7, ARMv7s, and/or ARMv8 +sub-builds of libjpeg-turbo (see "Build Recipes" and "Building libjpeg-turbo +for iOS" above) in build directories that match those specified in the +aforementioned CMake variables. Next, configure the primary build of +libjpeg-turbo as an out-of-tree build, and build it. Once the primary build +has been built, run `make udmg` from the build directory. The packaging system +will build the sub-builds, use lipo to combine them into a single set of +universal binaries, then package the universal binaries in the same manner as +`make dmg`. 
+ + +Cygwin +------ make cygwinpkg diff --git a/Brewfile b/Brewfile new file mode 100644 index 0000000..4a9cb3d --- /dev/null +++ b/Brewfile @@ -0,0 +1,4 @@ +brew 'yasm' +brew 'gcc@5' +brew 'md5sha1sum' +cask 'Caskroom/versions/java6' diff --git a/CMakeLists.txt b/CMakeLists.txt index fb5e182..efeb51a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,15 +1,11 @@ -# -# Setup -# +cmake_minimum_required(VERSION 2.8.12) -cmake_minimum_required(VERSION 2.8.11) -# Use LINK_INTERFACE_LIBRARIES instead of INTERFACE_LINK_LIBRARIES -if(POLICY CMP0022) - cmake_policy(SET CMP0022 OLD) +if(CMAKE_EXECUTABLE_SUFFIX) + set(CMAKE_EXECUTABLE_SUFFIX_TMP ${CMAKE_EXECUTABLE_SUFFIX}) endif() project(libjpeg-turbo C) -set(VERSION 1.5.3) +set(VERSION 2.0.1) string(REPLACE "." ";" VERSION_TRIPLET ${VERSION}) list(GET VERSION_TRIPLET 0 VERSION_MAJOR) list(GET VERSION_TRIPLET 1 VERSION_MINOR) @@ -29,116 +25,283 @@ pad_number(VERSION_MINOR 3) pad_number(VERSION_REVISION 3) set(LIBJPEG_TURBO_VERSION_NUMBER ${VERSION_MAJOR}${VERSION_MINOR}${VERSION_REVISION}) -if(NOT WIN32) - message(FATAL_ERROR "Platform not supported by this build system. Use autotools instead.") -endif() - -string(TIMESTAMP BUILD "%Y%m%d") +string(TIMESTAMP DEFAULT_BUILD "%Y%m%d") +set(BUILD ${DEFAULT_BUILD} CACHE STRING "Build string (default: ${DEFAULT_BUILD})") -# This does nothing except when using MinGW. CMAKE_BUILD_TYPE has no meaning -# in Visual Studio, and it always defaults to Debug when using NMake. +# NOTE: On Windows, this does nothing except when using MinGW or Cygwin. +# CMAKE_BUILD_TYPE has no meaning in Visual Studio, and it always defaults to +# Debug when using NMake. if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() - message(STATUS "CMAKE_BUILD_TYPE = ${CMAKE_BUILD_TYPE}") -# This only works if building from the command line. There is currently no way -# to set a variable's value based on the build type when using Visual Studio. 
-if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(BUILD "${BUILD}d") +message(STATUS "VERSION = ${VERSION}, BUILD = ${BUILD}") + +# Detect CPU type and whether we're building 64-bit or 32-bit code +math(EXPR BITS "${CMAKE_SIZEOF_VOID_P} * 8") +string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} CMAKE_SYSTEM_PROCESSOR_LC) +if(CMAKE_SYSTEM_PROCESSOR_LC MATCHES "x86_64" OR + CMAKE_SYSTEM_PROCESSOR_LC MATCHES "amd64" OR + CMAKE_SYSTEM_PROCESSOR_LC MATCHES "i[0-9]86" OR + CMAKE_SYSTEM_PROCESSOR_LC MATCHES "x86" OR + CMAKE_SYSTEM_PROCESSOR_LC MATCHES "ia32") + if(BITS EQUAL 64) + set(CPU_TYPE x86_64) + else() + set(CPU_TYPE i386) + endif() + if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL ${CPU_TYPE}) + set(CMAKE_SYSTEM_PROCESSOR ${CPU_TYPE}) + endif() +elseif(CMAKE_SYSTEM_PROCESSOR_LC STREQUAL "aarch64" OR + CMAKE_SYSTEM_PROCESSOR_LC MATCHES "arm*64*") + set(CPU_TYPE arm64) +elseif(CMAKE_SYSTEM_PROCESSOR_LC MATCHES "arm*") + set(CPU_TYPE arm) +elseif(CMAKE_SYSTEM_PROCESSOR_LC MATCHES "ppc*" OR + CMAKE_SYSTEM_PROCESSOR_LC MATCHES "powerpc*") + set(CPU_TYPE powerpc) +else() + set(CPU_TYPE ${CMAKE_SYSTEM_PROCESSOR_LC}) endif() +message(STATUS "${BITS}-bit build (${CPU_TYPE})") -message(STATUS "VERSION = ${VERSION}, BUILD = ${BUILD}") -option(WITH_SIMD "Include SIMD extensions" TRUE) -option(WITH_ARITH_ENC "Include arithmetic encoding support when emulating the libjpeg v6b API/ABI" TRUE) +############################################################################### +# INSTALL DIRECTORIES +############################################################################### + +if(WIN32) + if(MSVC) + set(CMAKE_INSTALL_DEFAULT_PREFIX "c:/${CMAKE_PROJECT_NAME}") + else() + set(CMAKE_INSTALL_DEFAULT_PREFIX "c:/${CMAKE_PROJECT_NAME}-gcc") + endif() + if(BITS EQUAL 64) + set(CMAKE_INSTALL_DEFAULT_PREFIX "${CMAKE_INSTALL_DEFAULT_PREFIX}64") + endif() +else() + set(CMAKE_INSTALL_DEFAULT_PREFIX /opt/${CMAKE_PROJECT_NAME}) +endif() +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX 
"${CMAKE_INSTALL_DEFAULT_PREFIX}" CACHE PATH + "Directory into which to install ${CMAKE_PROJECT_NAME} (default: ${CMAKE_INSTALL_DEFAULT_PREFIX})" + FORCE) +endif() +message(STATUS "CMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}") + +# When the prefix is /opt/${CMAKE_PROJECT_NAME}, we assume that an "official" +# build is being created, and thus we install things into specific locations. + +if(CMAKE_INSTALL_PREFIX STREQUAL "${CMAKE_INSTALL_DEFAULT_PREFIX}") + set(CMAKE_INSTALL_DEFAULT_DATAROOTDIR "") + set(CMAKE_INSTALL_DEFAULT_DOCDIR "<CMAKE_INSTALL_DATAROOTDIR>/doc") + set(CMAKE_INSTALL_DEFAULT_JAVADIR "<CMAKE_INSTALL_DATAROOTDIR>/classes") + if(UNIX AND NOT APPLE) + if(BITS EQUAL 64) + set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib64") + else() + set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib32") + endif() + endif() +endif() + +include(cmakescripts/GNUInstallDirs.cmake) + +if(ENABLE_SHARED) + set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR}) +endif() + +macro(report_directory var) + if(CMAKE_INSTALL_${var} STREQUAL CMAKE_INSTALL_FULL_${var}) + message(STATUS "CMAKE_INSTALL_${var} = ${CMAKE_INSTALL_${var}}") + else() + message(STATUS "CMAKE_INSTALL_${var} = ${CMAKE_INSTALL_${var}} (${CMAKE_INSTALL_FULL_${var}})") + endif() + mark_as_advanced(CLEAR CMAKE_INSTALL_${var}) +endmacro() + +set(DIRLIST "BINDIR;DATAROOTDIR;DOCDIR;INCLUDEDIR;LIBDIR") +if(UNIX) + list(APPEND DIRLIST "MANDIR") +endif() +foreach(dir ${DIRLIST}) + report_directory(${dir}) +endforeach() + + +############################################################################### +# CONFIGURATION OPTIONS +############################################################################### + +macro(boolean_number var) + if(${var}) + set(${var} 1) + else() + set(${var} 0) + endif() +endmacro() + +option(ENABLE_SHARED "Build shared libraries" TRUE) +boolean_number(ENABLE_SHARED) +option(ENABLE_STATIC "Build static libraries" TRUE) +boolean_number(ENABLE_STATIC) +option(REQUIRE_SIMD "Generate a fatal error if SIMD extensions are 
not available for this platform (default is to fall 
back to a non-SIMD build)" FALSE) +boolean_number(REQUIRE_SIMD) +option(WITH_12BIT "Encode/decode JPEG images with 12-bit samples (implies WITH_ARITH_DEC=0 WITH_ARITH_ENC=0 WITH_JAVA=0 WITH_SIMD=0 WITH_TURBOJPEG=0 )" FALSE) +boolean_number(WITH_12BIT) option(WITH_ARITH_DEC "Include arithmetic decoding support when emulating the libjpeg v6b API/ABI" TRUE) -option(WITH_JPEG7 "Emulate libjpeg v7 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b)" FALSE) -option(WITH_JPEG8 "Emulate libjpeg v8 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b)" FALSE) +boolean_number(WITH_ARITH_DEC) +option(WITH_ARITH_ENC "Include arithmetic encoding support when emulating the libjpeg v6b API/ABI" TRUE) +boolean_number(WITH_ARITH_ENC) +option(WITH_JAVA "Build Java wrapper for the TurboJPEG API library (implies ENABLE_SHARED=1)" FALSE) +boolean_number(WITH_JAVA) +option(WITH_JPEG7 "Emulate libjpeg v7 API/ABI (this makes ${CMAKE_PROJECT_NAME} backward-incompatible with libjpeg v6b)" FALSE) +boolean_number(WITH_JPEG7) +option(WITH_JPEG8 "Emulate libjpeg v8 API/ABI (this makes ${CMAKE_PROJECT_NAME} backward-incompatible with libjpeg v6b)" FALSE) +boolean_number(WITH_JPEG8) option(WITH_MEM_SRCDST "Include in-memory source/destination manager functions when emulating the libjpeg v6b or v7 API/ABI" TRUE) -option(WITH_TURBOJPEG "Include the TurboJPEG wrapper library and associated test programs" TRUE) -option(WITH_JAVA "Build Java wrapper for the TurboJPEG library" FALSE) -option(WITH_12BIT "Encode/decode JPEG images with 12-bit samples (implies WITH_SIMD=0 WITH_TURBOJPEG=0 WITH_ARITH_ENC=0 WITH_ARITH_DEC=0)" FALSE) -option(ENABLE_STATIC "Build static libraries" TRUE) -option(ENABLE_SHARED "Build shared libraries" TRUE) +boolean_number(WITH_MEM_SRCDST) +option(WITH_SIMD "Include SIMD extensions, if available for this platform" TRUE) +boolean_number(WITH_SIMD) +option(WITH_TURBOJPEG "Include the TurboJPEG API library and associated test programs" 
TRUE) +boolean_number(WITH_TURBOJPEG) + +macro(report_option var desc) + if(${var}) + message(STATUS "${desc} enabled (${var} = ${${var}})") + else() + message(STATUS "${desc} disabled (${var} = ${${var}})") + endif() +endmacro() + +if(WITH_JAVA) + set(ENABLE_SHARED 1) +endif() + +# Explicitly setting CMAKE_POSITION_INDEPENDENT_CODE=FALSE disables PIC for all +# targets, which will cause the shared library builds to fail. Thus, if shared +# libraries are enabled and CMAKE_POSITION_INDEPENDENT_CODE is explicitly set +# to FALSE, we need to unset it, thus restoring the default behavior +# (automatically using PIC for shared library targets.) +if(DEFINED CMAKE_POSITION_INDEPENDENT_CODE AND + NOT CMAKE_POSITION_INDEPENDENT_CODE AND ENABLE_SHARED) + unset(CMAKE_POSITION_INDEPENDENT_CODE CACHE) +endif() + +report_option(ENABLE_SHARED "Shared libraries") +report_option(ENABLE_STATIC "Static libraries") if(WITH_12BIT) - set(WITH_SIMD FALSE) - set(WITH_TURBOJPEG FALSE) - set(WITH_JAVA FALSE) - set(WITH_ARITH_ENC FALSE) - set(WITH_ARITH_DEC FALSE) + set(WITH_ARITH_DEC 0) + set(WITH_ARITH_ENC 0) + set(WITH_JAVA 0) + set(WITH_SIMD 0) + set(WITH_TURBOJPEG 0) set(BITS_IN_JSAMPLE 12) - message(STATUS "12-bit JPEG support enabled") else() set(BITS_IN_JSAMPLE 8) endif() +report_option(WITH_12BIT "12-bit JPEG support") if(WITH_JPEG8 OR WITH_JPEG7) set(WITH_ARITH_ENC 1) set(WITH_ARITH_DEC 1) endif() if(WITH_JPEG8) - set(WITH_MEM_SRCDST 1) + set(WITH_MEM_SRCDST 0) +endif() + +if(WITH_ARITH_DEC) + set(D_ARITH_CODING_SUPPORTED 1) +endif() +if(NOT WITH_12BIT) + report_option(WITH_ARITH_DEC "Arithmetic decoding support") endif() if(WITH_ARITH_ENC) set(C_ARITH_CODING_SUPPORTED 1) - message(STATUS "Arithmetic encoding support enabled") -else() - message(STATUS "Arithmetic encoding support disabled") endif() - -if(WITH_ARITH_DEC) - set(D_ARITH_CODING_SUPPORTED 1) - message(STATUS "Arithmetic decoding support enabled") -else() - message(STATUS "Arithmetic decoding support disabled") +if(NOT 
WITH_12BIT) + report_option(WITH_ARITH_ENC "Arithmetic encoding support") endif() -if(WITH_TURBOJPEG) - message(STATUS "TurboJPEG C wrapper enabled") -else() - message(STATUS "TurboJPEG C wrapper disabled") +if(NOT WITH_12BIT) + report_option(WITH_TURBOJPEG "TurboJPEG API library") + report_option(WITH_JAVA "TurboJPEG Java wrapper") endif() -if(WITH_JAVA) - message(STATUS "TurboJPEG Java wrapper enabled") -else() - message(STATUS "TurboJPEG Java wrapper disabled") +if(WITH_MEM_SRCDST) + set(MEM_SRCDST_SUPPORTED 1) + set(MEM_SRCDST_FUNCTIONS "global: jpeg_mem_dest; jpeg_mem_src;") +endif() +if(NOT WITH_JPEG8) + report_option(WITH_MEM_SRCDST "In-memory source/destination managers") endif() -set(SO_AGE 0) +set(SO_AGE 2) if(WITH_MEM_SRCDST) - set(SO_AGE 1) + set(SO_AGE 3) endif() -set(JPEG_LIB_VERSION 62) -set(DLL_VERSION ${JPEG_LIB_VERSION}) -set(FULLVERSION ${DLL_VERSION}.${SO_AGE}.0) if(WITH_JPEG8) set(JPEG_LIB_VERSION 80) - set(DLL_VERSION 8) - set(FULLVERSION ${DLL_VERSION}.0.2) - message(STATUS "Emulating libjpeg v8 API/ABI") elseif(WITH_JPEG7) set(JPEG_LIB_VERSION 70) - set(DLL_VERSION 7) - set(FULLVERSION ${DLL_VERSION}.${SO_AGE}.0) - message(STATUS "Emulating libjpeg v7 API/ABI") -endif(WITH_JPEG8) +else() + set(JPEG_LIB_VERSION 62) +endif() -if(WITH_MEM_SRCDST) - set(MEM_SRCDST_SUPPORTED 1) - message(STATUS "In-memory source/destination managers enabled") +math(EXPR JPEG_LIB_VERSION_DIV10 "${JPEG_LIB_VERSION} / 10") +math(EXPR JPEG_LIB_VERSION_MOD10 "${JPEG_LIB_VERSION} % 10") +if(JPEG_LIB_VERSION STREQUAL "62") + set(DEFAULT_SO_MAJOR_VERSION ${JPEG_LIB_VERSION}) else() - message(STATUS "In-memory source/destination managers disabled") + set(DEFAULT_SO_MAJOR_VERSION ${JPEG_LIB_VERSION_DIV10}) +endif() +if(JPEG_LIB_VERSION STREQUAL "80") + set(DEFAULT_SO_MINOR_VERSION 2) +else() + set(DEFAULT_SO_MINOR_VERSION 0) +endif() + +# This causes SO_MAJOR_VERSION/SO_MINOR_VERSION to reset to defaults if +# WITH_JPEG7 or WITH_JPEG8 has changed. 
+if((DEFINED WITH_JPEG7_INT AND NOT WITH_JPEG7 EQUAL WITH_JPEG7_INT) OR + (DEFINED WITH_JPEG8_INT AND NOT WITH_JPEG8 EQUAL WITH_JPEG8_INT)) + set(FORCE_SO_VERSION "FORCE") endif() +set(WITH_JPEG7_INT ${WITH_JPEG7} CACHE INTERNAL "") +set(WITH_JPEG8_INT ${WITH_JPEG8} CACHE INTERNAL "") + +set(SO_MAJOR_VERSION ${DEFAULT_SO_MAJOR_VERSION} CACHE STRING + "Major version of the libjpeg API shared library (default: ${DEFAULT_SO_MAJOR_VERSION})" + ${FORCE_SO_VERSION}) +set(SO_MINOR_VERSION ${DEFAULT_SO_MINOR_VERSION} CACHE STRING + "Minor version of the libjpeg API shared library (default: ${DEFAULT_SO_MINOR_VERSION})" + ${FORCE_SO_VERSION}) + +set(JPEG_LIB_VERSION_DECIMAL "${JPEG_LIB_VERSION_DIV10}.${JPEG_LIB_VERSION_MOD10}") +message(STATUS "Emulating libjpeg API/ABI v${JPEG_LIB_VERSION_DECIMAL} (WITH_JPEG7 = ${WITH_JPEG7}, WITH_JPEG8 = ${WITH_JPEG8})") +message(STATUS "libjpeg API shared library version = ${SO_MAJOR_VERSION}.${SO_AGE}.${SO_MINOR_VERSION}") + +# Because the TurboJPEG API library uses versioned symbols and changes the +# names of functions whenever they are modified in a backward-incompatible +# manner, it is always backward-ABI-compatible with itself, so the major and +# minor SO versions don't change. However, we increase the middle number (the +# SO "age") whenever functions are added to the API. +set(TURBOJPEG_SO_MAJOR_VERSION 0) +set(TURBOJPEG_SO_VERSION 0.2.0) + + +############################################################################### +# COMPILER SETTINGS +############################################################################### if(MSVC) option(WITH_CRT_DLL - "Link all libjpeg-turbo libraries and executables with the C run-time DLL (msvcr*.dll) instead of the static C run-time library (libcmt*.lib.) The default is to use the C run-time DLL only with the libraries and executables that need it." 
+ "Link all ${CMAKE_PROJECT_NAME} libraries and executables with the C run-time DLL (msvcr*.dll) instead of the static C run-time library (libcmt*.lib.) The default is to use the C run-time DLL only with the libraries and executables that need it." FALSE) if(NOT WITH_CRT_DLL) # Use the static C library for all build types @@ -149,45 +312,32 @@ if(MSVC) endif() endforeach() endif() - add_definitions(-W3 -wd4996) -endif() - -# Detect whether compiler is 64-bit -if(MSVC AND CMAKE_CL_64) - set(SIMD_X86_64 1) - set(64BIT 1) -elseif(CMAKE_SIZEOF_VOID_P MATCHES 8) - set(SIMD_X86_64 1) - set(64BIT 1) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W3 /wd4996") endif() -if(64BIT) - message(STATUS "64-bit build") -else() - message(STATUS "32-bit build") +if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang") + # Use the maximum optimization level for release builds + foreach(var CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_RELWITHDEBINFO) + if(${var} MATCHES "-O2") + string(REGEX REPLACE "-O2" "-O3" ${var} "${${var}}") + endif() + endforeach() endif() -if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - if(MSVC) - set(CMAKE_INSTALL_PREFIX_DEFAULT ${CMAKE_PROJECT_NAME}) - else() - set(CMAKE_INSTALL_PREFIX_DEFAULT ${CMAKE_PROJECT_NAME}-gcc) - endif() - if(64BIT) - set(CMAKE_INSTALL_PREFIX_DEFAULT ${CMAKE_INSTALL_PREFIX_DEFAULT}64) +if(CMAKE_SYSTEM_NAME STREQUAL "SunOS") + if(CMAKE_C_COMPILER_ID MATCHES "SunPro") + # Use the maximum optimization level for release builds + foreach(var CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_RELWITHDEBINFO) + if(${var} MATCHES "-xO3") + string(REGEX REPLACE "-xO3" "-xO5" ${var} "${${var}}") + endif() + if(${var} MATCHES "-xO2") + string(REGEX REPLACE "-xO2" "-xO5" ${var} "${${var}}") + endif() + endforeach() endif() - set(CMAKE_INSTALL_PREFIX "c:/${CMAKE_INSTALL_PREFIX_DEFAULT}" CACHE PATH - "Directory into which to install libjpeg-turbo (default: c:/${CMAKE_INSTALL_PREFIX_DEFAULT})" - FORCE) endif() -message(STATUS "Install directory = 
${CMAKE_INSTALL_PREFIX}") - -configure_file(win/jconfig.h.in jconfig.h) -configure_file(win/jconfigint.h.in jconfigint.h) - -include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_SOURCE_DIR}) - string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) set(EFFECTIVE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UC}}") @@ -196,26 +346,176 @@ message(STATUS "Compiler flags = ${EFFECTIVE_C_FLAGS}") set(EFFECTIVE_LD_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_${CMAKE_BUILD_TYPE_UC}}") message(STATUS "Linker flags = ${EFFECTIVE_LD_FLAGS}") -if(WITH_JAVA) - find_package(Java) - find_package(JNI) - if(DEFINED JAVACFLAGS) - message(STATUS "Java compiler flags = ${JAVACFLAGS}") +include(CheckCSourceCompiles) +include(CheckIncludeFiles) +include(CheckTypeSize) + +check_type_size("size_t" SIZE_T) +check_type_size("unsigned long" UNSIGNED_LONG) + +if(SIZE_T EQUAL UNSIGNED_LONG) + check_c_source_compiles("int main(int argc, char **argv) { unsigned long a = argc; return __builtin_ctzl(a); }" + HAVE_BUILTIN_CTZL) +endif() +if(MSVC) + check_include_files("intrin.h" HAVE_INTRIN_H) +endif() + +if(UNIX) + # Check for headers + check_include_files(locale.h HAVE_LOCALE_H) + check_include_files(stddef.h HAVE_STDDEF_H) + check_include_files(stdlib.h HAVE_STDLIB_H) + check_include_files(sys/types.h NEED_SYS_TYPES_H) + + # Check for functions + include(CheckSymbolExists) + check_symbol_exists(memset string.h HAVE_MEMSET) + check_symbol_exists(memcpy string.h HAVE_MEMCPY) + if(NOT HAVE_MEMSET AND NOT HAVE_MEMCPY) + set(NEED_BSD_STRINGS 1) + endif() + + # Check for types + check_type_size("unsigned char" UNSIGNED_CHAR) + check_type_size("unsigned short" UNSIGNED_SHORT) + + # Check for compiler features + check_c_source_compiles("int main(void) { typedef struct undefined_structure *undef_struct_ptr; undef_struct_ptr ptr = 0; return ptr != 0; }" + INCOMPLETE_TYPES) + if(INCOMPLETE_TYPES) + message(STATUS "Compiler supports pointers to undefined structures.") + 
else() + set(INCOMPLETE_TYPES_BROKEN 1) + message(STATUS "Compiler does not support pointers to undefined structures.") + endif() + + if(CMAKE_CROSSCOMPILING) + set(RIGHT_SHIFT_IS_UNSIGNED 0) + else() + include(CheckCSourceRuns) + check_c_source_runs(" + #include <stdio.h> + #include <stdlib.h> + int is_shifting_signed (long arg) { + long res = arg >> 4; + if (res == -0x7F7E80CL) + return 1; /* right shift is signed */ + /* see if unsigned-shift hack will fix it. */ + /* we can't just test exact value since it depends on width of long... */ + res |= (~0L) << (32-4); + if (res == -0x7F7E80CL) + return 0; /* right shift is unsigned */ + printf(\"Right shift isn't acting as I expect it to.\\\\n\"); + printf(\"I fear the JPEG software will not work at all.\\\\n\\\\n\"); + return 0; /* try it with unsigned anyway */ + } + int main (void) { + exit(is_shifting_signed(-0x7F7E80B1L)); + }" RIGHT_SHIFT_IS_UNSIGNED) + endif() + + if(CMAKE_CROSSCOMPILING) + set(__CHAR_UNSIGNED__ 0) + else() + check_c_source_runs("int main(void) { return ((char) -1 < 0); }" + __CHAR_UNSIGNED__) + endif() +endif() + +if(MSVC) + set(INLINE_OPTIONS "__inline;inline") +else() + set(INLINE_OPTIONS "__inline__;inline") +endif() +option(FORCE_INLINE "Force function inlining" TRUE) +boolean_number(FORCE_INLINE) +if(FORCE_INLINE) + if(MSVC) + list(INSERT INLINE_OPTIONS 0 "__forceinline") + else() + list(INSERT INLINE_OPTIONS 0 "inline __attribute__((always_inline))") + list(INSERT INLINE_OPTIONS 0 "__inline__ __attribute__((always_inline))") + endif() +endif() +foreach(inline ${INLINE_OPTIONS}) + check_c_source_compiles("${inline} static int foo(void) { return 0; } int main(void) { return foo(); }" + INLINE_WORKS) + if(INLINE_WORKS) + set(INLINE ${inline}) + break() + endif() +endforeach() +if(NOT INLINE_WORKS) + message(FATAL_ERROR "Could not determine how to inline functions.") +endif() +message(STATUS "INLINE = ${INLINE} (FORCE_INLINE = ${FORCE_INLINE})") + +if(UNIX AND NOT APPLE) + file(WRITE 
${CMAKE_CURRENT_BINARY_DIR}/conftest.map "VERS_1 { global: *; };") + set(CMAKE_REQUIRED_FLAGS + "-Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/conftest.map") + check_c_source_compiles("int main(void) { return 0; }" HAVE_VERSION_SCRIPT) + set(CMAKE_REQUIRED_FLAGS) + file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/conftest.map) + if(HAVE_VERSION_SCRIPT) + message(STATUS "Linker supports GNU-style version scripts") + set(MAPFLAG "-Wl,--version-script,") + set(TJMAPFLAG "-Wl,--version-script,") + else() + message(STATUS "Linker does not support GNU-style version scripts") + if(CMAKE_SYSTEM_NAME STREQUAL "SunOS") + # The Solaris linker doesn't like our version script for the libjpeg API + # library, but the version script for the TurboJPEG API library should + # still work. + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/conftest.map + "VERS_1 { global: foo; local: *; }; VERS_2 { global: foo2; } VERS_1;") + set(CMAKE_REQUIRED_FLAGS "-Wl,-M,${CMAKE_CURRENT_BINARY_DIR}/conftest.map") + check_c_source_compiles("void foo() {} void foo2() {} int main(void) { return 0; }" + HAVE_MAPFILE) + set(CMAKE_REQUIRED_FLAGS) + file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/conftest.map) + if(HAVE_MAPFILE) + message(STATUS "Linker supports mapfiles") + set(TJMAPFLAG "-Wl,-M,") + else() + message(STATUS "Linker does not support mapfiles") + endif() + endif() endif() endif() +# Generate files +if(WIN32) + configure_file(win/jconfig.h.in jconfig.h) +else() + configure_file(jconfig.h.in jconfig.h) +endif() +configure_file(jconfigint.h.in jconfigint.h) +if(UNIX) + configure_file(libjpeg.map.in libjpeg.map) +endif() + +# Include directories and compiler definitions +include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) -# -# Targets -# + +############################################################################### +# TARGETS +############################################################################### + +if(CMAKE_EXECUTABLE_SUFFIX_TMP) + set(CMAKE_EXECUTABLE_SUFFIX 
${CMAKE_EXECUTABLE_SUFFIX_TMP}) +endif() +message(STATUS "CMAKE_EXECUTABLE_SUFFIX = ${CMAKE_EXECUTABLE_SUFFIX}") set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c - jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c jcphuff.c - jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c jdatadst.c jdatasrc.c - jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c - jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c jerror.c - jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c - jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c) + jcicc.c jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c + jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c jdatadst.c + jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdicc.c jdinput.c + jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c + jdtrans.c jerror.c jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c + jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c) if(WITH_ARITH_ENC OR WITH_ARITH_DEC) set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c) @@ -230,90 +530,111 @@ if(WITH_ARITH_DEC) endif() if(WITH_SIMD) - add_definitions(-DWITH_SIMD) add_subdirectory(simd) - if(SIMD_X86_64) - set(JPEG_SOURCES ${JPEG_SOURCES} simd/jsimd_x86_64.c) - else() - set(JPEG_SOURCES ${JPEG_SOURCES} simd/jsimd_i386.c) +elseif(NOT WITH_12BIT) + message(STATUS "SIMD extensions: None (WITH_SIMD = ${WITH_SIMD})") +endif() +if(WITH_SIMD) + message(STATUS "SIMD extensions: ${CPU_TYPE} (WITH_SIMD = ${WITH_SIMD})") + if(MSVC_IDE) + set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1) endif() - # This tells CMake that the "source" files haven't been generated yet - set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1) else() - set(JPEG_SOURCES ${JPEG_SOURCES} jsimd_none.c) - message(STATUS "Not using SIMD acceleration") + add_library(simd OBJECT jsimd_none.c) endif() if(WITH_JAVA) 
add_subdirectory(java) - set(ENABLE_SHARED TRUE) endif() if(ENABLE_SHARED) add_subdirectory(sharedlib) endif() -if(ENABLE_STATIC OR WITH_TURBOJPEG) - add_library(jpeg-static STATIC ${JPEG_SOURCES} ${SIMD_OBJS}) +if(ENABLE_STATIC) + add_library(jpeg-static STATIC ${JPEG_SOURCES} $ + ${SIMD_OBJS}) if(NOT MSVC) set_target_properties(jpeg-static PROPERTIES OUTPUT_NAME jpeg) endif() - if(WITH_SIMD) - add_dependencies(jpeg-static simd) - endif() endif() if(WITH_TURBOJPEG) - set(TURBOJPEG_SOURCES turbojpeg.c transupp.c jdatadst-tj.c jdatasrc-tj.c) - if(WITH_JAVA) - set(TURBOJPEG_SOURCES ${TURBOJPEG_SOURCES} turbojpeg-jni.c) - include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2}) - endif() - if(ENABLE_SHARED) + set(TURBOJPEG_SOURCES ${JPEG_SOURCES} $ ${SIMD_OBJS} + turbojpeg.c transupp.c jdatadst-tj.c jdatasrc-tj.c rdbmp.c rdppm.c + wrbmp.c wrppm.c) + set(TJMAPFILE ${CMAKE_CURRENT_SOURCE_DIR}/turbojpeg-mapfile) + if(WITH_JAVA) + set(TURBOJPEG_SOURCES ${TURBOJPEG_SOURCES} turbojpeg-jni.c) + include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2}) + set(TJMAPFILE ${CMAKE_CURRENT_SOURCE_DIR}/turbojpeg-mapfile.jni) + endif() add_library(turbojpeg SHARED ${TURBOJPEG_SOURCES}) - set_target_properties(turbojpeg PROPERTIES DEFINE_SYMBOL DLLDEFINE) + set_property(TARGET turbojpeg PROPERTY COMPILE_FLAGS + "-DBMP_SUPPORTED -DPPM_SUPPORTED") + if(WIN32) + set_target_properties(turbojpeg PROPERTIES DEFINE_SYMBOL DLLDEFINE) + endif() if(MINGW) set_target_properties(turbojpeg PROPERTIES LINK_FLAGS -Wl,--kill-at) endif() - target_link_libraries(turbojpeg jpeg-static) - set_target_properties(turbojpeg PROPERTIES LINK_INTERFACE_LIBRARIES "") + if(APPLE AND (NOT CMAKE_OSX_DEPLOYMENT_TARGET OR + CMAKE_OSX_DEPLOYMENT_TARGET VERSION_GREATER 10.4)) + if(NOT CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG) + set(CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG "-Wl,-rpath,") + endif() + set_target_properties(turbojpeg PROPERTIES MACOSX_RPATH 1) + endif() + set_target_properties(turbojpeg PROPERTIES + 
SOVERSION ${TURBOJPEG_SO_MAJOR_VERSION} VERSION ${TURBOJPEG_SO_VERSION}) + if(TJMAPFLAG) + set_target_properties(turbojpeg PROPERTIES + LINK_FLAGS "${TJMAPFLAG}${TJMAPFILE}") + endif() - add_executable(tjunittest tjunittest.c tjutil.c) + add_executable(tjunittest tjunittest.c tjutil.c md5/md5.c md5/md5hl.c) target_link_libraries(tjunittest turbojpeg) - add_executable(tjbench tjbench.c bmp.c tjutil.c rdbmp.c rdppm.c wrbmp.c - wrppm.c) - target_link_libraries(tjbench turbojpeg jpeg-static) - set_property(TARGET tjbench PROPERTY COMPILE_FLAGS - "-DBMP_SUPPORTED -DPPM_SUPPORTED") + add_executable(tjbench tjbench.c tjutil.c) + target_link_libraries(tjbench turbojpeg) + if(UNIX) + target_link_libraries(tjbench m) + endif() + + add_executable(tjexample tjexample.c) + target_link_libraries(tjexample turbojpeg) endif() if(ENABLE_STATIC) - add_library(turbojpeg-static STATIC ${JPEG_SOURCES} ${SIMD_OBJS} - turbojpeg.c transupp.c jdatadst-tj.c jdatasrc-tj.c) + add_library(turbojpeg-static STATIC ${JPEG_SOURCES} $ + ${SIMD_OBJS} turbojpeg.c transupp.c jdatadst-tj.c jdatasrc-tj.c rdbmp.c + rdppm.c wrbmp.c wrppm.c) + set_property(TARGET turbojpeg-static PROPERTY COMPILE_FLAGS + "-DBMP_SUPPORTED -DPPM_SUPPORTED") if(NOT MSVC) set_target_properties(turbojpeg-static PROPERTIES OUTPUT_NAME turbojpeg) endif() - if(WITH_SIMD) - add_dependencies(turbojpeg-static simd) - endif() - add_executable(tjunittest-static tjunittest.c tjutil.c) + add_executable(tjunittest-static tjunittest.c tjutil.c md5/md5.c + md5/md5hl.c) target_link_libraries(tjunittest-static turbojpeg-static) - add_executable(tjbench-static tjbench.c bmp.c tjutil.c rdbmp.c rdppm.c - wrbmp.c wrppm.c) - target_link_libraries(tjbench-static turbojpeg-static jpeg-static) - set_property(TARGET tjbench-static PROPERTY COMPILE_FLAGS - "-DBMP_SUPPORTED -DPPM_SUPPORTED") + add_executable(tjbench-static tjbench.c tjutil.c) + target_link_libraries(tjbench-static turbojpeg-static) + if(UNIX) + target_link_libraries(tjbench-static m) + 
endif() endif() endif() +if(WIN32) + set(USE_SETMODE "-DUSE_SETMODE") +endif() if(WITH_12BIT) - set(COMPILE_FLAGS "-DGIF_SUPPORTED -DPPM_SUPPORTED -DUSE_SETMODE") + set(COMPILE_FLAGS "-DGIF_SUPPORTED -DPPM_SUPPORTED ${USE_SETMODE}") else() - set(COMPILE_FLAGS "-DBMP_SUPPORTED -DGIF_SUPPORTED -DPPM_SUPPORTED -DTARGA_SUPPORTED -DUSE_SETMODE") + set(COMPILE_FLAGS "-DBMP_SUPPORTED -DGIF_SUPPORTED -DPPM_SUPPORTED -DTARGA_SUPPORTED ${USE_SETMODE}") set(CJPEG_BMP_SOURCES rdbmp.c rdtarga.c) set(DJPEG_BMP_SOURCES wrbmp.c wrtarga.c) endif() @@ -331,7 +652,7 @@ if(ENABLE_STATIC) add_executable(jpegtran-static jpegtran.c cdjpeg.c rdswitch.c transupp.c) target_link_libraries(jpegtran-static jpeg-static) - set_property(TARGET jpegtran-static PROPERTY COMPILE_FLAGS "-DUSE_SETMODE") + set_property(TARGET jpegtran-static PROPERTY COMPILE_FLAGS "${USE_SETMODE}") endif() add_executable(rdjpgcom rdjpgcom.c) @@ -339,9 +660,9 @@ add_executable(rdjpgcom rdjpgcom.c) add_executable(wrjpgcom wrjpgcom.c) -# -# Tests -# +############################################################################### +# TESTS +############################################################################### add_subdirectory(md5) @@ -355,7 +676,8 @@ enable_testing() if(WITH_12BIT) set(TESTORIG testorig12.jpg) - set(MD5_JPEG_RGB_ISLOW 9620f424569594bb9242b48498ad801f) + set(MD5_JPEG_RGB_ISLOW 9d7369207c520d37f2c1cbfcb82b2964) + set(MD5_JPEG_RGB_ISLOW2 a00bd20d8ae49684640ef7177d2e0b64) set(MD5_PPM_RGB_ISLOW f3301d2219783b8b3d942b7239fa50c0) set(MD5_JPEG_422_IFAST_OPT 7322e3bd2f127f7de4b40d4480ce60e4) set(MD5_PPM_422_IFAST 79807fa552899e66a04708f533e16950) @@ -367,20 +689,20 @@ if(WITH_12BIT) set(MD5_PPM_GRAY_ISLOW 7213c10af507ad467da5578ca5ee1fca) set(MD5_PPM_GRAY_ISLOW_RGB e96ee81c30a6ed422d466338bd3de65d) set(MD5_JPEG_420S_IFAST_OPT 7af8e60be4d9c227ec63ac9b6630855e) - if(64BIT) - # Windows/x64 uses SSE for floating point - set(MD5_JPEG_3x2_FLOAT_PROG a8c17daf77b457725ec929e215b603f8) - set(MD5_PPM_3x2_FLOAT 
42876ab9e5c2f76a87d08db5fbd57956) - else() - # Windows/x86 uses the 387 FPU for floating point - if(MSVC) - set(MD5_JPEG_3x2_FLOAT_PROG e27840755870fa849872e58aa0cd1400) - set(MD5_PPM_3x2_FLOAT 6c2880b83bb1aa41dfe330e7a9768690) - else() - set(MD5_JPEG_3x2_FLOAT_PROG bc6dbbefac2872f6b9d6c4a0ae60c3c0) - set(MD5_PPM_3x2_FLOAT f58119ee294198ac9b4a9f5645a34266) - endif() - endif() + + set(MD5_JPEG_3x2_FLOAT_PROG_SSE a8c17daf77b457725ec929e215b603f8) + set(MD5_PPM_3x2_FLOAT_SSE 42876ab9e5c2f76a87d08db5fbd57956) + set(MD5_JPEG_3x2_FLOAT_PROG_32BIT a8c17daf77b457725ec929e215b603f8) + set(MD5_PPM_3x2_FLOAT_32BIT ${MD5_PPM_3x2_FLOAT_SSE}) + set(MD5_JPEG_3x2_FLOAT_PROG_64BIT ${MD5_JPEG_3x2_FLOAT_PROG_32BIT}) + set(MD5_PPM_3x2_FLOAT_64BIT ${MD5_PPM_3x2_FLOAT_SSE}) + set(MD5_JPEG_3x2_FLOAT_PROG_387 bc6dbbefac2872f6b9d6c4a0ae60c3c0) + set(MD5_PPM_3x2_FLOAT_387 bcc5723c61560463ac60f772e742d092) + set(MD5_JPEG_3x2_FLOAT_PROG_MSVC e27840755870fa849872e58aa0cd1400) + set(MD5_PPM_3x2_FLOAT_MSVC 6c2880b83bb1aa41dfe330e7a9768690) + + set(MD5_JPEG_3x2_IFAST_PROG 1396cc2b7185cfe943d408c9d305339e) + set(MD5_PPM_3x2_IFAST 3975985ef6eeb0a2cdc58daa651ccc00) set(MD5_PPM_420M_ISLOW_2_1 4ca6be2a6f326ff9eaab63e70a8259c0) set(MD5_PPM_420M_ISLOW_15_8 12aa9f9534c1b3d7ba047322226365eb) set(MD5_PPM_420M_ISLOW_13_8 f7e22817c7b25e1393e4ec101e9d4e96) @@ -400,7 +722,8 @@ if(WITH_12BIT) set(MD5_JPEG_CROP cdb35ff4b4519392690ea040c56ea99c) else() set(TESTORIG testorig.jpg) - set(MD5_JPEG_RGB_ISLOW 768e970dd57b340ff1b83c9d3d47c77b) + set(MD5_JPEG_RGB_ISLOW 1d44a406f61da743b5fd31c0a9abdca3) + set(MD5_JPEG_RGB_ISLOW2 31d121e57b6c2934c890a7fc7763bcd4) set(MD5_PPM_RGB_ISLOW 00a257f5393fef8821f2b88ac7421291) set(MD5_BMP_RGB_ISLOW_565 f07d2e75073e4bb10f6c6f4d36e2e3be) set(MD5_BMP_RGB_ISLOW_565D 4cfa0928ef3e6bb626d7728c924cfda4) @@ -418,23 +741,20 @@ else() set(MD5_BMP_GRAY_ISLOW_565 12f78118e56a2f48b966f792fedf23cc) set(MD5_BMP_GRAY_ISLOW_565D bdbbd616441a24354c98553df5dc82db) set(MD5_JPEG_420S_IFAST_OPT 
388708217ac46273ca33086b22827ed8) - if(WITH_SIMD) - set(MD5_JPEG_3x2_FLOAT_PROG 343e3f8caf8af5986ebaf0bdc13b5c71) - set(MD5_PPM_3x2_FLOAT 1a75f36e5904d6fc3a85a43da9ad89bb) - else() - if(64BIT) - set(MD5_JPEG_3x2_FLOAT_PROG 9bca803d2042bd1eb03819e2bf92b3e5) - set(MD5_PPM_3x2_FLOAT f6bfab038438ed8f5522fbd33595dcdc) - else() - if(MSVC) - set(MD5_JPEG_3x2_FLOAT_PROG 7999ce9cd0ee9b6c7043b7351ab7639d) - set(MD5_PPM_3x2_FLOAT 28cdc448a6b75e97892f0e0f8d4b21f3) - else() - set(MD5_JPEG_3x2_FLOAT_PROG 1657664a410e0822c924b54f6f65e6e9) - set(MD5_PPM_3x2_FLOAT cb0a1f027f3d2917c902b5640214e025) - endif() - endif() - endif() + + set(MD5_JPEG_3x2_FLOAT_PROG_SSE 343e3f8caf8af5986ebaf0bdc13b5c71) + set(MD5_PPM_3x2_FLOAT_SSE 1a75f36e5904d6fc3a85a43da9ad89bb) + set(MD5_JPEG_3x2_FLOAT_PROG_32BIT 9bca803d2042bd1eb03819e2bf92b3e5) + set(MD5_PPM_3x2_FLOAT_32BIT f6bfab038438ed8f5522fbd33595dcdc) + set(MD5_JPEG_3x2_FLOAT_PROG_64BIT ${MD5_JPEG_3x2_FLOAT_PROG_32BIT}) + set(MD5_PPM_3x2_FLOAT_64BIT 0e917a34193ef976b679a6b069b1be26) + set(MD5_JPEG_3x2_FLOAT_PROG_387 1657664a410e0822c924b54f6f65e6e9) + set(MD5_PPM_3x2_FLOAT_387 cb0a1f027f3d2917c902b5640214e025) + set(MD5_JPEG_3x2_FLOAT_PROG_MSVC 7999ce9cd0ee9b6c7043b7351ab7639d) + set(MD5_PPM_3x2_FLOAT_MSVC 28cdc448a6b75e97892f0e0f8d4b21f3) + + set(MD5_JPEG_3x2_IFAST_PROG 1ee5d2c1a77f2da495f993c8c7cceca5) + set(MD5_PPM_3x2_IFAST fd283664b3b49127984af0a7f118fccd) set(MD5_JPEG_420_ISLOW_ARI e986fb0a637a8d833d96e8a6d6d84ea1) set(MD5_JPEG_444_ISLOW_PROGARI 0a8f1c8f66e113c3cf635df0a475a617) set(MD5_PPM_420M_IFAST_ARI 72b59a99bcf1de24c5b27d151bde2437) @@ -455,7 +775,7 @@ else() set(MD5_BMP_420_ISLOW_565 bf9d13e16c4923b92e1faa604d7922cb) set(MD5_BMP_420_ISLOW_565D 6bde71526acc44bcff76f696df8638d2) set(MD5_BMP_420M_ISLOW_565 8dc0185245353cfa32ad97027342216f) - set(MD5_BMP_420M_ISLOW_565D d1be3a3339166255e76fa50a0d70d73e) + set(MD5_BMP_420M_ISLOW_565D ce034037d212bc403330df6f915c161b) set(MD5_PPM_420_ISLOW_SKIP15_31 c4c65c1e43d7275cd50328a61e6534f0) 
set(MD5_PPM_420_ISLOW_ARI_SKIP16_139 087c6b123db16ac00cb88c5b590bb74a) set(MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71 26eb36ccc7d1f0cb80cdabb0ac8b5d99) @@ -468,27 +788,27 @@ endif() if(WITH_JAVA) add_test(TJUnitTest - ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar + ${Java_JAVA_EXECUTABLE} ${JAVAARGS} -cp java/turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest) add_test(TJUnitTest-yuv - ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar + ${Java_JAVA_EXECUTABLE} ${JAVAARGS} -cp java/turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -yuv) add_test(TJUnitTest-yuv-nopad - ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar + ${Java_JAVA_EXECUTABLE} ${JAVAARGS} -cp java/turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -yuv -noyuvpad) add_test(TJUnitTest-bi - ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar + ${Java_JAVA_EXECUTABLE} ${JAVAARGS} -cp java/turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -bi) add_test(TJUnitTest-bi-yuv - ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar + ${Java_JAVA_EXECUTABLE} ${JAVAARGS} -cp java/turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -bi -yuv) add_test(TJUnitTest-bi-yuv-nopad - ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar + ${Java_JAVA_EXECUTABLE} ${JAVAARGS} -cp java/turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -bi -yuv -noyuvpad) endif() @@ -501,214 +821,325 @@ if(ENABLE_STATIC) set(TEST_LIBTYPES ${TEST_LIBTYPES} static) endif() -set(TESTIMAGES ${CMAKE_SOURCE_DIR}/testimages) +set(TESTIMAGES ${CMAKE_CURRENT_SOURCE_DIR}/testimages) set(MD5CMP ${CMAKE_CURRENT_BINARY_DIR}/md5/md5cmp) if(CMAKE_CROSSCOMPILING) file(RELATIVE_PATH TESTIMAGES ${CMAKE_CURRENT_BINARY_DIR} ${TESTIMAGES}) file(RELATIVE_PATH MD5CMP ${CMAKE_CURRENT_BINARY_DIR} ${MD5CMP}) endif() +# The output of the floating point DCT/IDCT algorithms differs depending on the +# 
type of floating point math used, so the FLOATTEST CMake variable must be +# set in order to tell the testing system which floating point results it +# should expect: +# +# sse = validate against the expected results from the libjpeg-turbo SSE SIMD +# extensions +# 32bit = validate against the expected results from the C code when running on +# a 32-bit FPU (or when SSE is being used for floating point math, +# which is generally the default with x86-64 compilers) +# 64bit = validate against the expected results from the C code when running +# on a 64-bit FPU +# 387 = validate against the expected results from the C code when the 387 FPU +# is being used for floating point math (which is generally the default +# with x86 compilers) +# msvc = validate against the expected results from the C code when compiled +# with a 32-bit version of Visual C++ + +if(CPU_TYPE STREQUAL "x86_64" OR CPU_TYPE STREQUAL "i386") + if(WITH_SIMD) + set(DEFAULT_FLOATTEST sse) + elseif(CPU_TYPE STREQUAL "x86_64") + set(DEFAULT_FLOATTEST 32bit) + elseif(CPU_TYPE STREQUAL "i386" AND MSVC) + set(DEFAULT_FLOATTEST msvc) + endif() +else() + if(BITS EQUAL 64) + set(DEFAULT_FLOATTEST 64bit) + elseif(BITS EQUAL 32) + set(DEFAULT_FLOATTEST 32bit) + endif() +endif() + +# This causes FLOATTEST to reset to the default value if WITH_SIMD has +# changed. +if(DEFINED WITH_SIMD_INT AND NOT WITH_SIMD EQUAL WITH_SIMD_INT) + set(FORCE_FLOATTEST "FORCE") +endif() +set(WITH_SIMD_INT ${WITH_SIMD} CACHE INTERNAL "") +set(FLOATTEST ${DEFAULT_FLOATTEST} CACHE STRING + "The type of floating point math used by the floating point DCT/IDCT algorithms. This tells the testing system which numerical results it should expect from those tests. 
[sse = libjpeg-turbo x86/x86-64 SIMD extensions, 32bit = generic 32-bit FPU or SSE, 64bit = generic 64-bit FPU, 387 = 387 FPU, msvc = 32-bit Visual Studio] (default = ${DEFAULT_FLOATTEST})" + ${FORCE_FLOATTEST}) +message(STATUS "FLOATTEST = ${FLOATTEST}") + +if(FLOATTEST) + string(TOUPPER ${FLOATTEST} FLOATTEST_UC) + string(TOLOWER ${FLOATTEST} FLOATTEST) + if(NOT FLOATTEST STREQUAL "sse" AND NOT FLOATTEST STREQUAL "32bit" AND + NOT FLOATTEST STREQUAL "64bit" AND NOT FLOATTEST STREQUAL "387" AND + NOT FLOATTEST STREQUAL "msvc") + message(FATAL_ERROR "\"${FLOATTEST}\" is not a valid value for FLOATTEST.") + endif() +endif() + foreach(libtype ${TEST_LIBTYPES}) - if(libtype STREQUAL "shared") - set(dir sharedlib/) - else() - set(dir "") + if(libtype STREQUAL "static") set(suffix -static) endif() if(WITH_TURBOJPEG) - add_test(tjunittest${suffix} tjunittest${suffix}) - add_test(tjunittest${suffix}-alloc tjunittest${suffix} -alloc) - add_test(tjunittest${suffix}-yuv tjunittest${suffix} -yuv) - add_test(tjunittest${suffix}-yuv-alloc tjunittest${suffix} -yuv -alloc) - add_test(tjunittest${suffix}-yuv-nopad tjunittest${suffix} -yuv -noyuvpad) + add_test(tjunittest-${libtype} tjunittest${suffix}) + add_test(tjunittest-${libtype}-alloc tjunittest${suffix} -alloc) + add_test(tjunittest-${libtype}-yuv tjunittest${suffix} -yuv) + add_test(tjunittest-${libtype}-yuv-alloc tjunittest${suffix} -yuv -alloc) + add_test(tjunittest-${libtype}-yuv-nopad tjunittest${suffix} -yuv -noyuvpad) + add_test(tjunittest-${libtype}-bmp tjunittest${suffix} -bmp) + + set(MD5_PPM_GRAY_TILE 89d3ca21213d9d864b50b4e4e7de4ca6) + set(MD5_PPM_420_8x8_TILE 847fceab15c5b7b911cb986cf0f71de3) + set(MD5_PPM_420_16x16_TILE ca45552a93687e078f7137cc4126a7b0) + set(MD5_PPM_420_32x32_TILE d8676f1d6b68df358353bba9844f4a00) + set(MD5_PPM_420_64x64_TILE 4e4c1a3d7ea4bace4f868bcbe83b7050) + set(MD5_PPM_420_128x128_TILE f24c3429c52265832beab9df72a0ceae) + set(MD5_PPM_420M_8x8_TILE bc25320e1f4c31ce2e610e43e9fd173c) + 
set(MD5_PPM_420M_TILE 75ffdf14602258c5c189522af57fa605) + set(MD5_PPM_422_8x8_TILE d83dacd9fc73b0a6f10c09acad64eb1e) + set(MD5_PPM_422_16x16_TILE 35077fb610d72dd743b1eb0cbcfe10fb) + set(MD5_PPM_422_32x32_TILE e6902ed8a449ecc0f0d6f2bf945f65f7) + set(MD5_PPM_422_64x64_TILE 2b4502a8f316cedbde1da7bce3d2231e) + set(MD5_PPM_422_128x128_TILE f0b5617d578f5e13c8eee215d64d4877) + set(MD5_PPM_422M_8x8_TILE 828941d7f41cd6283abd6beffb7fd51d) + set(MD5_PPM_422M_TILE e877ae1324c4a280b95376f7f018172f) + set(MD5_PPM_444_TILE 7964e41e67cfb8d0a587c0aa4798f9c3) + + # Test compressing from/decompressing to an arbitrary subregion of a larger + # image buffer + add_test(tjbench-${libtype}-tile-cp + ${CMAKE_COMMAND} -E copy_if_different ${TESTIMAGES}/testorig.ppm + testout_tile.ppm) + add_test(tjbench-${libtype}-tile + tjbench${suffix} testout_tile.ppm 95 -rgb -quiet -tile -benchtime 0.01 + -warmup 0) + set_tests_properties(tjbench-${libtype}-tile + PROPERTIES DEPENDS tjbench-${libtype}-tile-cp) + + foreach(tile 8 16 32 64 128) + add_test(tjbench-${libtype}-tile-gray-${tile}x${tile}-cmp + ${MD5CMP} ${MD5_PPM_GRAY_TILE} + testout_tile_GRAY_Q95_${tile}x${tile}.ppm) + foreach(subsamp 420 422) + add_test(tjbench-${libtype}-tile-${subsamp}-${tile}x${tile}-cmp + ${MD5CMP} ${MD5_PPM_${subsamp}_${tile}x${tile}_TILE} + testout_tile_${subsamp}_Q95_${tile}x${tile}.ppm) + endforeach() + add_test(tjbench-${libtype}-tile-444-${tile}x${tile}-cmp + ${MD5CMP} ${MD5_PPM_444_TILE} + testout_tile_444_Q95_${tile}x${tile}.ppm) + foreach(subsamp gray 420 422 444) + set_tests_properties(tjbench-${libtype}-tile-${subsamp}-${tile}x${tile}-cmp + PROPERTIES DEPENDS tjbench-${libtype}-tile) + endforeach() + endforeach() + + add_test(tjbench-${libtype}-tilem-cp + ${CMAKE_COMMAND} -E copy_if_different ${TESTIMAGES}/testorig.ppm + testout_tilem.ppm) + add_test(tjbench-${libtype}-tilem + tjbench${suffix} testout_tilem.ppm 95 -rgb -fastupsample -quiet -tile + -benchtime 0.01 -warmup 0) + 
set_tests_properties(tjbench-${libtype}-tilem + PROPERTIES DEPENDS tjbench-${libtype}-tilem-cp) + + add_test(tjbench-${libtype}-tile-420m-8x8-cmp + ${MD5CMP} ${MD5_PPM_420M_8x8_TILE} testout_tilem_420_Q95_8x8.ppm) + add_test(tjbench-${libtype}-tile-422m-8x8-cmp + ${MD5CMP} ${MD5_PPM_422M_8x8_TILE} testout_tilem_422_Q95_8x8.ppm) + foreach(tile 16 32 64 128) + foreach(subsamp 420 422) + add_test(tjbench-${libtype}-tile-${subsamp}m-${tile}x${tile}-cmp + ${MD5CMP} ${MD5_PPM_${subsamp}M_TILE} + testout_tilem_${subsamp}_Q95_${tile}x${tile}.ppm) + endforeach() + endforeach() + foreach(tile 8 16 32 64 128) + foreach(subsamp 420 422) + set_tests_properties(tjbench-${libtype}-tile-${subsamp}m-${tile}x${tile}-cmp + PROPERTIES DEPENDS tjbench-${libtype}-tilem) + endforeach() + endforeach() endif() - # These tests are carefully chosen to provide full coverage of as many of the - # underlying algorithms as possible (including all of the SIMD-accelerated - # ones.) + # These tests are carefully crafted to provide full coverage of as many of + # the underlying algorithms as possible (including all of the + # SIMD-accelerated ones.) 
+ + macro(add_bittest PROG NAME ARGS OUTFILE INFILE MD5SUM) + add_test(${PROG}-${libtype}-${NAME} + ${PROG}${suffix} ${ARGS} -outfile ${OUTFILE} ${INFILE}) + add_test(${PROG}-${libtype}-${NAME}-cmp + ${MD5CMP} ${MD5SUM} ${OUTFILE}) + set_tests_properties(${PROG}-${libtype}-${NAME}-cmp PROPERTIES + DEPENDS ${PROG}-${libtype}-${NAME}) + if(${ARGC} GREATER 6) + set(DEPENDS ${ARGN}) + set_tests_properties(${PROG}-${libtype}-${NAME} PROPERTIES + DEPENDS ${DEPENDS}) + endif() + endmacro() # CC: null SAMP: fullsize FDCT: islow ENT: huff - add_test(cjpeg${suffix}-rgb-islow - ${dir}cjpeg${suffix} -rgb -dct int - -outfile testout_rgb_islow.jpg ${TESTIMAGES}/testorig.ppm) - add_test(cjpeg${suffix}-rgb-islow-cmp - ${MD5CMP} ${MD5_JPEG_RGB_ISLOW} testout_rgb_islow.jpg) + add_bittest(cjpeg rgb-islow "-rgb;-dct;int;-icc;${TESTIMAGES}/test1.icc" + testout_rgb_islow.jpg ${TESTIMAGES}/testorig.ppm + ${MD5_JPEG_RGB_ISLOW}) # CC: null SAMP: fullsize IDCT: islow ENT: huff - add_test(djpeg${suffix}-rgb-islow - ${dir}djpeg${suffix} -dct int -ppm - -outfile testout_rgb_islow.ppm testout_rgb_islow.jpg) - add_test(djpeg${suffix}-rgb-islow-cmp - ${MD5CMP} ${MD5_PPM_RGB_ISLOW} testout_rgb_islow.ppm) + add_bittest(djpeg rgb-islow "-dct;int;-ppm;-icc;testout_rgb_islow.icc" + testout_rgb_islow.ppm testout_rgb_islow.jpg + ${MD5_PPM_RGB_ISLOW} cjpeg-${libtype}-rgb-islow) + + add_test(djpeg-${libtype}-rgb-islow-icc-cmp + ${MD5CMP} b06a39d730129122e85c1363ed1bbc9e testout_rgb_islow.icc) + + add_bittest(jpegtran icc "-copy;all;-icc;${TESTIMAGES}/test2.icc" + testout_rgb_islow2.jpg testout_rgb_islow.jpg ${MD5_JPEG_RGB_ISLOW2}) if(NOT WITH_12BIT) # CC: RGB->RGB565 SAMP: fullsize IDCT: islow ENT: huff - add_test(djpeg${suffix}-rgb-islow-565 - ${dir}djpeg${suffix} -dct int -rgb565 -dither none -bmp - -outfile testout_rgb_islow_565.bmp testout_rgb_islow.jpg) - add_test(djpeg${suffix}-rgb-islow-565-cmp - ${MD5CMP} ${MD5_BMP_RGB_ISLOW_565} testout_rgb_islow_565.bmp) + add_bittest(djpeg rgb-islow-565 
"-dct;int;-rgb565;-dither;none;-bmp" + testout_rgb_islow_565.bmp testout_rgb_islow.jpg + ${MD5_BMP_RGB_ISLOW_565} cjpeg-${libtype}-rgb-islow) # CC: RGB->RGB565 (dithered) SAMP: fullsize IDCT: islow ENT: huff - add_test(djpeg${suffix}-rgb-islow-565D - ${dir}djpeg${suffix} -dct int -rgb565 -bmp - -outfile testout_rgb_islow_565D.bmp testout_rgb_islow.jpg) - add_test(djpeg${suffix}-rgb-islow-565D-cmp - ${MD5CMP} ${MD5_BMP_RGB_ISLOW_565D} testout_rgb_islow_565D.bmp) + add_bittest(djpeg rgb-islow-565D "-dct;int;-rgb565;-bmp" + testout_rgb_islow_565D.bmp testout_rgb_islow.jpg + ${MD5_BMP_RGB_ISLOW_565D} cjpeg-${libtype}-rgb-islow) endif() # CC: RGB->YCC SAMP: fullsize/h2v1 FDCT: ifast ENT: 2-pass huff - add_test(cjpeg${suffix}-422-ifast-opt - ${dir}cjpeg${suffix} -sample 2x1 -dct fast -opt - -outfile testout_422_ifast_opt.jpg ${TESTIMAGES}/testorig.ppm) - add_test(cjpeg${suffix}-422-ifast-opt-cmp - ${MD5CMP} ${MD5_JPEG_422_IFAST_OPT} testout_422_ifast_opt.jpg) + add_bittest(cjpeg 422-ifast-opt "-sample;2x1;-dct;fast;-opt" + testout_422_ifast_opt.jpg ${TESTIMAGES}/testorig.ppm + ${MD5_JPEG_422_IFAST_OPT}) # CC: YCC->RGB SAMP: fullsize/h2v1 fancy IDCT: ifast ENT: huff - add_test(djpeg${suffix}-422-ifast - ${dir}djpeg${suffix} -dct fast - -outfile testout_422_ifast.ppm testout_422_ifast_opt.jpg) - add_test(djpeg${suffix}-422-ifast-cmp - ${MD5CMP} ${MD5_PPM_422_IFAST} testout_422_ifast.ppm) + add_bittest(djpeg 422-ifast "-dct;fast" + testout_422_ifast.ppm testout_422_ifast_opt.jpg + ${MD5_PPM_422_IFAST} cjpeg-${libtype}-422-ifast-opt) # CC: YCC->RGB SAMP: h2v1 merged IDCT: ifast ENT: huff - add_test(djpeg${suffix}-422m-ifast - ${dir}djpeg${suffix} -dct fast -nosmooth - -outfile testout_422m_ifast.ppm testout_422_ifast_opt.jpg) - add_test(djpeg${suffix}-422m-ifast-cmp - ${MD5CMP} ${MD5_PPM_422M_IFAST} testout_422m_ifast.ppm) + add_bittest(djpeg 422m-ifast "-dct;fast;-nosmooth" + testout_422m_ifast.ppm testout_422_ifast_opt.jpg + ${MD5_PPM_422M_IFAST} 
cjpeg-${libtype}-422-ifast-opt) if(NOT WITH_12BIT) # CC: YCC->RGB565 SAMP: h2v1 merged IDCT: ifast ENT: huff - add_test(djpeg${suffix}-422m-ifast-565 - ${dir}djpeg${suffix} -dct int -nosmooth -rgb565 -dither none -bmp - -outfile testout_422m_ifast_565.bmp testout_422_ifast_opt.jpg) - add_test(djpeg${suffix}-422m-ifast-565-cmp - ${MD5CMP} ${MD5_BMP_422M_IFAST_565} testout_422m_ifast_565.bmp) + add_bittest(djpeg 422m-ifast-565 + "-dct;int;-nosmooth;-rgb565;-dither;none;-bmp" + testout_422m_ifast_565.bmp testout_422_ifast_opt.jpg + ${MD5_BMP_422M_IFAST_565} cjpeg-${libtype}-422-ifast-opt) # CC: YCC->RGB565 (dithered) SAMP: h2v1 merged IDCT: ifast ENT: huff - add_test(djpeg${suffix}-422m-ifast-565D - ${dir}djpeg${suffix} -dct int -nosmooth -rgb565 -bmp - -outfile testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg) - add_test(djpeg${suffix}-422m-ifast-565D-cmp - ${MD5CMP} ${MD5_BMP_422M_IFAST_565D} testout_422m_ifast_565D.bmp) + add_bittest(djpeg 422m-ifast-565D "-dct;int;-nosmooth;-rgb565;-bmp" + testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg + ${MD5_BMP_422M_IFAST_565D} cjpeg-${libtype}-422-ifast-opt) endif() # CC: RGB->YCC SAMP: fullsize/h2v2 FDCT: ifast ENT: prog huff - add_test(cjpeg${suffix}-420-q100-ifast-prog - ${dir}cjpeg${suffix} -sample 2x2 -quality 100 -dct fast -prog - -outfile testout_420_q100_ifast_prog.jpg ${TESTIMAGES}/testorig.ppm) - add_test(cjpeg${suffix}-420-q100-ifast-prog-cmp - ${MD5CMP} ${MD5_JPEG_420_IFAST_Q100_PROG} testout_420_q100_ifast_prog.jpg) + add_bittest(cjpeg 420-q100-ifast-prog + "-sample;2x2;-quality;100;-dct;fast;-prog" + testout_420_q100_ifast_prog.jpg ${TESTIMAGES}/testorig.ppm + ${MD5_JPEG_420_IFAST_Q100_PROG}) # CC: YCC->RGB SAMP: fullsize/h2v2 fancy IDCT: ifast ENT: prog huff - add_test(djpeg${suffix}-420-q100-ifast-prog - ${dir}djpeg${suffix} -dct fast - -outfile testout_420_q100_ifast.ppm testout_420_q100_ifast_prog.jpg) - add_test(djpeg${suffix}-420-q100-ifast-prog-cmp - ${MD5CMP} ${MD5_PPM_420_Q100_IFAST} 
testout_420_q100_ifast.ppm) + add_bittest(djpeg 420-q100-ifast-prog "-dct;fast" + testout_420_q100_ifast.ppm testout_420_q100_ifast_prog.jpg + ${MD5_PPM_420_Q100_IFAST} cjpeg-${libtype}-420-q100-ifast-prog) # CC: YCC->RGB SAMP: h2v2 merged IDCT: ifast ENT: prog huff - add_test(djpeg${suffix}-420m-q100-ifast-prog - ${dir}djpeg${suffix} -dct fast -nosmooth - -outfile testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg) - add_test(djpeg${suffix}-420m-q100-ifast-prog-cmp - ${MD5CMP} ${MD5_PPM_420M_Q100_IFAST} testout_420m_q100_ifast.ppm) + add_bittest(djpeg 420m-q100-ifast-prog "-dct;fast;-nosmooth" + testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg + ${MD5_PPM_420M_Q100_IFAST} cjpeg-${libtype}-420-q100-ifast-prog) # CC: RGB->Gray SAMP: fullsize FDCT: islow ENT: huff - add_test(cjpeg${suffix}-gray-islow - ${dir}cjpeg${suffix} -gray -dct int - -outfile testout_gray_islow.jpg ${TESTIMAGES}/testorig.ppm) - add_test(cjpeg${suffix}-gray-islow-cmp - ${MD5CMP} ${MD5_JPEG_GRAY_ISLOW} testout_gray_islow.jpg) + add_bittest(cjpeg gray-islow "-gray;-dct;int" + testout_gray_islow.jpg ${TESTIMAGES}/testorig.ppm + ${MD5_JPEG_GRAY_ISLOW}) # CC: Gray->Gray SAMP: fullsize IDCT: islow ENT: huff - add_test(djpeg${suffix}-gray-islow - ${dir}djpeg${suffix} -dct int - -outfile testout_gray_islow.ppm testout_gray_islow.jpg) - add_test(djpeg${suffix}-gray-islow-cmp - ${MD5CMP} ${MD5_PPM_GRAY_ISLOW} testout_gray_islow.ppm) + add_bittest(djpeg gray-islow "-dct;int" + testout_gray_islow.ppm testout_gray_islow.jpg + ${MD5_PPM_GRAY_ISLOW} cjpeg-${libtype}-gray-islow) # CC: Gray->RGB SAMP: fullsize IDCT: islow ENT: huff - add_test(djpeg${suffix}-gray-islow-rgb - ${dir}djpeg${suffix} -dct int -rgb - -outfile testout_gray_islow_rgb.ppm testout_gray_islow.jpg) - add_test(djpeg${suffix}-gray-islow-rgb-cmp - ${MD5CMP} ${MD5_PPM_GRAY_ISLOW_RGB} testout_gray_islow_rgb.ppm) + add_bittest(djpeg gray-islow-rgb "-dct;int;-rgb" + testout_gray_islow_rgb.ppm testout_gray_islow.jpg + 
${MD5_PPM_GRAY_ISLOW_RGB} cjpeg-${libtype}-gray-islow) if(NOT WITH_12BIT) # CC: Gray->RGB565 SAMP: fullsize IDCT: islow ENT: huff - add_test(djpeg${suffix}-gray-islow-565 - ${dir}djpeg${suffix} -dct int -rgb565 -dither none -bmp - -outfile testout_gray_islow_565.bmp testout_gray_islow.jpg) - add_test(djpeg${suffix}-gray-islow-565-cmp - ${MD5CMP} ${MD5_BMP_GRAY_ISLOW_565} testout_gray_islow_565.bmp) + add_bittest(djpeg gray-islow-565 "-dct;int;-rgb565;-dither;none;-bmp" + testout_gray_islow_565.bmp testout_gray_islow.jpg + ${MD5_BMP_GRAY_ISLOW_565} cjpeg-${libtype}-gray-islow) # CC: Gray->RGB565 (dithered) SAMP: fullsize IDCT: islow ENT: huff - add_test(djpeg${suffix}-gray-islow-565D - ${dir}djpeg${suffix} -dct int -rgb565 -bmp - -outfile testout_gray_islow_565D.bmp testout_gray_islow.jpg) - add_test(djpeg${suffix}-gray-islow-565D-cmp - ${MD5CMP} ${MD5_BMP_GRAY_ISLOW_565D} testout_gray_islow_565D.bmp) + add_bittest(djpeg gray-islow-565D "-dct;int;-rgb565;-bmp" + testout_gray_islow_565D.bmp testout_gray_islow.jpg + ${MD5_BMP_GRAY_ISLOW_565D} cjpeg-${libtype}-gray-islow) endif() # CC: RGB->YCC SAMP: fullsize smooth/h2v2 smooth FDCT: islow # ENT: 2-pass huff - add_test(cjpeg${suffix}-420s-ifast-opt - ${dir}cjpeg${suffix} -sample 2x2 -smooth 1 -dct int -opt - -outfile testout_420s_ifast_opt.jpg ${TESTIMAGES}/testorig.ppm) - add_test(cjpeg${suffix}-420s-ifast-opt-cmp - ${MD5CMP} ${MD5_JPEG_420S_IFAST_OPT} testout_420s_ifast_opt.jpg) - - # CC: RGB->YCC SAMP: fullsize/int FDCT: float ENT: prog huff - add_test(cjpeg${suffix}-3x2-float-prog - ${dir}cjpeg${suffix} -sample 3x2 -dct float -prog - -outfile testout_3x2_float_prog.jpg ${TESTIMAGES}/testorig.ppm) - add_test(cjpeg${suffix}-3x2-float-prog-cmp - ${MD5CMP} ${MD5_JPEG_3x2_FLOAT_PROG} testout_3x2_float_prog.jpg) - - # CC: YCC->RGB SAMP: fullsize/int IDCT: float ENT: prog huff - add_test(djpeg${suffix}-3x2-float-prog - ${dir}djpeg${suffix} -dct float - -outfile testout_3x2_float.ppm testout_3x2_float_prog.jpg) - 
add_test(djpeg${suffix}-3x2-float-prog-cmp - ${MD5CMP} ${MD5_PPM_3x2_FLOAT} testout_3x2_float.ppm) + add_bittest(cjpeg 420s-ifast-opt "-sample;2x2;-smooth;1;-dct;int;-opt" + testout_420s_ifast_opt.jpg ${TESTIMAGES}/testorig.ppm + ${MD5_JPEG_420S_IFAST_OPT}) + + if(FLOATTEST) + # CC: RGB->YCC SAMP: fullsize/int FDCT: float ENT: prog huff + add_bittest(cjpeg 3x2-float-prog "-sample;3x2;-dct;float;-prog" + testout_3x2_float_prog.jpg ${TESTIMAGES}/testorig.ppm + ${MD5_JPEG_3x2_FLOAT_PROG_${FLOATTEST_UC}}) + + # CC: YCC->RGB SAMP: fullsize/int IDCT: float ENT: prog huff + add_bittest(djpeg 3x2-float-prog "-dct;float" + testout_3x2_float.ppm testout_3x2_float_prog.jpg + ${MD5_PPM_3x2_FLOAT_${FLOATTEST_UC}} cjpeg-${libtype}-3x2-float-prog) + endif() + + # CC: RGB->YCC SAMP: fullsize/int FDCT: ifast ENT: prog huff + add_bittest(cjpeg 3x2-ifast-prog "-sample;3x2;-dct;fast;-prog" + testout_3x2_ifast_prog.jpg ${TESTIMAGES}/testorig.ppm + ${MD5_JPEG_3x2_IFAST_PROG}) + + # CC: YCC->RGB SAMP: fullsize/int IDCT: ifast ENT: prog huff + add_bittest(djpeg 3x2-ifast-prog "-dct;fast" + testout_3x2_ifast.ppm testout_3x2_ifast_prog.jpg + ${MD5_PPM_3x2_IFAST} cjpeg-${libtype}-3x2-ifast-prog) if(WITH_ARITH_ENC) # CC: YCC->RGB SAMP: fullsize/h2v2 FDCT: islow ENT: arith - add_test(cjpeg${suffix}-420-islow-ari - ${dir}cjpeg${suffix} -dct int -arithmetic - -outfile testout_420_islow_ari.jpg ${TESTIMAGES}/testorig.ppm) - add_test(cjpeg${suffix}-420-islow-ari-cmp - ${MD5CMP} ${MD5_JPEG_420_ISLOW_ARI} testout_420_islow_ari.jpg) - - add_test(jpegtran${suffix}-420-islow-ari - ${dir}jpegtran${suffix} -arithmetic - -outfile testout_420_islow_ari.jpg ${TESTIMAGES}/testimgint.jpg) - add_test(jpegtran${suffix}-420-islow-ari-cmp - ${MD5CMP} ${MD5_JPEG_420_ISLOW_ARI} testout_420_islow_ari.jpg) + add_bittest(cjpeg 420-islow-ari "-dct;int;-arithmetic" + testout_420_islow_ari.jpg ${TESTIMAGES}/testorig.ppm + ${MD5_JPEG_420_ISLOW_ARI}) + + add_bittest(jpegtran 420-islow-ari "-arithmetic" + 
testout_420_islow_ari2.jpg ${TESTIMAGES}/testimgint.jpg + ${MD5_JPEG_420_ISLOW_ARI}) # CC: YCC->RGB SAMP: fullsize FDCT: islow ENT: prog arith - add_test(cjpeg${suffix}-444-islow-progari - ${dir}cjpeg${suffix} -sample 1x1 -dct int -prog -arithmetic - -outfile testout_444_islow_progari.jpg ${TESTIMAGES}/testorig.ppm) - add_test(cjpeg${suffix}-444-islow-progari-cmp - ${MD5CMP} ${MD5_JPEG_444_ISLOW_PROGARI} testout_444_islow_progari.jpg) + add_bittest(cjpeg 444-islow-progari + "-sample;1x1;-dct;int;-prog;-arithmetic" + testout_444_islow_progari.jpg ${TESTIMAGES}/testorig.ppm + ${MD5_JPEG_444_ISLOW_PROGARI}) endif() if(WITH_ARITH_DEC) # CC: RGB->YCC SAMP: h2v2 merged IDCT: ifast ENT: arith - add_test(djpeg${suffix}-420m-ifast-ari - ${dir}djpeg${suffix} -fast -ppm - -outfile testout_420m_ifast_ari.ppm ${TESTIMAGES}/testimgari.jpg) - add_test(djpeg${suffix}-420m-ifast-ari-cmp - ${MD5CMP} ${MD5_PPM_420M_IFAST_ARI} testout_420m_ifast_ari.ppm) + add_bittest(djpeg 420m-ifast-ari "-fast;-ppm" + testout_420m_ifast_ari.ppm ${TESTIMAGES}/testimgari.jpg + ${MD5_PPM_420M_IFAST_ARI}) - add_test(jpegtran${suffix}-420-islow - ${dir}jpegtran${suffix} - -outfile testout_420_islow.jpg ${TESTIMAGES}/testimgari.jpg) - add_test(jpegtran${suffix}-420-islow-cmp - ${MD5CMP} ${MD5_JPEG_420_ISLOW} testout_420_islow.jpg) + add_bittest(jpegtran 420-islow "" + testout_420_islow.jpg ${TESTIMAGES}/testimgari.jpg + ${MD5_JPEG_420_ISLOW}) endif() # 2/1-- CC: YCC->RGB SAMP: h2v2 merged IDCT: 16x16 islow ENT: huff @@ -732,231 +1163,231 @@ foreach(libtype ${TEST_LIBTYPES}) # ENT: huff foreach(scale 2_1 15_8 13_8 11_8 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8) string(REGEX REPLACE "_" "/" scalearg ${scale}) - add_test(djpeg${suffix}-420m-islow-${scale} - ${dir}djpeg${suffix} -dct int -scale ${scalearg} -nosmooth -ppm - -outfile testout_420m_islow_${scale}.ppm ${TESTIMAGES}/${TESTORIG}) - add_test(djpeg${suffix}-420m-islow-${scale}-cmp - ${MD5CMP} ${MD5_PPM_420M_ISLOW_${scale}} testout_420m_islow_${scale}.ppm) + 
add_bittest(djpeg 420m-islow-${scale} + "-dct;int;-scale;${scalearg};-nosmooth;-ppm" + testout_420m_islow_${scale}.ppm ${TESTIMAGES}/${TESTORIG} + ${MD5_PPM_420M_ISLOW_${scale}}) endforeach() if(NOT WITH_12BIT) # CC: YCC->RGB (dithered) SAMP: h2v2 fancy IDCT: islow ENT: huff - add_test(djpeg${suffix}-420-islow-256 - ${dir}djpeg${suffix} -dct int -colors 256 -bmp - -outfile testout_420_islow_256.bmp ${TESTIMAGES}/${TESTORIG}) - add_test(djpeg${suffix}-420-islow-256-cmp - ${MD5CMP} ${MD5_BMP_420_ISLOW_256} testout_420_islow_256.bmp) + add_bittest(djpeg 420-islow-256 "-dct;int;-colors;256;-bmp" + testout_420_islow_256.bmp ${TESTIMAGES}/${TESTORIG} + ${MD5_BMP_420_ISLOW_256}) # CC: YCC->RGB565 SAMP: h2v2 fancy IDCT: islow ENT: huff - add_test(djpeg${suffix}-420-islow-565 - ${dir}djpeg${suffix} -dct int -rgb565 -dither none -bmp - -outfile testout_420_islow_565.bmp ${TESTIMAGES}/${TESTORIG}) - add_test(djpeg${suffix}-420-islow-565-cmp - ${MD5CMP} ${MD5_BMP_420_ISLOW_565} testout_420_islow_565.bmp) + add_bittest(djpeg 420-islow-565 "-dct;int;-rgb565;-dither;none;-bmp" + testout_420_islow_565.bmp ${TESTIMAGES}/${TESTORIG} + ${MD5_BMP_420_ISLOW_565}) # CC: YCC->RGB565 (dithered) SAMP: h2v2 fancy IDCT: islow ENT: huff - add_test(djpeg${suffix}-420-islow-565D - ${dir}djpeg${suffix} -dct int -rgb565 -bmp - -outfile testout_420_islow_565D.bmp ${TESTIMAGES}/${TESTORIG}) - add_test(djpeg${suffix}-420-islow-565D-cmp - ${MD5CMP} ${MD5_BMP_420_ISLOW_565D} testout_420_islow_565D.bmp) + add_bittest(djpeg 420-islow-565D "-dct;int;-rgb565;-bmp" + testout_420_islow_565D.bmp ${TESTIMAGES}/${TESTORIG} + ${MD5_BMP_420_ISLOW_565D}) # CC: YCC->RGB565 SAMP: h2v2 merged IDCT: islow ENT: huff - add_test(djpeg${suffix}-420m-islow-565 - ${dir}djpeg${suffix} -dct int -nosmooth -rgb565 -dither none -bmp - -outfile testout_420m_islow_565.bmp ${TESTIMAGES}/${TESTORIG}) - add_test(djpeg${suffix}-420m-islow-565-cmp - ${MD5CMP} ${MD5_BMP_420M_ISLOW_565} testout_420m_islow_565.bmp) + add_bittest(djpeg 
420m-islow-565 + "-dct;int;-nosmooth;-rgb565;-dither;none;-bmp" + testout_420m_islow_565.bmp ${TESTIMAGES}/${TESTORIG} + ${MD5_BMP_420M_ISLOW_565}) # CC: YCC->RGB565 (dithered) SAMP: h2v2 merged IDCT: islow ENT: huff - add_test(djpeg${suffix}-420m-islow-565D - ${dir}djpeg${suffix} -dct int -nosmooth -rgb565 -bmp - -outfile testout_420m_islow_565D.bmp ${TESTIMAGES}/${TESTORIG}) - add_test(djpeg${suffix}-420m-islow-565D-cmp - ${MD5CMP} ${MD5_BMP_420M_ISLOW_565D} testout_420m_islow_565D.bmp) + add_bittest(djpeg 420m-islow-565D "-dct;int;-nosmooth;-rgb565;-bmp" + testout_420m_islow_565D.bmp ${TESTIMAGES}/${TESTORIG} + ${MD5_BMP_420M_ISLOW_565D}) endif() # Partial decode tests. These tests are designed to cover all of the # possible code paths in jpeg_skip_scanlines(). # Context rows: Yes Intra-iMCU row: Yes iMCU row prefetch: No ENT: huff - add_test(djpeg${suffix}-420-islow-skip15_31 - ${dir}djpeg${suffix} -dct int -skip 15,31 -ppm - -outfile testout_420_islow_skip15,31.ppm ${TESTIMAGES}/${TESTORIG}) - add_test(djpeg${suffix}-420-islow-skip15_31-cmp - ${MD5CMP} ${MD5_PPM_420_ISLOW_SKIP15_31} testout_420_islow_skip15,31.ppm) + add_bittest(djpeg 420-islow-skip15_31 "-dct;int;-skip;15,31;-ppm" + testout_420_islow_skip15,31.ppm ${TESTIMAGES}/${TESTORIG} + ${MD5_PPM_420_ISLOW_SKIP15_31}) # Context rows: Yes Intra-iMCU row: No iMCU row prefetch: Yes ENT: arith if(WITH_ARITH_DEC) - add_test(djpeg${suffix}-420-islow-ari-skip16_139 - ${dir}djpeg${suffix} -dct int -skip 16,139 -ppm - -outfile testout_420_islow_ari_skip16,139.ppm - ${TESTIMAGES}/testimgari.jpg) - add_test(djpeg${suffix}-420-islow-ari_skip16_139-cmp - ${MD5CMP} ${MD5_PPM_420_ISLOW_ARI_SKIP16_139} - testout_420_islow_ari_skip16,139.ppm) + add_bittest(djpeg 420-islow-ari-skip16_139 "-dct;int;-skip;16,139;-ppm" + testout_420_islow_ari_skip16,139.ppm ${TESTIMAGES}/testimgari.jpg + ${MD5_PPM_420_ISLOW_ARI_SKIP16_139}) endif() # Context rows: Yes Intra-iMCU row: No iMCU row prefetch: No ENT: prog huff - 
add_test(cjpeg${suffix}-420-islow-prog - ${dir}cjpeg${suffix} -dct int -prog + add_test(cjpeg-${libtype}-420-islow-prog + cjpeg${suffix} -dct int -prog -outfile testout_420_islow_prog.jpg ${TESTIMAGES}/testorig.ppm) - add_test(djpeg${suffix}-420-islow-prog-crop62x62_71_71 - ${dir}djpeg${suffix} -dct int -crop 62x62+71+71 -ppm - -outfile testout_420_islow_prog_crop62x62,71,71.ppm - testout_420_islow_prog.jpg) - add_test(djpeg${suffix}-420-islow-prog-crop62x62_71_71-cmp - ${MD5CMP} ${MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71} - testout_420_islow_prog_crop62x62,71,71.ppm) + add_bittest(djpeg 420-islow-prog-crop62x62_71_71 + "-dct;int;-crop;62x62+71+71;-ppm" + testout_420_islow_prog_crop62x62,71,71.ppm testout_420_islow_prog.jpg + ${MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71} cjpeg-${libtype}-420-islow-prog) # Context rows: Yes Intra-iMCU row: No iMCU row prefetch: No ENT: arith if(WITH_ARITH_DEC) - add_test(djpeg${suffix}-420-islow-ari-crop53x53_4_4 - ${dir}djpeg${suffix} -dct int -crop 53x53+4+4 -ppm - -outfile testout_420_islow_ari_crop53x53,4,4.ppm - ${TESTIMAGES}/testimgari.jpg) - add_test(djpeg${suffix}-420-islow-ari-crop53x53_4_4-cmp - ${MD5CMP} ${MD5_PPM_420_ISLOW_ARI_CROP53x53_4_4} - testout_420_islow_ari_crop53x53,4,4.ppm) + add_bittest(djpeg 420-islow-ari-crop53x53_4_4 + "-dct;int;-crop;53x53+4+4;-ppm" + testout_420_islow_ari_crop53x53,4,4.ppm ${TESTIMAGES}/testimgari.jpg + ${MD5_PPM_420_ISLOW_ARI_CROP53x53_4_4}) endif() # Context rows: No Intra-iMCU row: Yes ENT: huff - add_test(cjpeg${suffix}-444-islow - ${dir}cjpeg${suffix} -dct int -sample 1x1 + add_test(cjpeg-${libtype}-444-islow + cjpeg${suffix} -dct int -sample 1x1 -outfile testout_444_islow.jpg ${TESTIMAGES}/testorig.ppm) - add_test(djpeg${suffix}-444-islow-skip1_6 - ${dir}djpeg${suffix} -dct int -skip 1,6 -ppm - -outfile testout_444_islow_skip1,6.ppm testout_444_islow.jpg) - add_test(djpeg${suffix}-444-islow-skip1_6-cmp - ${MD5CMP} ${MD5_PPM_444_ISLOW_SKIP1_6} testout_444_islow_skip1,6.ppm) + 
add_bittest(djpeg 444-islow-skip1_6 "-dct;int;-skip;1,6;-ppm" + testout_444_islow_skip1,6.ppm testout_444_islow.jpg + ${MD5_PPM_444_ISLOW_SKIP1_6} cjpeg-${libtype}-444-islow) # Context rows: No Intra-iMCU row: No ENT: prog huff - add_test(cjpeg${suffix}-444-islow-prog - ${dir}cjpeg${suffix} -dct int -prog -sample 1x1 + add_test(cjpeg-${libtype}-444-islow-prog + cjpeg${suffix} -dct int -prog -sample 1x1 -outfile testout_444_islow_prog.jpg ${TESTIMAGES}/testorig.ppm) - add_test(djpeg${suffix}-444-islow-prog-crop98x98_13_13 - ${dir}djpeg${suffix} -dct int -crop 98x98+13+13 -ppm - -outfile testout_444_islow_prog_crop98x98,13,13.ppm - testout_444_islow_prog.jpg) - add_test(djpeg${suffix}-444-islow-prog_crop98x98_13_13-cmp - ${MD5CMP} ${MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13} - testout_444_islow_prog_crop98x98,13,13.ppm) + add_bittest(djpeg 444-islow-prog-crop98x98_13_13 + "-dct;int;-crop;98x98+13+13;-ppm" + testout_444_islow_prog_crop98x98,13,13.ppm testout_444_islow_prog.jpg + ${MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13} cjpeg-${libtype}-444-islow-prog) # Context rows: No Intra-iMCU row: No ENT: arith if(WITH_ARITH_ENC) - add_test(cjpeg${suffix}-444-islow-ari - ${dir}cjpeg${suffix} -dct int -arithmetic -sample 1x1 + add_test(cjpeg-${libtype}-444-islow-ari + cjpeg${suffix} -dct int -arithmetic -sample 1x1 -outfile testout_444_islow_ari.jpg ${TESTIMAGES}/testorig.ppm) if(WITH_ARITH_DEC) - add_test(djpeg${suffix}-444-islow-ari-crop37x37_0_0 - ${dir}djpeg${suffix} -dct int -crop 37x37+0+0 -ppm - -outfile testout_444_islow_ari_crop37x37,0,0.ppm - testout_444_islow_ari.jpg) - add_test(djpeg${suffix}-444-islow-ari-crop37x37_0_0-cmp - ${MD5CMP} ${MD5_PPM_444_ISLOW_ARI_CROP37x37_0_0} - testout_444_islow_ari_crop37x37,0,0.ppm) + add_bittest(djpeg 444-islow-ari-crop37x37_0_0 + "-dct;int;-crop;37x37+0+0;-ppm" + testout_444_islow_ari_crop37x37,0,0.ppm testout_444_islow_ari.jpg + ${MD5_PPM_444_ISLOW_ARI_CROP37x37_0_0} cjpeg-${libtype}-444-islow-ari) endif() endif() - 
add_test(jpegtran${suffix}-crop - ${dir}jpegtran${suffix} -crop 120x90+20+50 -transpose -perfect - -outfile testout_crop.jpg ${TESTIMAGES}/${TESTORIG}) - add_test(jpegtran${suffix}-crop-cmp - ${MD5CMP} ${MD5_JPEG_CROP} testout_crop.jpg) + add_bittest(jpegtran crop "-crop;120x90+20+50;-transpose;-perfect" + testout_crop.jpg ${TESTIMAGES}/${TESTORIG} + ${MD5_JPEG_CROP}) endforeach() add_custom_target(testclean COMMAND ${CMAKE_COMMAND} -P - ${CMAKE_SOURCE_DIR}/cmakescripts/testclean.cmake) - - -# -# Installer -# - -if(MSVC) - set(INST_PLATFORM "Visual C++") - set(INST_NAME ${CMAKE_PROJECT_NAME}-${VERSION}-vc) - set(INST_REG_NAME ${CMAKE_PROJECT_NAME}) -elseif(MINGW) - set(INST_PLATFORM GCC) - set(INST_NAME ${CMAKE_PROJECT_NAME}-${VERSION}-gcc) - set(INST_REG_NAME ${CMAKE_PROJECT_NAME}-gcc) - set(INST_DEFS -DGCC) -endif() - -if(64BIT) - set(INST_PLATFORM "${INST_PLATFORM} 64-bit") - set(INST_NAME ${INST_NAME}64) - set(INST_REG_NAME ${INST_DIR}64) - set(INST_DEFS ${INST_DEFS} -DWIN64) -endif() - -if(WITH_JAVA) - set(INST_DEFS ${INST_DEFS} -DJAVA) -endif() + ${CMAKE_CURRENT_SOURCE_DIR}/cmakescripts/testclean.cmake) -if(MSVC_IDE) - set(INST_DEFS ${INST_DEFS} "-DBUILDDIR=${CMAKE_CFG_INTDIR}\\") -else() - set(INST_DEFS ${INST_DEFS} "-DBUILDDIR=") +if(WITH_TURBOJPEG) + configure_file(tjbenchtest.in tjbenchtest @ONLY) + configure_file(tjexampletest.in tjexampletest @ONLY) + if(WIN32) + set(BASH bash) + endif() + if(WITH_JAVA) + configure_file(tjbenchtest.java.in tjbenchtest.java @ONLY) + configure_file(tjexampletest.java.in tjexampletest.java @ONLY) + add_custom_target(tjtest + COMMAND echo tjbenchtest + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest + COMMAND echo tjbenchtest -alloc + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -alloc + COMMAND echo tjbenchtest -yuv + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -yuv + COMMAND echo tjbenchtest -yuv -alloc + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -yuv -alloc + COMMAND echo 
tjbenchtest -progressive + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -progressive + COMMAND echo tjexampletest + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest + COMMAND echo tjbenchtest.java + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java + COMMAND echo tjbenchtest.java -yuv + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java -yuv + COMMAND echo tjbenchtest.java -progressive + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java -progressive + COMMAND echo tjexampletest.java + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest.java + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest + ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest.java + ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest) + else() + add_custom_target(tjtest + COMMAND echo tjbenchtest + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest + COMMAND echo tjbenchtest -alloc + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -alloc + COMMAND echo tjbenchtest -yuv + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -yuv + COMMAND echo tjbenchtest -yuv -alloc + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest -yuv -alloc + COMMAND echo tjexampletest + COMMAND ${BASH} ${CMAKE_CURRENT_BINARY_DIR}/tjexampletest + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/tjbenchtest) + endif() endif() -STRING(REGEX REPLACE "/" "\\\\" INST_DIR ${CMAKE_INSTALL_PREFIX}) -configure_file(release/libjpeg-turbo.nsi.in libjpeg-turbo.nsi @ONLY) +############################################################################### +# INSTALLATION +############################################################################### -if(WITH_JAVA) - set(JAVA_DEPEND java) -endif() -add_custom_target(installer - makensis -nocd ${INST_DEFS} libjpeg-turbo.nsi - DEPENDS jpeg jpeg-static turbojpeg turbojpeg-static rdjpgcom wrjpgcom - cjpeg djpeg jpegtran tjbench ${JAVA_DEPEND} - SOURCES libjpeg-turbo.nsi) +set(EXE ${CMAKE_EXECUTABLE_SUFFIX}) if(WITH_TURBOJPEG) if(ENABLE_SHARED) 
install(TARGETS turbojpeg tjbench - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin) + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() if(ENABLE_STATIC) - install(TARGETS turbojpeg-static ARCHIVE DESTINATION lib) + install(TARGETS turbojpeg-static ARCHIVE + DESTINATION ${CMAKE_INSTALL_LIBDIR}) if(NOT ENABLE_SHARED) - install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/tjbench-static.exe - DESTINATION bin RENAME tjbench.exe) + install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/tjbench-static${EXE} + DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME tjbench${EXE}) endif() endif() - install(FILES ${CMAKE_SOURCE_DIR}/turbojpeg.h DESTINATION include) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/turbojpeg.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif() if(ENABLE_STATIC) - install(TARGETS jpeg-static ARCHIVE DESTINATION lib) + install(TARGETS jpeg-static ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) if(NOT ENABLE_SHARED) - install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/cjpeg-static.exe - DESTINATION bin RENAME cjpeg.exe) - install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/djpeg-static.exe - DESTINATION bin RENAME djpeg.exe) - install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/jpegtran-static.exe - DESTINATION bin RENAME jpegtran.exe) + install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/cjpeg-static${EXE} + DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME cjpeg${EXE}) + install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/djpeg-static${EXE} + DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME djpeg${EXE}) + install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/jpegtran-static${EXE} + DESTINATION ${CMAKE_INSTALL_BINDIR} RENAME jpegtran${EXE}) endif() endif() -install(TARGETS rdjpgcom wrjpgcom RUNTIME DESTINATION bin) +install(TARGETS rdjpgcom wrjpgcom RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/README.ijg + ${CMAKE_CURRENT_SOURCE_DIR}/README.md 
${CMAKE_CURRENT_SOURCE_DIR}/example.txt + ${CMAKE_CURRENT_SOURCE_DIR}/tjexample.c + ${CMAKE_CURRENT_SOURCE_DIR}/libjpeg.txt + ${CMAKE_CURRENT_SOURCE_DIR}/structure.txt + ${CMAKE_CURRENT_SOURCE_DIR}/usage.txt ${CMAKE_CURRENT_SOURCE_DIR}/wizard.txt + ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md DESTINATION ${CMAKE_INSTALL_DOCDIR}) +if(WITH_JAVA) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/java/TJExample.java + DESTINATION ${CMAKE_INSTALL_DOCDIR}) +endif() + +if(UNIX OR MINGW) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/cjpeg.1 + ${CMAKE_CURRENT_SOURCE_DIR}/djpeg.1 ${CMAKE_CURRENT_SOURCE_DIR}/jpegtran.1 + ${CMAKE_CURRENT_SOURCE_DIR}/rdjpgcom.1 + ${CMAKE_CURRENT_SOURCE_DIR}/wrjpgcom.1 + DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) +endif() +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/pkgscripts/libjpeg.pc + ${CMAKE_CURRENT_BINARY_DIR}/pkgscripts/libturbojpeg.pc + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) -install(FILES ${CMAKE_SOURCE_DIR}/README.ijg ${CMAKE_SOURCE_DIR}/README.md - ${CMAKE_SOURCE_DIR}/example.c ${CMAKE_SOURCE_DIR}/libjpeg.txt - ${CMAKE_SOURCE_DIR}/structure.txt ${CMAKE_SOURCE_DIR}/usage.txt - ${CMAKE_SOURCE_DIR}/wizard.txt - DESTINATION doc) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/jconfig.h + ${CMAKE_CURRENT_SOURCE_DIR}/jerror.h ${CMAKE_CURRENT_SOURCE_DIR}/jmorecfg.h + ${CMAKE_CURRENT_SOURCE_DIR}/jpeglib.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -install(FILES ${CMAKE_BINARY_DIR}/jconfig.h ${CMAKE_SOURCE_DIR}/jerror.h - ${CMAKE_SOURCE_DIR}/jmorecfg.h ${CMAKE_SOURCE_DIR}/jpeglib.h - DESTINATION include) +include(cmakescripts/BuildPackages.cmake) -configure_file("${CMAKE_SOURCE_DIR}/cmakescripts/cmake_uninstall.cmake.in" +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmakescripts/cmake_uninstall.cmake.in" "cmake_uninstall.cmake" IMMEDIATE @ONLY) add_custom_target(uninstall COMMAND ${CMAKE_COMMAND} -P cmake_uninstall.cmake) diff --git a/ChangeLog.md b/ChangeLog.md index f5fe44b..8c529ec 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,225 @@ +2.0.1 
+===== + +### Significant changes relative to 2.0.0: + +1. Fixed a regression introduced with the new CMake-based Un*x build system, +whereby jconfig.h could cause compiler warnings of the form +`"HAVE_*_H" redefined` if it was included by downstream Autotools-based +projects that used `AC_CHECK_HEADERS()` to check for the existence of locale.h, +stddef.h, or stdlib.h. + +2. The `jsimd_quantize_float_dspr2()` and `jsimd_convsamp_float_dspr2()` +functions in the MIPS DSPr2 SIMD extensions are now disabled at compile time +if the soft float ABI is enabled. Those functions use instructions that are +incompatible with the soft float ABI. + +3. Fixed a regression in the SIMD feature detection code, introduced by +the AVX2 SIMD extensions (2.0 beta1[1]), that caused libjpeg-turbo to crash on +Windows 7 if Service Pack 1 was not installed. + +4. Fixed out-of-bounds read in cjpeg that occurred when attempting to compress +a specially-crafted malformed color-index (8-bit-per-sample) Targa file in +which some of the samples (color indices) exceeded the bounds of the Targa +file's color table. + +5. Fixed an issue whereby installing a fully static build of libjpeg-turbo +(a build in which `CFLAGS` contains `-static` and `ENABLE_SHARED` is `0`) would +fail with "No valid ELF RPATH or RUNPATH entry exists in the file." + + +2.0.0 +===== + +### Significant changes relative to 2.0 beta1: + +1. The TurboJPEG API can now decompress CMYK JPEG images that have subsampled M +and Y components (not to be confused with YCCK JPEG images, in which the C/M/Y +components have been transformed into luma and chroma.) Previously, an error +was generated ("Could not determine subsampling type for JPEG image") when such +an image was passed to `tjDecompressHeader3()`, `tjTransform()`, +`tjDecompressToYUVPlanes()`, `tjDecompressToYUV2()`, or the equivalent Java +methods. + +2. 
Fixed an issue (CVE-2018-11813) whereby a specially-crafted malformed input +file (specifically, a file with a valid Targa header but incomplete pixel data) +would cause cjpeg to generate a JPEG file that was potentially thousands of +times larger than the input file. The Targa reader in cjpeg was not properly +detecting that the end of the input file had been reached prematurely, so after +all valid pixels had been read from the input, the reader injected dummy pixels +with values of 255 into the JPEG compressor until the number of pixels +specified in the Targa header had been compressed. The Targa reader in cjpeg +now behaves like the PPM reader and aborts compression if the end of the input +file is reached prematurely. Because this issue only affected cjpeg and not +the underlying library, and because it did not involve any out-of-bounds reads +or other exploitable behaviors, it was not believed to represent a security +threat. + +3. Fixed an issue whereby the `tjLoadImage()` and `tjSaveImage()` functions +would produce a "Bogus message code" error message if the underlying bitmap and +PPM readers/writers threw an error that was specific to the readers/writers +(as opposed to a general libjpeg API error.) + +4. Fixed an issue whereby a specially-crafted malformed BMP file, one in which +the header specified an image width of 1073741824 pixels, would trigger a +floating point exception (division by zero) in the `tjLoadImage()` function +when attempting to load the BMP file into a 4-component image buffer. + +5. Fixed an issue whereby certain combinations of calls to +`jpeg_skip_scanlines()` and `jpeg_read_scanlines()` could trigger an infinite +loop when decompressing progressive JPEG images that use vertical chroma +subsampling (for instance, 4:2:0 or 4:4:0.) + +6. 
Fixed a segfault in `jpeg_skip_scanlines()` that occurred when decompressing +a 4:2:2 or 4:2:0 JPEG image using the merged (non-fancy) upsampling algorithms +(that is, when setting `cinfo.do_fancy_upsampling` to `FALSE`.) + +7. The new CMake-based build system will now disable the MIPS DSPr2 SIMD +extensions if it detects that the compiler does not support DSPr2 instructions. + +8. Fixed out-of-bounds read in cjpeg that occurred when attempting to compress +a specially-crafted malformed color-index (8-bit-per-sample) BMP file in which +some of the samples (color indices) exceeded the bounds of the BMP file's color +table. + +9. Fixed a signed integer overflow in the progressive Huffman decoder, detected +by the Clang and GCC undefined behavior sanitizers, that could be triggered by +attempting to decompress a specially-crafted malformed JPEG image. This issue +did not pose a security threat, but removing the warning made it easier to +detect actual security issues, should they arise in the future. + + +1.5.90 (2.0 beta1) +================== + +### Significant changes relative to 1.5.3: + +1. Added AVX2 SIMD implementations of the colorspace conversion, chroma +downsampling and upsampling, integer quantization and sample conversion, and +slow integer DCT/IDCT algorithms. When using the slow integer DCT/IDCT +algorithms on AVX2-equipped CPUs, the compression of RGB images is +approximately 13-36% (avg. 22%) faster (relative to libjpeg-turbo 1.5.x) with +64-bit code and 11-21% (avg. 17%) faster with 32-bit code, and the +decompression of RGB images is approximately 9-35% (avg. 17%) faster with +64-bit code and 7-17% (avg. 12%) faster with 32-bit code. (As tested on a +3 GHz Intel Core i7. Actual mileage may vary.) + +2. Overhauled the build system to use CMake on all platforms, and removed the +autotools-based build system. This decision resulted from extensive +discussions within the libjpeg-turbo community. 
libjpeg-turbo traditionally +used CMake only for Windows builds, but there was an increasing amount of +demand to extend CMake support to other platforms. However, because of the +unique nature of our code base (the need to support different assemblers on +each platform, the need for Java support, etc.), providing dual build systems +as other OSS imaging libraries do (including libpng and libtiff) would have +created a maintenance burden. The use of CMake greatly simplifies some aspects +of our build system, owing to CMake's built-in support for various assemblers, +Java, and unit testing, as well as generally fewer quirks that have to be +worked around in order to implement our packaging system. Eliminating +autotools puts our project slightly at odds with the traditional practices of +the OSS community, since most "system libraries" tend to be built with +autotools, but it is believed that the benefits of this move outweigh the +risks. In addition to providing a unified build environment, switching to +CMake allows for the use of various build tools and IDEs that aren't supported +under autotools, including XCode, Ninja, and Eclipse. It also eliminates the +need to install autotools via MacPorts/Homebrew on OS X and allows +libjpeg-turbo to be configured without the use of a terminal/command prompt. +Extensive testing was conducted to ensure that all features provided by the +autotools-based build system are provided by the new build system. + +3. The libjpeg API in this version of libjpeg-turbo now includes two additional +functions, `jpeg_read_icc_profile()` and `jpeg_write_icc_profile()`, that can +be used to extract ICC profile data from a JPEG file while decompressing or to +embed ICC profile data in a JPEG file while compressing or transforming. This +eliminates the need for downstream projects, such as color management libraries +and browsers, to include their own glueware for accomplishing this. + +4. 
Improved error handling in the TurboJPEG API library: + + - Introduced a new function (`tjGetErrorStr2()`) in the TurboJPEG C API +that allows compression/decompression/transform error messages to be retrieved +in a thread-safe manner. Retrieving error messages from global functions, such +as `tjInitCompress()` or `tjBufSize()`, is still thread-unsafe, but since those +functions will only throw errors if passed an invalid argument or if a memory +allocation failure occurs, thread safety is not as much of a concern. + - Introduced a new function (`tjGetErrorCode()`) in the TurboJPEG C API +and a new method (`TJException.getErrorCode()`) in the TurboJPEG Java API that +can be used to determine the severity of the last +compression/decompression/transform error. This allows applications to +choose whether to ignore warnings (non-fatal errors) from the underlying +libjpeg API or to treat them as fatal. + - Introduced a new flag (`TJFLAG_STOPONWARNING` in the TurboJPEG C API and +`TJ.FLAG_STOPONWARNING` in the TurboJPEG Java API) that causes the library to +immediately halt a compression/decompression/transform operation if it +encounters a warning from the underlying libjpeg API (the default behavior is +to allow the operation to complete unless a fatal error is encountered.) + +5. Introduced a new flag in the TurboJPEG C and Java APIs (`TJFLAG_PROGRESSIVE` +and `TJ.FLAG_PROGRESSIVE`, respectively) that causes the library to use +progressive entropy coding in JPEG images generated by compression and +transform operations. Additionally, a new transform option +(`TJXOPT_PROGRESSIVE` in the C API and `TJTransform.OPT_PROGRESSIVE` in the +Java API) has been introduced, allowing progressive entropy coding to be +enabled for selected transforms in a multi-transform operation. + +6. 
Introduced a new transform option in the TurboJPEG API (`TJXOPT_COPYNONE` in +the C API and `TJTransform.OPT_COPYNONE` in the Java API) that allows the +copying of markers (including EXIF and ICC profile data) to be disabled for a +particular transform. + +7. Added two functions to the TurboJPEG C API (`tjLoadImage()` and +`tjSaveImage()`) that can be used to load/save a BMP or PPM/PGM image to/from a +memory buffer with a specified pixel format and layout. These functions +replace the project-private (and slow) bmp API, which was previously used by +TJBench, and they also provide a convenient way for first-time users of +libjpeg-turbo to quickly develop a complete JPEG compression/decompression +program. + +8. The TurboJPEG C API now includes a new convenience array (`tjAlphaOffset[]`) +that contains the alpha component index for each pixel format (or -1 if the +pixel format lacks an alpha component.) The TurboJPEG Java API now includes a +new method (`TJ.getAlphaOffset()`) that returns the same value. In addition, +the `tjRedOffset[]`, `tjGreenOffset[]`, and `tjBlueOffset[]` arrays-- and the +corresponding `TJ.getRedOffset()`, `TJ.getGreenOffset()`, and +`TJ.getBlueOffset()` methods-- now return -1 for `TJPF_GRAY`/`TJ.PF_GRAY` +rather than 0. This allows programs to easily determine whether a pixel format +has red, green, blue, and alpha components. + +9. Added a new example (tjexample.c) that demonstrates the basic usage of the +TurboJPEG C API. This example mirrors the functionality of TJExample.java. +Both files are now included in the libjpeg-turbo documentation. + +10. Fixed two signed integer overflows in the arithmetic decoder, detected by +the Clang undefined behavior sanitizer, that could be triggered by attempting +to decompress a specially-crafted malformed JPEG image. These issues did not +pose a security threat, but removing the warnings makes it easier to detect +actual security issues, should they arise in the future. + +11. 
Fixed a bug in the merged 4:2:0 upsampling/dithered RGB565 color conversion +algorithm that caused incorrect dithering in the output image. This algorithm +now produces bitwise-identical results to the unmerged algorithms. + +12. The SIMD function symbols for x86[-64]/ELF, MIPS/ELF, macOS/x86[-64] (if +libjpeg-turbo is built with YASM), and iOS/ARM[64] builds are now private. +This prevents those symbols from being exposed in applications or shared +libraries that link statically with libjpeg-turbo. + +13. Added Loongson MMI SIMD implementations of the RGB-to-YCbCr and +YCbCr-to-RGB colorspace conversion, 4:2:0 chroma downsampling, 4:2:0 fancy +chroma upsampling, integer quantization, and slow integer DCT/IDCT algorithms. +When using the slow integer DCT/IDCT, this speeds up the compression of RGB +images by approximately 70-100% and the decompression of RGB images by +approximately 2-3.5x. + +14. Fixed a build error when building with older MinGW releases (regression +caused by 1.5.1[7].) + +15. Added SIMD acceleration for progressive Huffman encoding on SSE2-capable +x86 and x86-64 platforms. This speeds up the compression of full-color +progressive JPEGs by about 85-90% on average (relative to libjpeg-turbo 1.5.x) +when using modern Intel and AMD CPUs. + + 1.5.3 ===== @@ -174,8 +396,8 @@ specified.) 2x2 luminance sampling factors and 2x1 or 1x2 chrominance sampling factors. This is a non-standard way of specifying 2x subsampling (normally 4:2:2 JPEGs have 2x1 luminance and 1x1 chrominance sampling factors, and 4:4:0 JPEGs have -1x2 luminance and 1x1 chrominance sampling factors), but the JPEG specification -and the libjpeg API both allow it. +1x2 luminance and 1x1 chrominance sampling factors), but the JPEG format and +the libjpeg API both allow it. 7. 
Fixed an unsigned integer overflow in the libjpeg memory manager, detected by the Clang undefined behavior sanitizer, that could be triggered by diff --git a/LICENSE.md b/LICENSE.md index 0572390..0f6ec4b 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -11,7 +11,8 @@ libjpeg-turbo is covered by three compatible BSD-style open source licenses: - The Modified (3-clause) BSD License, which is listed below - This license covers the TurboJPEG API library and associated programs. + This license covers the TurboJPEG API library and associated programs, as + well as the build system. - The zlib License, which is listed below @@ -137,3 +138,16 @@ freely, subject to the following restrictions: 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. + + +Why Three Licenses? +=================== + +The zlib License could have been used instead of the Modified (3-clause) BSD +License, and since the IJG License effectively subsumes the distribution +conditions of the zlib License, this would have effectively placed +libjpeg-turbo binary distributions under the IJG License. However, the IJG +License specifically refers to the Independent JPEG Group and does not extend +attribution and endorsement protections to other entities. Thus, it was +desirable to choose a license that granted us the same protections for new code +that were granted to the IJG for code derived from their software. 
diff --git a/Makefile.am b/Makefile.am deleted file mode 100644 index 8043f09..0000000 --- a/Makefile.am +++ /dev/null @@ -1,794 +0,0 @@ -lib_LTLIBRARIES = libjpeg.la -libjpeg_la_LDFLAGS = -version-info ${LIBTOOL_CURRENT}:${SO_MINOR_VERSION}:${SO_AGE} -no-undefined -include_HEADERS = jerror.h jmorecfg.h jpeglib.h - -if WITH_TURBOJPEG -lib_LTLIBRARIES += libturbojpeg.la -libturbojpeg_la_LDFLAGS = -version-info 1:0:1 -no-undefined -include_HEADERS += turbojpeg.h -endif - -nodist_include_HEADERS = jconfig.h - -pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = pkgscripts/libjpeg.pc -if WITH_TURBOJPEG -pkgconfig_DATA += pkgscripts/libturbojpeg.pc -endif - -HDRS = jchuff.h jdct.h jdhuff.h jerror.h jinclude.h jmemsys.h jmorecfg.h \ - jpegint.h jpeglib.h jversion.h jsimd.h jsimddct.h jpegcomp.h \ - jpeg_nbits_table.h - -libjpeg_la_SOURCES = $(HDRS) jcapimin.c jcapistd.c jccoefct.c jccolor.c \ - jcdctmgr.c jchuff.c jcinit.c jcmainct.c jcmarker.c jcmaster.c \ - jcomapi.c jcparam.c jcphuff.c jcprepct.c jcsample.c jctrans.c \ - jdapimin.c jdapistd.c jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c \ - jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c jdmaster.c \ - jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c jerror.c \ - jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c \ - jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c - -if WITH_ARITH -libjpeg_la_SOURCES += jaricom.c -endif - -if WITH_ARITH_ENC -libjpeg_la_SOURCES += jcarith.c -endif - -if WITH_ARITH_DEC -libjpeg_la_SOURCES += jdarith.c -endif - - -SUBDIRS = java - - -if WITH_TURBOJPEG - -libturbojpeg_la_SOURCES = $(libjpeg_la_SOURCES) turbojpeg.c turbojpeg.h \ - transupp.c transupp.h jdatadst-tj.c jdatasrc-tj.c - -if WITH_JAVA - -libturbojpeg_la_SOURCES += turbojpeg-jni.c -libturbojpeg_la_CFLAGS = ${JNI_CFLAGS} -TJMAPFILE = turbojpeg-mapfile.jni - -else - -TJMAPFILE = turbojpeg-mapfile - -endif - -libturbojpeg_la_SOURCES += $(TJMAPFILE) - -if VERSION_SCRIPT -libturbojpeg_la_LDFLAGS += 
$(VERSION_SCRIPT_FLAG)$(srcdir)/$(TJMAPFILE) -endif - -endif - - -if VERSION_SCRIPT -libjpeg_la_LDFLAGS += $(VERSION_SCRIPT_FLAG)libjpeg.map -endif - - -if WITH_SIMD - -SUBDIRS += simd -libjpeg_la_LIBADD = simd/libsimd.la -libturbojpeg_la_LIBADD = simd/libsimd.la - -else - -libjpeg_la_SOURCES += jsimd_none.c - -endif - - -bin_PROGRAMS = cjpeg djpeg jpegtran rdjpgcom wrjpgcom -noinst_PROGRAMS = jcstest - - -if WITH_TURBOJPEG - -bin_PROGRAMS += tjbench - -noinst_PROGRAMS += tjunittest - -tjbench_SOURCES = tjbench.c bmp.h bmp.c tjutil.h tjutil.c rdbmp.c rdppm.c \ - wrbmp.c wrppm.c - -tjbench_LDADD = libturbojpeg.la libjpeg.la -lm - -tjbench_CFLAGS = -DBMP_SUPPORTED -DPPM_SUPPORTED - -tjunittest_SOURCES = tjunittest.c tjutil.h tjutil.c - -tjunittest_LDADD = libturbojpeg.la - -endif - - -cjpeg_SOURCES = cdjpeg.h cderror.h cdjpeg.c cjpeg.c rdgif.c rdppm.c rdswitch.c -if WITH_12BIT -else -cjpeg_SOURCES += rdbmp.c rdtarga.c -endif - -cjpeg_LDADD = libjpeg.la - -cjpeg_CFLAGS = -DGIF_SUPPORTED -DPPM_SUPPORTED -if WITH_12BIT -else -cjpeg_CFLAGS += -DBMP_SUPPORTED -DTARGA_SUPPORTED -endif - -djpeg_SOURCES = cdjpeg.h cderror.h cdjpeg.c djpeg.c rdcolmap.c rdswitch.c \ - wrgif.c wrppm.c -if WITH_12BIT -else -djpeg_SOURCES += wrbmp.c wrtarga.c -endif - -djpeg_LDADD = libjpeg.la - -djpeg_CFLAGS = -DGIF_SUPPORTED -DPPM_SUPPORTED -if WITH_12BIT -else -djpeg_CFLAGS += -DBMP_SUPPORTED -DTARGA_SUPPORTED -endif - -jpegtran_SOURCES = jpegtran.c rdswitch.c cdjpeg.c transupp.c transupp.h - -jpegtran_LDADD = libjpeg.la - -rdjpgcom_SOURCES = rdjpgcom.c - -rdjpgcom_LDADD = libjpeg.la - -wrjpgcom_SOURCES = wrjpgcom.c - -wrjpgcom_LDADD = libjpeg.la - -jcstest_SOURCES = jcstest.c - -jcstest_LDADD = libjpeg.la - -dist_man1_MANS = cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 wrjpgcom.1 - -DOCS= coderules.txt jconfig.txt change.log rdrle.c wrrle.c BUILDING.md \ - ChangeLog.md - -dist_doc_DATA = README.ijg README.md libjpeg.txt structure.txt usage.txt \ - wizard.txt LICENSE.md - -exampledir = $(docdir) 
-dist_example_DATA = example.c - - -EXTRA_DIST = win release $(DOCS) testimages CMakeLists.txt \ - sharedlib/CMakeLists.txt cmakescripts libjpeg.map.in doc doxygen.config \ - doxygen-extra.css jccolext.c jdcolext.c jdcol565.c jdmrgext.c jdmrg565.c \ - jstdhuff.c jdcoefct.h jdmainct.h jdmaster.h jdsample.h \ - md5/CMakeLists.txt - -dist-hook: - rm -rf `find $(distdir) -name .svn` - - -SUBDIRS += md5 - -if WITH_12BIT - -TESTORIG = testorig12.jpg -MD5_JPEG_RGB_ISLOW = 9620f424569594bb9242b48498ad801f -MD5_PPM_RGB_ISLOW = f3301d2219783b8b3d942b7239fa50c0 -MD5_JPEG_422_IFAST_OPT = 7322e3bd2f127f7de4b40d4480ce60e4 -MD5_PPM_422_IFAST = 79807fa552899e66a04708f533e16950 -MD5_PPM_422M_IFAST = 07737bfe8a7c1c87aaa393a0098d16b0 -MD5_JPEG_420_IFAST_Q100_PROG = a1da220b5604081863a504297ed59e55 -MD5_PPM_420_Q100_IFAST = 1b3730122709f53d007255e8dfd3305e -MD5_PPM_420M_Q100_IFAST = 980a1a3c5bf9510022869d30b7d26566 -MD5_JPEG_GRAY_ISLOW = 235c90707b16e2e069f37c888b2636d9 -MD5_PPM_GRAY_ISLOW = 7213c10af507ad467da5578ca5ee1fca -MD5_PPM_GRAY_ISLOW_RGB = e96ee81c30a6ed422d466338bd3de65d -MD5_JPEG_420S_IFAST_OPT = 7af8e60be4d9c227ec63ac9b6630855e -MD5_JPEG_3x2_FLOAT_PROG_SSE = a8c17daf77b457725ec929e215b603f8 -MD5_PPM_3x2_FLOAT_SSE = 42876ab9e5c2f76a87d08db5fbd57956 -MD5_JPEG_3x2_FLOAT_PROG_32BIT = a8c17daf77b457725ec929e215b603f8 -MD5_PPM_3x2_FLOAT_32BIT = 42876ab9e5c2f76a87d08db5fbd57956 -MD5_PPM_3x2_FLOAT_64BIT = d6fbc71153b3d8ded484dbc17c7b9cf4 -MD5_JPEG_3x2_FLOAT_PROG_387 = bc6dbbefac2872f6b9d6c4a0ae60c3c0 -MD5_PPM_3x2_FLOAT_387 = bcc5723c61560463ac60f772e742d092 -MD5_JPEG_3x2_IFAST_PROG = 1396cc2b7185cfe943d408c9d305339e -MD5_PPM_3x2_IFAST = 3975985ef6eeb0a2cdc58daa651ccc00 -MD5_PPM_420M_ISLOW_2_1 = 4ca6be2a6f326ff9eaab63e70a8259c0 -MD5_PPM_420M_ISLOW_15_8 = 12aa9f9534c1b3d7ba047322226365eb -MD5_PPM_420M_ISLOW_13_8 = f7e22817c7b25e1393e4ec101e9d4e96 -MD5_PPM_420M_ISLOW_11_8 = 800a16f9f4dc9b293197bfe11be10a82 -MD5_PPM_420M_ISLOW_9_8 = 06b7a92a9bc69f4dc36ec40f1937d55c 
-MD5_PPM_420M_ISLOW_7_8 = 3ec444a14a4ab4eab88ffc49c48eca43 -MD5_PPM_420M_ISLOW_3_4 = 3e726b7ea872445b19437d1c1d4f0d93 -MD5_PPM_420M_ISLOW_5_8 = a8a771abdc94301d20ffac119b2caccd -MD5_PPM_420M_ISLOW_1_2 = b419124dd5568b085787234866102866 -MD5_PPM_420M_ISLOW_3_8 = 343d19015531b7bbe746124127244fa8 -MD5_PPM_420M_ISLOW_1_4 = 35fd59d866e44659edfa3c18db2a3edb -MD5_PPM_420M_ISLOW_1_8 = ccaed48ac0aedefda5d4abe4013f4ad7 -MD5_PPM_420_ISLOW_SKIP15_31 = 86664cd9dc956536409e44e244d20a97 -MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71 = 452a21656115a163029cfba5c04fa76a -MD5_PPM_444_ISLOW_SKIP1_6 = ef63901f71ef7a75cd78253fc0914f84 -MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13 = 15b173fb5872d9575572fbcc1b05956f -MD5_JPEG_CROP = cdb35ff4b4519392690ea040c56ea99c - -else - -TESTORIG = testorig.jpg -MD5_JPEG_RGB_ISLOW = 768e970dd57b340ff1b83c9d3d47c77b -MD5_PPM_RGB_ISLOW = 00a257f5393fef8821f2b88ac7421291 -MD5_BMP_RGB_ISLOW_565 = f07d2e75073e4bb10f6c6f4d36e2e3be -MD5_BMP_RGB_ISLOW_565D = 4cfa0928ef3e6bb626d7728c924cfda4 -MD5_JPEG_422_IFAST_OPT = 2540287b79d913f91665e660303ab2c8 -MD5_PPM_422_IFAST = 35bd6b3f833bad23de82acea847129fa -MD5_PPM_422M_IFAST = 8dbc65323d62cca7c91ba02dd1cfa81d -MD5_BMP_422M_IFAST_565 = 3294bd4d9a1f2b3d08ea6020d0db7065 -MD5_BMP_422M_IFAST_565D = da98c9c7b6039511be4a79a878a9abc1 -MD5_JPEG_420_IFAST_Q100_PROG = 990cbe0329c882420a2094da7e5adade -MD5_PPM_420_Q100_IFAST = 5a732542015c278ff43635e473a8a294 -MD5_PPM_420M_Q100_IFAST = ff692ee9323a3b424894862557c092f1 -MD5_JPEG_GRAY_ISLOW = 72b51f894b8f4a10b3ee3066770aa38d -MD5_PPM_GRAY_ISLOW = 8d3596c56eace32f205deccc229aa5ed -MD5_PPM_GRAY_ISLOW_RGB = 116424ac07b79e5e801f00508eab48ec -MD5_BMP_GRAY_ISLOW_565 = 12f78118e56a2f48b966f792fedf23cc -MD5_BMP_GRAY_ISLOW_565D = bdbbd616441a24354c98553df5dc82db -MD5_JPEG_420S_IFAST_OPT = 388708217ac46273ca33086b22827ed8 -# See README.md for more details on why this next bit is necessary. 
-MD5_JPEG_3x2_FLOAT_PROG_SSE = 343e3f8caf8af5986ebaf0bdc13b5c71 -MD5_PPM_3x2_FLOAT_SSE = 1a75f36e5904d6fc3a85a43da9ad89bb -MD5_JPEG_3x2_FLOAT_PROG_32BIT = 9bca803d2042bd1eb03819e2bf92b3e5 -MD5_PPM_3x2_FLOAT_32BIT = f6bfab038438ed8f5522fbd33595dcdc -MD5_PPM_3x2_FLOAT_64BIT = 0e917a34193ef976b679a6b069b1be26 -MD5_JPEG_3x2_FLOAT_PROG_387 = 1657664a410e0822c924b54f6f65e6e9 -MD5_PPM_3x2_FLOAT_387 = cb0a1f027f3d2917c902b5640214e025 -MD5_JPEG_3x2_IFAST_PROG = 1ee5d2c1a77f2da495f993c8c7cceca5 -MD5_PPM_3x2_IFAST = fd283664b3b49127984af0a7f118fccd -MD5_JPEG_420_ISLOW_ARI = e986fb0a637a8d833d96e8a6d6d84ea1 -MD5_JPEG_444_ISLOW_PROGARI = 0a8f1c8f66e113c3cf635df0a475a617 -MD5_PPM_420M_IFAST_ARI = 72b59a99bcf1de24c5b27d151bde2437 -MD5_JPEG_420_ISLOW = 9a68f56bc76e466aa7e52f415d0f4a5f -MD5_PPM_420M_ISLOW_2_1 = 9f9de8c0612f8d06869b960b05abf9c9 -MD5_PPM_420M_ISLOW_15_8 = b6875bc070720b899566cc06459b63b7 -MD5_PPM_420M_ISLOW_13_8 = bc3452573c8152f6ae552939ee19f82f -MD5_PPM_420M_ISLOW_11_8 = d8cc73c0aaacd4556569b59437ba00a5 -MD5_PPM_420M_ISLOW_9_8 = d25e61bc7eac0002f5b393aa223747b6 -MD5_PPM_420M_ISLOW_7_8 = ddb564b7c74a09494016d6cd7502a946 -MD5_PPM_420M_ISLOW_3_4 = 8ed8e68808c3fbc4ea764fc9d2968646 -MD5_PPM_420M_ISLOW_5_8 = a3363274999da2366a024efae6d16c9b -MD5_PPM_420M_ISLOW_1_2 = e692a315cea26b988c8e8b29a5dbcd81 -MD5_PPM_420M_ISLOW_3_8 = 79eca9175652ced755155c90e785a996 -MD5_PPM_420M_ISLOW_1_4 = 79cd778f8bf1a117690052cacdd54eca -MD5_PPM_420M_ISLOW_1_8 = 391b3d4aca640c8567d6f8745eb2142f -MD5_BMP_420_ISLOW_256 = 4980185e3776e89bd931736e1cddeee6 -MD5_BMP_420_ISLOW_565 = bf9d13e16c4923b92e1faa604d7922cb -MD5_BMP_420_ISLOW_565D = 6bde71526acc44bcff76f696df8638d2 -MD5_BMP_420M_ISLOW_565 = 8dc0185245353cfa32ad97027342216f -MD5_BMP_420M_ISLOW_565D =d1be3a3339166255e76fa50a0d70d73e -MD5_PPM_420_ISLOW_SKIP15_31 = c4c65c1e43d7275cd50328a61e6534f0 -MD5_PPM_420_ISLOW_ARI_SKIP16_139 = 087c6b123db16ac00cb88c5b590bb74a -MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71 = 26eb36ccc7d1f0cb80cdabb0ac8b5d99 
-MD5_PPM_420_ISLOW_ARI_CROP53x53_4_4 = 886c6775af22370257122f8b16207e6d -MD5_PPM_444_ISLOW_SKIP1_6 = 5606f86874cf26b8fcee1117a0a436a6 -MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13 = db87dc7ce26bcdc7a6b56239ce2b9d6c -MD5_PPM_444_ISLOW_ARI_CROP37x37_0_0 = cb57b32bd6d03e35432362f7bf184b6d -MD5_JPEG_CROP = b4197f377e621c4e9b1d20471432610d - -endif - -.PHONY: test -test: tjquicktest tjbittest bittest - -if CROSS_COMPILING -tjquicktest: testclean -else -tjquicktest: testclean all -endif - -if WITH_TURBOJPEG -if WITH_JAVA - $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest - $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -bi - $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv - $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -noyuvpad - $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -bi - $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -bi -noyuvpad -endif - ./tjunittest - ./tjunittest -alloc - ./tjunittest -yuv - ./tjunittest -yuv -alloc - ./tjunittest -yuv -noyuvpad -endif - echo GREAT SUCCESS! 
- -if CROSS_COMPILING -tjbittest: testclean -else -tjbittest: testclean all -endif - -if WITH_TURBOJPEG - -MD5_PPM_GRAY_TILE = 89d3ca21213d9d864b50b4e4e7de4ca6 -MD5_PPM_420_8x8_TILE = 847fceab15c5b7b911cb986cf0f71de3 -MD5_PPM_420_16x16_TILE = ca45552a93687e078f7137cc4126a7b0 -MD5_PPM_420_32x32_TILE = d8676f1d6b68df358353bba9844f4a00 -MD5_PPM_420_64x64_TILE = 4e4c1a3d7ea4bace4f868bcbe83b7050 -MD5_PPM_420_128x128_TILE = f24c3429c52265832beab9df72a0ceae -MD5_PPM_420M_8x8_TILE = bc25320e1f4c31ce2e610e43e9fd173c -MD5_PPM_420M_TILE = 75ffdf14602258c5c189522af57fa605 -MD5_PPM_422_8x8_TILE = d83dacd9fc73b0a6f10c09acad64eb1e -MD5_PPM_422_16x16_TILE = 35077fb610d72dd743b1eb0cbcfe10fb -MD5_PPM_422_32x32_TILE = e6902ed8a449ecc0f0d6f2bf945f65f7 -MD5_PPM_422_64x64_TILE = 2b4502a8f316cedbde1da7bce3d2231e -MD5_PPM_422_128x128_TILE = f0b5617d578f5e13c8eee215d64d4877 -MD5_PPM_422M_8x8_TILE = 828941d7f41cd6283abd6beffb7fd51d -MD5_PPM_422M_TILE = e877ae1324c4a280b95376f7f018172f -MD5_PPM_444_TILE = 7964e41e67cfb8d0a587c0aa4798f9c3 - -# Test compressing from/decompressing to an arbitrary subregion of a larger -# image buffer - cp $(srcdir)/testimages/testorig.ppm testout_tile.ppm - ./tjbench testout_tile.ppm 95 -rgb -quiet -tile -benchtime 0.01 -warmup 0 >/dev/null 2>&1 - for i in 8 16 32 64 128; do \ - md5/md5cmp $(MD5_PPM_GRAY_TILE) testout_tile_GRAY_Q95_$$i\x$$i.ppm; \ - done - md5/md5cmp $(MD5_PPM_420_8x8_TILE) testout_tile_420_Q95_8x8.ppm - md5/md5cmp $(MD5_PPM_420_16x16_TILE) testout_tile_420_Q95_16x16.ppm - md5/md5cmp $(MD5_PPM_420_32x32_TILE) testout_tile_420_Q95_32x32.ppm - md5/md5cmp $(MD5_PPM_420_64x64_TILE) testout_tile_420_Q95_64x64.ppm - md5/md5cmp $(MD5_PPM_420_128x128_TILE) testout_tile_420_Q95_128x128.ppm - md5/md5cmp $(MD5_PPM_422_8x8_TILE) testout_tile_422_Q95_8x8.ppm - md5/md5cmp $(MD5_PPM_422_16x16_TILE) testout_tile_422_Q95_16x16.ppm - md5/md5cmp $(MD5_PPM_422_32x32_TILE) testout_tile_422_Q95_32x32.ppm - md5/md5cmp $(MD5_PPM_422_64x64_TILE) 
testout_tile_422_Q95_64x64.ppm - md5/md5cmp $(MD5_PPM_422_128x128_TILE) testout_tile_422_Q95_128x128.ppm - for i in 8 16 32 64 128; do \ - md5/md5cmp $(MD5_PPM_444_TILE) testout_tile_444_Q95_$$i\x$$i.ppm; \ - done - rm -f testout_tile_GRAY_* testout_tile_420_* testout_tile_422_* testout_tile_444_* - - ./tjbench testout_tile.ppm 95 -rgb -fastupsample -quiet -tile -benchtime 0.01 -warmup 0 >/dev/null 2>&1 - md5/md5cmp $(MD5_PPM_420M_8x8_TILE) testout_tile_420_Q95_8x8.ppm - for i in 16 32 64 128; do \ - md5/md5cmp $(MD5_PPM_420M_TILE) testout_tile_420_Q95_$$i\x$$i.ppm; \ - done - md5/md5cmp $(MD5_PPM_422M_8x8_TILE) testout_tile_422_Q95_8x8.ppm - for i in 16 32 64 128; do \ - md5/md5cmp $(MD5_PPM_422M_TILE) testout_tile_422_Q95_$$i\x$$i.ppm; \ - done - rm -f testout_tile_GRAY_* testout_tile_420_* testout_tile_422_* testout_tile_444_* testout_tile.ppm - echo GREAT SUCCESS! - -endif - -if CROSS_COMPILING -bittest: testclean -else -bittest: testclean all -endif - -# These tests are carefully crafted to provide full coverage of as many of the -# underlying algorithms as possible (including all of the SIMD-accelerated -# ones.) 
- -# CC: null SAMP: fullsize FDCT: islow ENT: huff - ./cjpeg -rgb -dct int -outfile testout_rgb_islow.jpg $(srcdir)/testimages/testorig.ppm - md5/md5cmp $(MD5_JPEG_RGB_ISLOW) testout_rgb_islow.jpg -# CC: null SAMP: fullsize IDCT: islow ENT: huff - ./djpeg -dct int -ppm -outfile testout_rgb_islow.ppm testout_rgb_islow.jpg - md5/md5cmp $(MD5_PPM_RGB_ISLOW) testout_rgb_islow.ppm - rm -f testout_rgb_islow.ppm -if WITH_12BIT - rm -f testout_rgb_islow.jpg -else -# CC: RGB->RGB565 SAMP: fullsize IDCT: islow ENT: huff - ./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_rgb_islow_565.bmp testout_rgb_islow.jpg - md5/md5cmp $(MD5_BMP_RGB_ISLOW_565) testout_rgb_islow_565.bmp - rm -f testout_rgb_islow_565.bmp -# CC: RGB->RGB565 (dithered) SAMP: fullsize IDCT: islow ENT: huff - ./djpeg -dct int -rgb565 -bmp -outfile testout_rgb_islow_565D.bmp testout_rgb_islow.jpg - md5/md5cmp $(MD5_BMP_RGB_ISLOW_565D) testout_rgb_islow_565D.bmp - rm -f testout_rgb_islow_565D.bmp testout_rgb_islow.jpg -endif - -# CC: RGB->YCC SAMP: fullsize/h2v1 FDCT: ifast ENT: 2-pass huff - ./cjpeg -sample 2x1 -dct fast -opt -outfile testout_422_ifast_opt.jpg $(srcdir)/testimages/testorig.ppm - md5/md5cmp $(MD5_JPEG_422_IFAST_OPT) testout_422_ifast_opt.jpg -# CC: YCC->RGB SAMP: fullsize/h2v1 fancy IDCT: ifast ENT: huff - ./djpeg -dct fast -outfile testout_422_ifast.ppm testout_422_ifast_opt.jpg - md5/md5cmp $(MD5_PPM_422_IFAST) testout_422_ifast.ppm - rm -f testout_422_ifast.ppm -# CC: YCC->RGB SAMP: h2v1 merged IDCT: ifast ENT: huff - ./djpeg -dct fast -nosmooth -outfile testout_422m_ifast.ppm testout_422_ifast_opt.jpg - md5/md5cmp $(MD5_PPM_422M_IFAST) testout_422m_ifast.ppm - rm -f testout_422m_ifast.ppm -if WITH_12BIT - rm -f testout_422_ifast_opt.jpg -else -# CC: YCC->RGB565 SAMP: h2v1 merged IDCT: ifast ENT: huff - ./djpeg -dct int -nosmooth -rgb565 -dither none -bmp -outfile testout_422m_ifast_565.bmp testout_422_ifast_opt.jpg - md5/md5cmp $(MD5_BMP_422M_IFAST_565) testout_422m_ifast_565.bmp - 
rm -f testout_422m_ifast_565.bmp -# CC: YCC->RGB565 (dithered) SAMP: h2v1 merged IDCT: ifast ENT: huff - ./djpeg -dct int -nosmooth -rgb565 -bmp -outfile testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg - md5/md5cmp $(MD5_BMP_422M_IFAST_565D) testout_422m_ifast_565D.bmp - rm -f testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg -endif - -# CC: RGB->YCC SAMP: fullsize/h2v2 FDCT: ifast ENT: prog huff - ./cjpeg -sample 2x2 -quality 100 -dct fast -prog -outfile testout_420_q100_ifast_prog.jpg $(srcdir)/testimages/testorig.ppm - md5/md5cmp $(MD5_JPEG_420_IFAST_Q100_PROG) testout_420_q100_ifast_prog.jpg -# CC: YCC->RGB SAMP: fullsize/h2v2 fancy IDCT: ifast ENT: prog huff - ./djpeg -dct fast -outfile testout_420_q100_ifast.ppm testout_420_q100_ifast_prog.jpg - md5/md5cmp $(MD5_PPM_420_Q100_IFAST) testout_420_q100_ifast.ppm - rm -f testout_420_q100_ifast.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: ifast ENT: prog huff - ./djpeg -dct fast -nosmooth -outfile testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg - md5/md5cmp $(MD5_PPM_420M_Q100_IFAST) testout_420m_q100_ifast.ppm - rm -f testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg - -# CC: RGB->Gray SAMP: fullsize FDCT: islow ENT: huff - ./cjpeg -gray -dct int -outfile testout_gray_islow.jpg $(srcdir)/testimages/testorig.ppm - md5/md5cmp $(MD5_JPEG_GRAY_ISLOW) testout_gray_islow.jpg -# CC: Gray->Gray SAMP: fullsize IDCT: islow ENT: huff - ./djpeg -dct int -outfile testout_gray_islow.ppm testout_gray_islow.jpg - md5/md5cmp $(MD5_PPM_GRAY_ISLOW) testout_gray_islow.ppm - rm -f testout_gray_islow.ppm -# CC: Gray->RGB SAMP: fullsize IDCT: islow ENT: huff - ./djpeg -dct int -rgb -outfile testout_gray_islow_rgb.ppm testout_gray_islow.jpg - md5/md5cmp $(MD5_PPM_GRAY_ISLOW_RGB) testout_gray_islow_rgb.ppm - rm -f testout_gray_islow_rgb.ppm -if WITH_12BIT - rm -f testout_gray_islow.jpg -else -# CC: Gray->RGB565 SAMP: fullsize IDCT: islow ENT: huff - ./djpeg -dct int -rgb565 -dither none -bmp -outfile 
testout_gray_islow_565.bmp testout_gray_islow.jpg - md5/md5cmp $(MD5_BMP_GRAY_ISLOW_565) testout_gray_islow_565.bmp - rm -f testout_gray_islow_565.bmp -# CC: Gray->RGB565 (dithered) SAMP: fullsize IDCT: islow ENT: huff - ./djpeg -dct int -rgb565 -bmp -outfile testout_gray_islow_565D.bmp testout_gray_islow.jpg - md5/md5cmp $(MD5_BMP_GRAY_ISLOW_565D) testout_gray_islow_565D.bmp - rm -f testout_gray_islow_565D.bmp testout_gray_islow.jpg -endif - -# CC: RGB->YCC SAMP: fullsize smooth/h2v2 smooth FDCT: islow -# ENT: 2-pass huff - ./cjpeg -sample 2x2 -smooth 1 -dct int -opt -outfile testout_420s_ifast_opt.jpg $(srcdir)/testimages/testorig.ppm - md5/md5cmp $(MD5_JPEG_420S_IFAST_OPT) testout_420s_ifast_opt.jpg - rm -f testout_420s_ifast_opt.jpg - -# The output of the floating point tests is not validated by default, because -# the output differs depending on the type of floating point math used, and -# this is only deterministic if the DCT/IDCT are implemented using SIMD -# instructions on a particular platform. 
Pass one of the following on the make -# command line to validate the floating point tests against one of the expected -# results: -# -# FLOATTEST=sse validate against the expected results from the libjpeg-turbo -# SSE SIMD extensions -# FLOATTEST=32bit validate against the expected results from the C code -# when running on a 32-bit FPU (or when SSE is being used for -# floating point math, which is generally the default with -# x86-64 compilers) -# FLOATTEST=64bit validate against the exepected results from the C code -# when running on a 64-bit FPU -# FLOATTEST=387 validate against the expected results from the C code when -# the 387 FPU is being used for floating point math (which is -# generally the default with x86 compilers) - -# CC: RGB->YCC SAMP: fullsize/int FDCT: float ENT: prog huff - ./cjpeg -sample 3x2 -dct float -prog -outfile testout_3x2_float_prog.jpg $(srcdir)/testimages/testorig.ppm - if [ "${FLOATTEST}" = "sse" ]; then \ - md5/md5cmp $(MD5_JPEG_3x2_FLOAT_PROG_SSE) testout_3x2_float_prog.jpg; \ - elif [ "${FLOATTEST}" = "32bit" -o "${FLOATTEST}" = "64bit" ]; then \ - md5/md5cmp $(MD5_JPEG_3x2_FLOAT_PROG_32BIT) testout_3x2_float_prog.jpg; \ - elif [ "${FLOATTEST}" = "387" ]; then \ - md5/md5cmp $(MD5_JPEG_3x2_FLOAT_PROG_387) testout_3x2_float_prog.jpg; \ - fi -# CC: YCC->RGB SAMP: fullsize/int IDCT: float ENT: prog huff - ./djpeg -dct float -outfile testout_3x2_float.ppm testout_3x2_float_prog.jpg - if [ "${FLOATTEST}" = "sse" ]; then \ - md5/md5cmp $(MD5_PPM_3x2_FLOAT_SSE) testout_3x2_float.ppm; \ - elif [ "${FLOATTEST}" = "32bit" ]; then \ - md5/md5cmp $(MD5_PPM_3x2_FLOAT_32BIT) testout_3x2_float.ppm; \ - elif [ "${FLOATTEST}" = "64bit" ]; then \ - md5/md5cmp $(MD5_PPM_3x2_FLOAT_64BIT) testout_3x2_float.ppm; \ - elif [ "${FLOATTEST}" = "387" ]; then \ - md5/md5cmp $(MD5_PPM_3x2_FLOAT_387) testout_3x2_float.ppm; \ - fi - rm -f testout_3x2_float.ppm testout_3x2_float_prog.jpg - -# CC: RGB->YCC SAMP: fullsize/int FDCT: ifast ENT: prog huff - 
./cjpeg -sample 3x2 -dct fast -prog -outfile testout_3x2_ifast_prog.jpg $(srcdir)/testimages/testorig.ppm - md5/md5cmp $(MD5_JPEG_3x2_IFAST_PROG) testout_3x2_ifast_prog.jpg -# CC: YCC->RGB SAMP: fullsize/int IDCT: ifast ENT: prog huff - ./djpeg -dct fast -outfile testout_3x2_ifast.ppm testout_3x2_ifast_prog.jpg - md5/md5cmp $(MD5_PPM_3x2_IFAST) testout_3x2_ifast.ppm - rm -f testout_3x2_ifast.ppm testout_3x2_ifast_prog.jpg - -if WITH_ARITH_ENC -# CC: YCC->RGB SAMP: fullsize/h2v2 FDCT: islow ENT: arith - ./cjpeg -dct int -arithmetic -outfile testout_420_islow_ari.jpg $(srcdir)/testimages/testorig.ppm - md5/md5cmp $(MD5_JPEG_420_ISLOW_ARI) testout_420_islow_ari.jpg - rm -f testout_420_islow_ari.jpg - ./jpegtran -arithmetic -outfile testout_420_islow_ari.jpg $(srcdir)/testimages/testimgint.jpg - md5/md5cmp $(MD5_JPEG_420_ISLOW_ARI) testout_420_islow_ari.jpg - rm -f testout_420_islow_ari.jpg -# CC: YCC->RGB SAMP: fullsize FDCT: islow ENT: prog arith - ./cjpeg -sample 1x1 -dct int -prog -arithmetic -outfile testout_444_islow_progari.jpg $(srcdir)/testimages/testorig.ppm - md5/md5cmp $(MD5_JPEG_444_ISLOW_PROGARI) testout_444_islow_progari.jpg - rm -f testout_444_islow_progari.jpg -endif -if WITH_ARITH_DEC -# CC: RGB->YCC SAMP: h2v2 merged IDCT: ifast ENT: arith - ./djpeg -fast -ppm -outfile testout_420m_ifast_ari.ppm $(srcdir)/testimages/testimgari.jpg - md5/md5cmp $(MD5_PPM_420M_IFAST_ARI) testout_420m_ifast_ari.ppm - rm -f testout_420m_ifast_ari.ppm - ./jpegtran -outfile testout_420_islow.jpg $(srcdir)/testimages/testimgari.jpg - md5/md5cmp $(MD5_JPEG_420_ISLOW) testout_420_islow.jpg - rm -f testout_420_islow.jpg -endif - -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 16x16 islow ENT: huff - ./djpeg -dct int -scale 2/1 -nosmooth -ppm -outfile testout_420m_islow_2_1.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_2_1) testout_420m_islow_2_1.ppm - rm -f testout_420m_islow_2_1.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 15x15 islow ENT: huff - ./djpeg -dct 
int -scale 15/8 -nosmooth -ppm -outfile testout_420m_islow_15_8.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_15_8) testout_420m_islow_15_8.ppm - rm -f testout_420m_islow_15_8.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 13x13 islow ENT: huff - ./djpeg -dct int -scale 13/8 -nosmooth -ppm -outfile testout_420m_islow_13_8.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_13_8) testout_420m_islow_13_8.ppm - rm -f testout_420m_islow_13_8.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 11x11 islow ENT: huff - ./djpeg -dct int -scale 11/8 -nosmooth -ppm -outfile testout_420m_islow_11_8.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_11_8) testout_420m_islow_11_8.ppm - rm -f testout_420m_islow_11_8.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 9x9 islow ENT: huff - ./djpeg -dct int -scale 9/8 -nosmooth -ppm -outfile testout_420m_islow_9_8.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_9_8) testout_420m_islow_9_8.ppm - rm -f testout_420m_islow_9_8.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 7x7 islow/14x14 islow ENT: huff - ./djpeg -dct int -scale 7/8 -nosmooth -ppm -outfile testout_420m_islow_7_8.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_7_8) testout_420m_islow_7_8.ppm - rm -f testout_420m_islow_7_8.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 6x6 islow/12x12 islow ENT: huff - ./djpeg -dct int -scale 3/4 -nosmooth -ppm -outfile testout_420m_islow_3_4.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_3_4) testout_420m_islow_3_4.ppm - rm -f testout_420m_islow_3_4.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 5x5 islow/10x10 islow ENT: huff - ./djpeg -dct int -scale 5/8 -nosmooth -ppm -outfile testout_420m_islow_5_8.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_5_8) testout_420m_islow_5_8.ppm - rm -f testout_420m_islow_5_8.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 4x4 islow/8x8 islow ENT: huff - ./djpeg -dct int 
-scale 1/2 -nosmooth -ppm -outfile testout_420m_islow_1_2.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_1_2) testout_420m_islow_1_2.ppm - rm -f testout_420m_islow_1_2.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 3x3 islow/6x6 islow ENT: huff - ./djpeg -dct int -scale 3/8 -nosmooth -ppm -outfile testout_420m_islow_3_8.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_3_8) testout_420m_islow_3_8.ppm - rm -f testout_420m_islow_3_8.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 2x2 islow/4x4 islow ENT: huff - ./djpeg -dct int -scale 1/4 -nosmooth -ppm -outfile testout_420m_islow_1_4.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_1_4) testout_420m_islow_1_4.ppm - rm -f testout_420m_islow_1_4.ppm -# CC: YCC->RGB SAMP: h2v2 merged IDCT: 1x1 islow/2x2 islow ENT: huff - ./djpeg -dct int -scale 1/8 -nosmooth -ppm -outfile testout_420m_islow_1_8.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420M_ISLOW_1_8) testout_420m_islow_1_8.ppm - rm -f testout_420m_islow_1_8.ppm -if WITH_12BIT -else -# CC: YCC->RGB (dithered) SAMP: h2v2 fancy IDCT: islow ENT: huff - ./djpeg -dct int -colors 256 -bmp -outfile testout_420_islow_256.bmp $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_BMP_420_ISLOW_256) testout_420_islow_256.bmp - rm -f testout_420_islow_256.bmp -# CC: YCC->RGB565 SAMP: h2v2 fancy IDCT: islow ENT: huff - ./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_420_islow_565.bmp $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_BMP_420_ISLOW_565) testout_420_islow_565.bmp - rm -f testout_420_islow_565.bmp -# CC: YCC->RGB565 (dithered) SAMP: h2v2 fancy IDCT: islow ENT: huff - ./djpeg -dct int -rgb565 -bmp -outfile testout_420_islow_565D.bmp $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_BMP_420_ISLOW_565D) testout_420_islow_565D.bmp - rm -f testout_420_islow_565D.bmp -# CC: YCC->RGB565 SAMP: h2v2 merged IDCT: islow ENT: huff - ./djpeg -dct int -nosmooth -rgb565 -dither none -bmp 
-outfile testout_420m_islow_565.bmp $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_BMP_420M_ISLOW_565) testout_420m_islow_565.bmp - rm -f testout_420m_islow_565.bmp -# CC: YCC->RGB565 (dithered) SAMP: h2v2 merged IDCT: islow ENT: huff - ./djpeg -dct int -nosmooth -rgb565 -bmp -outfile testout_420m_islow_565D.bmp $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_BMP_420M_ISLOW_565D) testout_420m_islow_565D.bmp - rm -f testout_420m_islow_565D.bmp -endif - -# Partial decode tests. These tests are designed to cover all of the possible -# code paths in jpeg_skip_scanlines(). - -# Context rows: Yes Intra-iMCU row: Yes iMCU row prefetch: No ENT: huff - ./djpeg -dct int -skip 15,31 -ppm -outfile testout_420_islow_skip15,31.ppm $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_PPM_420_ISLOW_SKIP15_31) testout_420_islow_skip15,31.ppm - rm -f testout_420_islow_skip15,31.ppm -# Context rows: Yes Intra-iMCU row: No iMCU row prefetch: Yes ENT: arith -if WITH_ARITH_DEC - ./djpeg -dct int -skip 16,139 -ppm -outfile testout_420_islow_ari_skip16,139.ppm $(srcdir)/testimages/testimgari.jpg - md5/md5cmp $(MD5_PPM_420_ISLOW_ARI_SKIP16_139) testout_420_islow_ari_skip16,139.ppm - rm -f testout_420_islow_ari_skip16,139.ppm -endif -# Context rows: Yes Intra-iMCU row: No iMCU row prefetch: No ENT: prog huff - ./cjpeg -dct int -prog -outfile testout_420_islow_prog.jpg $(srcdir)/testimages/testorig.ppm - ./djpeg -dct int -crop 62x62+71+71 -ppm -outfile testout_420_islow_prog_crop62x62,71,71.ppm testout_420_islow_prog.jpg - md5/md5cmp $(MD5_PPM_420_ISLOW_PROG_CROP62x62_71_71) testout_420_islow_prog_crop62x62,71,71.ppm - rm -f testout_420_islow_prog_crop62x62,71,71.ppm testout_420_islow_prog.jpg -# Context rows: Yes Intra-iMCU row: No iMCU row prefetch: No ENT: arith -if WITH_ARITH_DEC - ./djpeg -dct int -crop 53x53+4+4 -ppm -outfile testout_420_islow_ari_crop53x53,4,4.ppm $(srcdir)/testimages/testimgari.jpg - md5/md5cmp $(MD5_PPM_420_ISLOW_ARI_CROP53x53_4_4) 
testout_420_islow_ari_crop53x53,4,4.ppm - rm -f testout_420_islow_ari_crop53x53,4,4.ppm -endif -# Context rows: No Intra-iMCU row: Yes ENT: huff - ./cjpeg -dct int -sample 1x1 -outfile testout_444_islow.jpg $(srcdir)/testimages/testorig.ppm - ./djpeg -dct int -skip 1,6 -ppm -outfile testout_444_islow_skip1,6.ppm testout_444_islow.jpg - md5/md5cmp $(MD5_PPM_444_ISLOW_SKIP1_6) testout_444_islow_skip1,6.ppm - rm -f testout_444_islow_skip1,6.ppm testout_444_islow.jpg -# Context rows: No Intra-iMCU row: No ENT: prog huff - ./cjpeg -dct int -prog -sample 1x1 -outfile testout_444_islow_prog.jpg $(srcdir)/testimages/testorig.ppm - ./djpeg -dct int -crop 98x98+13+13 -ppm -outfile testout_444_islow_prog_crop98x98,13,13.ppm testout_444_islow_prog.jpg - md5/md5cmp $(MD5_PPM_444_ISLOW_PROG_CROP98x98_13_13) testout_444_islow_prog_crop98x98,13,13.ppm - rm -f testout_444_islow_prog_crop98x98,13,13.ppm testout_444_islow_prog.jpg -# Context rows: No Intra-iMCU row: No ENT: arith -if WITH_ARITH_ENC - ./cjpeg -dct int -arithmetic -sample 1x1 -outfile testout_444_islow_ari.jpg $(srcdir)/testimages/testorig.ppm -if WITH_ARITH_DEC - ./djpeg -dct int -crop 37x37+0+0 -ppm -outfile testout_444_islow_ari_crop37x37,0,0.ppm testout_444_islow_ari.jpg - md5/md5cmp $(MD5_PPM_444_ISLOW_ARI_CROP37x37_0_0) testout_444_islow_ari_crop37x37,0,0.ppm - rm -f testout_444_islow_ari_crop37x37,0,0.ppm -endif - rm -f testout_444_islow_ari.jpg -endif - - ./jpegtran -crop 120x90+20+50 -transpose -perfect -outfile testout_crop.jpg $(srcdir)/testimages/$(TESTORIG) - md5/md5cmp $(MD5_JPEG_CROP) testout_crop.jpg - rm -f testout_crop.jpg - echo GREAT SUCCESS! 
- - -testclean: - rm -f testout* - rm -f *_GRAY_*.bmp - rm -f *_GRAY_*.png - rm -f *_GRAY_*.ppm - rm -f *_GRAY_*.jpg - rm -f *_GRAY.yuv - rm -f *_420_*.bmp - rm -f *_420_*.png - rm -f *_420_*.ppm - rm -f *_420_*.jpg - rm -f *_420.yuv - rm -f *_422_*.bmp - rm -f *_422_*.png - rm -f *_422_*.ppm - rm -f *_422_*.jpg - rm -f *_422.yuv - rm -f *_444_*.bmp - rm -f *_444_*.png - rm -f *_444_*.ppm - rm -f *_444_*.jpg - rm -f *_444.yuv - rm -f *_440_*.bmp - rm -f *_440_*.png - rm -f *_440_*.ppm - rm -f *_440_*.jpg - rm -f *_440.yuv - rm -f *_411_*.bmp - rm -f *_411_*.png - rm -f *_411_*.ppm - rm -f *_411_*.jpg - rm -f *_411.yuv - rm -f tjbenchtest*.log - rm -f tjexampletest*.log - - -tjtest: - sh ./tjbenchtest - sh ./tjbenchtest -alloc - sh ./tjbenchtest -yuv - sh ./tjbenchtest -yuv -alloc -if WITH_JAVA - sh ./tjbenchtest.java - sh ./tjbenchtest.java -yuv -endif - - -pkgscripts/libjpeg-turbo.spec: pkgscripts/libjpeg-turbo.spec.tmpl - cat pkgscripts/libjpeg-turbo.spec.tmpl | sed s@%{__prefix}@$(prefix)@g | \ - sed s@%{__bindir}@$(bindir)@g | sed s@%{__datadir}@$(datadir)@g | \ - sed s@%{__docdir}@$(docdir)@g | sed s@%{__includedir}@$(includedir)@g | \ - sed s@%{__libdir}@$(libdir)@g | sed s@%{__mandir}@$(mandir)@g \ - > pkgscripts/libjpeg-turbo.spec - -rpm: all pkgscripts/libjpeg-turbo.spec - TMPDIR=`mktemp -d /tmp/${PACKAGE_NAME}-build.XXXXXX`; \ - mkdir -p $$TMPDIR/RPMS; \ - ln -fs `pwd` $$TMPDIR/BUILD; \ - rm -f ${PKGNAME}-${VERSION}.${RPMARCH}.rpm; \ - rpmbuild -bb --define "_blddir $$TMPDIR/buildroot" \ - --define "_topdir $$TMPDIR" \ - --target ${RPMARCH} pkgscripts/libjpeg-turbo.spec; \ - cp $$TMPDIR/RPMS/${RPMARCH}/${PKGNAME}-${VERSION}-${BUILD}.${RPMARCH}.rpm \ - ${PKGNAME}-${VERSION}.${RPMARCH}.rpm; \ - rm -rf $$TMPDIR - -srpm: dist-gzip pkgscripts/libjpeg-turbo.spec - TMPDIR=`mktemp -d /tmp/${PACKAGE_NAME}-build.XXXXXX`; \ - mkdir -p $$TMPDIR/RPMS; \ - mkdir -p $$TMPDIR/SRPMS; \ - mkdir -p $$TMPDIR/BUILD; \ - mkdir -p $$TMPDIR/SOURCES; \ - mkdir -p $$TMPDIR/SPECS; 
\ - rm -f ${PKGNAME}-${VERSION}.src.rpm; \ - cp ${PACKAGE_NAME}-${VERSION}.tar.gz $$TMPDIR/SOURCES; \ - cat pkgscripts/libjpeg-turbo.spec | sed s/%{_blddir}/%{_tmppath}/g \ - | sed s/#--\>//g \ - > $$TMPDIR/SPECS/libjpeg-turbo.spec; \ - rpmbuild -bs --define "_topdir $$TMPDIR" $$TMPDIR/SPECS/libjpeg-turbo.spec; \ - cp $$TMPDIR/SRPMS/${PKGNAME}-${VERSION}-${BUILD}.src.rpm \ - ${PKGNAME}-${VERSION}.src.rpm; \ - rm -rf $$TMPDIR - -pkgscripts/makedpkg: pkgscripts/makedpkg.tmpl - cat pkgscripts/makedpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \ - sed s@%{__docdir}@$(docdir)@g | sed s@%{__libdir}@$(libdir)@g \ - > pkgscripts/makedpkg - -deb: all pkgscripts/makedpkg - sh pkgscripts/makedpkg - -pkgscripts/uninstall: pkgscripts/uninstall.tmpl - cat pkgscripts/uninstall.tmpl | sed s@%{__prefix}@$(prefix)@g | \ - sed s@%{__bindir}@$(bindir)@g | sed s@%{__datadir}@$(datadir)@g | \ - sed s@%{__includedir}@$(includedir)@g | sed s@%{__libdir}@$(libdir)@g | \ - sed s@%{__mandir}@$(mandir)@g > pkgscripts/uninstall - -pkgscripts/makemacpkg: pkgscripts/makemacpkg.tmpl - cat pkgscripts/makemacpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \ - sed s@%{__bindir}@$(bindir)@g | sed s@%{__docdir}@$(docdir)@g | \ - sed s@%{__libdir}@$(libdir)@g > pkgscripts/makemacpkg - -if X86_64 - -udmg: all pkgscripts/makemacpkg pkgscripts/uninstall - sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} - -iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall - sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}" - -else - -iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall - sh pkgscripts/makemacpkg -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}" - -endif - -dmg: all pkgscripts/makemacpkg pkgscripts/uninstall - sh pkgscripts/makemacpkg - -pkgscripts/makecygwinpkg: pkgscripts/makecygwinpkg.tmpl - cat pkgscripts/makecygwinpkg.tmpl | sed 
s@%{__prefix}@$(prefix)@g | \ - sed s@%{__docdir}@$(docdir)@g | sed s@%{__libdir}@$(libdir)@g \ - > pkgscripts/makecygwinpkg - -cygwinpkg: all pkgscripts/makecygwinpkg - sh pkgscripts/makecygwinpkg diff --git a/README.ijg b/README.ijg index 9c450ce..2e39f96 100644 --- a/README.ijg +++ b/README.ijg @@ -43,7 +43,7 @@ User documentation: change.log Version-to-version change highlights. Programmer and internal documentation: libjpeg.txt How to use the JPEG library in your own programs. - example.c Sample code for calling the JPEG library. + example.txt Sample code for calling the JPEG library. structure.txt Overview of the JPEG library's internal structure. coderules.txt Coding style rules --- please read if you contribute code. @@ -159,12 +159,6 @@ commercial products, provided that all warranty or liability claims are assumed by the product vendor. -The Unix configuration script "configure" was produced with GNU Autoconf. -It is copyright by the Free Software Foundation but is freely distributable. -The same holds for its supporting scripts (config.guess, config.sub, -ltmain.sh). Another support script, install-sh, is copyright by X Consortium -but is also freely distributable. - The IJG distribution formerly included code to read and write GIF files. To avoid entanglement with the Unisys LZW patent (now expired), GIF reading support has been removed altogether, and the GIF writer has been simplified @@ -185,8 +179,8 @@ We recommend reading one or more of these references before trying to understand the innards of the JPEG software. The best short technical introduction to the JPEG compression algorithm is - Wallace, Gregory K. "The JPEG Still Picture Compression Standard", - Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44. + Wallace, Gregory K. "The JPEG Still Picture Compression Standard", + Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44. 
(Adjacent articles in that issue discuss MPEG motion picture compression, applications of JPEG, and related topics.) If you don't have the CACM issue handy, a PDF file containing a revised version of Wallace's article is @@ -220,14 +214,14 @@ Continuous-tone Still Images, Part 2: Compliance testing" and has document numbers ISO/IEC IS 10918-2, ITU-T T.83. The JPEG standard does not specify all details of an interchangeable file -format. For the omitted details we follow the "JFIF" conventions, revision -1.02. JFIF 1.02 has been adopted as an Ecma International Technical Report -and thus received a formal publication status. It is available as a free -download in PDF format from -http://www.ecma-international.org/publications/techreports/E-TR-098.htm. -A PostScript version of the JFIF document is available at -http://www.ijg.org/files/jfif.ps.gz. There is also a plain text version at -http://www.ijg.org/files/jfif.txt.gz, but it is missing the figures. +format. For the omitted details, we follow the "JFIF" conventions, revision +1.02. JFIF version 1 has been adopted as ISO/IEC 10918-5 (05/2013) and +Recommendation ITU-T T.871 (05/2011): Information technology - Digital +compression and coding of continuous-tone still images: JPEG File Interchange +Format (JFIF). It is available as a free download in PDF file format from +https://www.iso.org/standard/54989.html and http://www.itu.int/rec/T-REC-T.871. +A PDF file of the older JFIF 1.02 specification is available at +http://www.w3.org/Graphics/JPEG/jfif3.pdf. The TIFF 6.0 file format specification can be obtained by FTP from ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme @@ -255,22 +249,26 @@ and other news.answers archive sites, including the official news.answers archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/. 
If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu with body - send usenet/news.answers/jpeg-faq/part1 - send usenet/news.answers/jpeg-faq/part2 - - -FILE FORMAT WARS -================ - -The ISO/IEC JTC1/SC29/WG1 standards committee (also known as JPEG, together -with ITU-T SG16) currently promotes different formats containing the name -"JPEG" which are incompatible with original DCT-based JPEG. IJG therefore does -not support these formats (see REFERENCES). Indeed, one of the original -reasons for developing this free software was to help force convergence on -common, interoperable format standards for JPEG files. -Don't use an incompatible file format! -(In any case, our decoder will remain capable of reading existing JPEG -image files indefinitely.) + send usenet/news.answers/jpeg-faq/part1 + send usenet/news.answers/jpeg-faq/part2 + + +FILE FORMAT COMPATIBILITY +========================= + +This software implements ITU T.81 | ISO/IEC 10918 with some extensions from +ITU T.871 | ISO/IEC 10918-5 (JPEG File Interchange Format-- see REFERENCES). +Informally, the term "JPEG image" or "JPEG file" most often refers to JFIF or +a subset thereof, but there are other formats containing the name "JPEG" that +are incompatible with the DCT-based JPEG standard or with JFIF (for instance, +JPEG 2000 and JPEG XR). This software therefore does not support these +formats. Indeed, one of the original reasons for developing this free software +was to help force convergence on a common, interoperable format standard for +JPEG files. + +JFIF is a minimal or "low end" representation. TIFF/JPEG (TIFF revision 6.0 as +modified by TIFF Technical Note #2) can be used for "high end" applications +that need to record a lot of additional data about an image. 
TO DO diff --git a/README.md b/README.md old mode 100755 new mode 100644 index 74e6eac..a769259 --- a/README.md +++ b/README.md @@ -2,8 +2,9 @@ Background ========== libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, -NEON, AltiVec) to accelerate baseline JPEG compression and decompression on -x86, x86-64, ARM, and PowerPC systems. On such systems, libjpeg-turbo is +AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression +on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG +compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines. In many cases, the @@ -48,7 +49,10 @@ JPEG images: straightforward to achieve using the underlying libjpeg API, such as generating planar YUV images and performing multiple simultaneous lossless transforms on an image. The Java interface for libjpeg-turbo is written on - top of the TurboJPEG API. + top of the TurboJPEG API. The TurboJPEG API is recommended for first-time + users of libjpeg-turbo. Refer to [tjexample.c](tjexample.c) and + [TJExample.java](java/TJExample.java) for examples of its usage and to + <http://libjpeg-turbo.org/Documentation/Documentation> for API documentation. - **libjpeg API**
This is the de facto industry-standard API for compressing and decompressing @@ -56,7 +60,8 @@ JPEG images: more powerful. The libjpeg API implementation in libjpeg-turbo is both API/ABI-compatible and mathematically compatible with libjpeg v6b. It can also optionally be configured to be API/ABI-compatible with libjpeg v7 and v8 - (see below.) + (see below.) Refer to [cjpeg.c](cjpeg.c) and [djpeg.c](djpeg.c) for examples + of its usage and to [libjpeg.txt](libjpeg.txt) for API documentation. There is no significant performance advantage to either API when both are used to perform similar operations. diff --git a/acinclude.m4 b/acinclude.m4 deleted file mode 100644 index 113169f..0000000 --- a/acinclude.m4 +++ /dev/null @@ -1,287 +0,0 @@ -# AC_PROG_NASM -# -------------------------- -# Check that NASM exists and determine flags -AC_DEFUN([AC_PROG_NASM],[ - -AC_ARG_VAR(NASM, [NASM command (used to build the x86/x86-64 SIMD code)]) -if test "x$NASM" = "x"; then - AC_CHECK_PROGS(NASM, [nasm nasmw yasm]) - test -z "$NASM" && AC_MSG_ERROR([no nasm (Netwide Assembler) found]) -fi - -AC_MSG_CHECKING([for object file format of host system]) -case "$host_os" in - cygwin* | mingw* | pw32* | interix*) - case "$host_cpu" in - x86_64) - objfmt='Win64-COFF' - ;; - *) - objfmt='Win32-COFF' - ;; - esac - ;; - msdosdjgpp* | go32*) - objfmt='COFF' - ;; - os2-emx*) # not tested - objfmt='MSOMF' # obj - ;; - linux*coff* | linux*oldld*) - objfmt='COFF' # ??? 
- ;; - linux*aout*) - objfmt='a.out' - ;; - linux*) - case "$host_cpu" in - x86_64) - objfmt='ELF64' - ;; - *) - objfmt='ELF' - ;; - esac - ;; - kfreebsd* | freebsd* | netbsd* | openbsd*) - if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then - objfmt='BSD-a.out' - else - case "$host_cpu" in - x86_64 | amd64) - objfmt='ELF64' - ;; - *) - objfmt='ELF' - ;; - esac - fi - ;; - solaris* | sunos* | sysv* | sco*) - case "$host_cpu" in - x86_64) - objfmt='ELF64' - ;; - *) - objfmt='ELF' - ;; - esac - ;; - darwin* | rhapsody* | nextstep* | openstep* | macos*) - case "$host_cpu" in - x86_64) - objfmt='Mach-O64' - ;; - *) - objfmt='Mach-O' - ;; - esac - ;; - *) - objfmt='ELF ?' - ;; -esac - -AC_MSG_RESULT([$objfmt]) -if test "$objfmt" = 'ELF ?'; then - objfmt='ELF' - AC_MSG_WARN([unexpected host system. assumed that the format is $objfmt.]) -fi - -AC_MSG_CHECKING([for object file format specifier (NAFLAGS) ]) -case "$objfmt" in - MSOMF) NAFLAGS='-fobj -DOBJ32';; - Win32-COFF) NAFLAGS='-fwin32 -DWIN32';; - Win64-COFF) NAFLAGS='-fwin64 -DWIN64 -D__x86_64__';; - COFF) NAFLAGS='-fcoff -DCOFF';; - a.out) NAFLAGS='-faout -DAOUT';; - BSD-a.out) NAFLAGS='-faoutb -DAOUT';; - ELF) NAFLAGS='-felf -DELF';; - ELF64) NAFLAGS='-felf64 -DELF -D__x86_64__';; - RDF) NAFLAGS='-frdf -DRDF';; - Mach-O) NAFLAGS='-fmacho -DMACHO';; - Mach-O64) NAFLAGS='-fmacho64 -DMACHO -D__x86_64__';; -esac -AC_MSG_RESULT([$NAFLAGS]) -AC_SUBST([NAFLAGS]) - -AC_MSG_CHECKING([whether the assembler ($NASM $NAFLAGS) works]) -cat > conftest.asm <&AC_FD_CC - cat conftest.asm >&AC_FD_CC - rm -rf conftest* - AC_MSG_RESULT(no) - AC_MSG_ERROR([installation or configuration problem: assembler cannot create object files.]) -fi - -AC_MSG_CHECKING([whether the linker accepts assembler output]) -try_nasm='${CC-cc} -o conftest${ac_exeext} $LDFLAGS conftest.o $LIBS 1>&AC_FD_CC' -if AC_TRY_EVAL(try_nasm) && test -s conftest${ac_exeext}; then - rm -rf conftest* - AC_MSG_RESULT(yes) -else - rm -rf conftest* - 
AC_MSG_RESULT(no) - AC_MSG_ERROR([configuration problem: maybe object file format mismatch.]) -fi - -]) - -# AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE -# -------------------------- -# Test whether the assembler is suitable and supports NEON instructions -AC_DEFUN([AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE],[ - ac_good_gnu_arm_assembler=no - ac_save_CC="$CC" - ac_save_CFLAGS="$CFLAGS" - CFLAGS="$CCASFLAGS -x assembler-with-cpp" - CC="$CCAS" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ - .text - .fpu neon - .arch armv7a - .object_arch armv4 - .arm - pld [r0] - vmovn.u16 d0, q0]])], ac_good_gnu_arm_assembler=yes) - - ac_use_gas_preprocessor=no - if test "x$ac_good_gnu_arm_assembler" = "xno" ; then - CC="gas-preprocessor.pl $CCAS" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ - .text - .fpu neon - .arch armv7a - .object_arch armv4 - .arm - pld [r0] - vmovn.u16 d0, q0]])], ac_use_gas_preprocessor=yes) - fi - CFLAGS="$ac_save_CFLAGS" - CC="$ac_save_CC" - - if test "x$ac_use_gas_preprocessor" = "xyes" ; then - CCAS="gas-preprocessor.pl $CCAS" - AC_SUBST([CCAS]) - ac_good_gnu_arm_assembler=yes - fi - - if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then - $1 - else - $2 - fi -]) - -# AC_CHECK_COMPATIBLE_MIPSEL_ASSEMBLER_IFELSE -# -------------------------- -# Test whether the assembler is suitable and supports MIPS instructions -AC_DEFUN([AC_CHECK_COMPATIBLE_MIPS_ASSEMBLER_IFELSE],[ - have_mips_dspr2=no - ac_save_CFLAGS="$CFLAGS" - CFLAGS="$CCASFLAGS -mdspr2" - - AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ - - int main () - { - int c = 0, a = 0, b = 0; - __asm__ __volatile__ ( - "precr.qb.ph %[c], %[a], %[b] \n\t" - : [c] "=r" (c) - : [a] "r" (a), [b] "r" (b) - ); - return c; - } - ]])], have_mips_dspr2=yes) - CFLAGS=$ac_save_CFLAGS - - if test "x$have_mips_dspr2" = "xyes" ; then - $1 - else - $2 - fi -]) - -AC_DEFUN([AC_CHECK_COMPATIBLE_ARM64_ASSEMBLER_IFELSE],[ - ac_good_gnu_arm_assembler=no - ac_save_CC="$CC" - ac_save_CFLAGS="$CFLAGS" - CFLAGS="$CCASFLAGS -x assembler-with-cpp" - 
CC="$CCAS" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ - .text - MYVAR .req x0 - movi v0.16b, #100 - mov MYVAR, #100 - .unreq MYVAR]])], ac_good_gnu_arm_assembler=yes) - - ac_use_gas_preprocessor=no - if test "x$ac_good_gnu_arm_assembler" = "xno" ; then - CC="gas-preprocessor.pl $CCAS" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ - .text - MYVAR .req x0 - movi v0.16b, #100 - mov MYVAR, #100 - .unreq MYVAR]])], ac_use_gas_preprocessor=yes) - fi - CFLAGS="$ac_save_CFLAGS" - CC="$ac_save_CC" - - if test "x$ac_use_gas_preprocessor" = "xyes" ; then - CCAS="gas-preprocessor.pl $CCAS" - AC_SUBST([CCAS]) - ac_good_gnu_arm_assembler=yes - fi - - if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then - $1 - else - $2 - fi -]) - -# AC_CHECK_ALTIVEC -# ---------------- -# Test whether AltiVec intrinsics are supported -AC_DEFUN([AC_CHECK_ALTIVEC],[ - ac_save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -maltivec" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ - #include - int main(void) { - __vector int vi = { 0, 0, 0, 0 }; - int i[4]; - vec_st(vi, 0, i); - return i[0]; - }]])], ac_has_altivec=yes) - CFLAGS="$ac_save_CFLAGS" - if test "x$ac_has_altivec" = "xyes" ; then - $1 - else - $2 - fi -]) - -AC_DEFUN([AC_NO_SIMD],[ - AC_MSG_RESULT([no ("$1")]) - with_simd=no; - if test "x${require_simd}" = "xyes"; then - AC_MSG_ERROR([SIMD support not available for this CPU.]) - else - AC_MSG_WARN([SIMD support not available for this CPU. 
Performance will\ - suffer.]) - fi -]) diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 4f2d6cc..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,57 +0,0 @@ -install: - - cmd: >- - mkdir c:\installers - - mkdir c:\temp - - curl -fSL -o c:\installers\nasm-2.10.01-win32.zip http://www.nasm.us/pub/nasm/releasebuilds/2.10.01/win32/nasm-2.10.01-win32.zip - - 7z x c:\installers\nasm-2.10.01-win32.zip -oc:\ > c:\installers\nasm.install.log - - set INCLUDE=c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\include - - set LIB=c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\lib\x64 - - set PATH=c:\nasm-2.10.01;c:\Program Files (x86)\NSIS;c:\msys64\mingw32\bin;c:\msys64\usr\bin;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\amd64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\IDE;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\bin\x64;c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\bin;%PATH% - - set MSYSTEM=MINGW32 - - bash -c "pacman --noconfirm -S autoconf automake libtool zip" - - mklink /d "%ProgramData%\Oracle\Java32" "c:\Program Files (x86)\Java\jdk1.6.0" - - git clone --depth=1 https://github.com/libjpeg-turbo/buildscripts.git c:/buildscripts - -build_script: - - cmd: >- - for /f %%i in ('"cygpath %CD%"') do set MINGWPATH=%%i - - bash c:/buildscripts/buildljt -r file://%MINGWPATH% -b /c/ljt.nightly %APPVEYOR_REPO_BRANCH% -v - - move c:\ljt.nightly\files\*.tar.gz . - - move c:\ljt.nightly\files\*.exe . - - move c:\ljt.nightly\files\*.zip . - - move c:\ljt.nightly\log-windows.txt . 
- -artifacts: - - path: '*.tar.gz' - name: Source tarball - - - path: '*-gcc*.exe' - name: SDK for MinGW - - - path: '*-vc*.exe' - name: SDK for Visual C++ - - - path: '*.zip' - name: Windows JNI JARs - - - path: 'log-windows.txt' - name: Build log - -test: off - -deploy: off diff --git a/bmp.c b/bmp.c deleted file mode 100644 index 2b8e80c..0000000 --- a/bmp.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (C)2011, 2015 D. R. Commander. All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - Neither the name of the libjpeg-turbo Project nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include -#include -#include -#include "cdjpeg.h" -#include -#include -#include "tjutil.h" -#include "bmp.h" - - -/* This duplicates the functionality of the VirtualGL bitmap library using - the components from cjpeg and djpeg */ - - -/* Error handling (based on example in example.c) */ - -static char errStr[JMSG_LENGTH_MAX]="No error"; - -struct my_error_mgr -{ - struct jpeg_error_mgr pub; - jmp_buf setjmp_buffer; -}; -typedef struct my_error_mgr *my_error_ptr; - -static void my_error_exit(j_common_ptr cinfo) -{ - my_error_ptr myerr=(my_error_ptr)cinfo->err; - (*cinfo->err->output_message)(cinfo); - longjmp(myerr->setjmp_buffer, 1); -} - -/* Based on output_message() in jerror.c */ - -static void my_output_message(j_common_ptr cinfo) -{ - (*cinfo->err->format_message)(cinfo, errStr); -} - -#define _throw(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s", m); \ - retval=-1; goto bailout;} -#define _throwunix(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s\n%s", m, \ - strerror(errno)); retval=-1; goto bailout;} - - -static void pixelconvert(unsigned char *srcbuf, int srcpf, int srcbottomup, - unsigned char *dstbuf, int dstpf, int dstbottomup, int w, int h) -{ - unsigned char *srcrowptr=srcbuf, *srccolptr; - int srcps=tjPixelSize[srcpf]; - int srcstride=srcbottomup? -w*srcps:w*srcps; - unsigned char *dstrowptr=dstbuf, *dstcolptr; - int dstps=tjPixelSize[dstpf]; - int dststride=dstbottomup? -w*dstps:w*dstps; - int row, col; - - if(srcbottomup) srcrowptr=&srcbuf[w*srcps*(h-1)]; - if(dstbottomup) dstrowptr=&dstbuf[w*dstps*(h-1)]; - - /* NOTE: These quick & dirty CMYK<->RGB conversion routines are for testing - purposes only. Properly converting between CMYK and RGB requires a color - management system. */ - - if(dstpf==TJPF_CMYK) - { - for(row=0; row1.0) c=1.0; - if(c<0.) c=0.; - if(m>1.0) m=1.0; - if(m<0.) m=0.; - if(y>1.0) y=1.0; - if(y<0.) y=0.; - if(k>1.0) k=1.0; - if(k<0.) 
k=0.; - *dstcolptr++=(unsigned char)(255.0-c*255.0+0.5); - *dstcolptr++=(unsigned char)(255.0-m*255.0+0.5); - *dstcolptr++=(unsigned char)(255.0-y*255.0+0.5); - *dstcolptr++=(unsigned char)(255.0-k*255.0+0.5); - } - } - } - else if(srcpf==TJPF_CMYK) - { - for(row=0; row255.0) r=255.0; - if(r<0.) r=0.; - if(g>255.0) g=255.0; - if(g<0.) g=0.; - if(b>255.0) b=255.0; - if(b<0.) b=0.; - dstcolptr[tjRedOffset[dstpf]]=(unsigned char)(r+0.5); - dstcolptr[tjGreenOffset[dstpf]]=(unsigned char)(g+0.5); - dstcolptr[tjBlueOffset[dstpf]]=(unsigned char)(b+0.5); - } - } - } - else - { - for(row=0; row=TJ_NUMPF) - _throw("loadbmp(): Invalid argument"); - - if((file=fopen(filename, "rb"))==NULL) - _throwunix("loadbmp(): Cannot open input file"); - - cinfo.err=jpeg_std_error(&jerr.pub); - jerr.pub.error_exit=my_error_exit; - jerr.pub.output_message=my_output_message; - - if(setjmp(jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. */ - retval=-1; goto bailout; - } - - jpeg_create_compress(&cinfo); - if((tempc=getc(file))<0 || ungetc(tempc, file)==EOF) - _throwunix("loadbmp(): Could not read input file") - else if(tempc==EOF) _throw("loadbmp(): Input file contains no data"); - - if(tempc=='B') - { - if((src=jinit_read_bmp(&cinfo))==NULL) - _throw("loadbmp(): Could not initialize bitmap loader"); - } - else if(tempc=='P') - { - if((src=jinit_read_ppm(&cinfo))==NULL) - _throw("loadbmp(): Could not initialize bitmap loader"); - } - else _throw("loadbmp(): Unsupported file type"); - - src->input_file=file; - (*src->start_input)(&cinfo, src); - (*cinfo.mem->realize_virt_arrays)((j_common_ptr)&cinfo); - - *w=cinfo.image_width; *h=cinfo.image_height; - - if(cinfo.input_components==1 && cinfo.in_color_space==JCS_RGB) - srcpf=TJPF_GRAY; - else srcpf=TJPF_RGB; - - dstps=tjPixelSize[dstpf]; - if((*buf=(unsigned char *)malloc((*w)*(*h)*dstps))==NULL) - _throw("loadbmp(): Memory allocation failure"); - - while(cinfo.next_scanlineget_pixel_rows)(&cinfo, src); - 
for(i=0; ibuffer[i], srcpf, 0, outbuf, dstpf, bottomup, *w, - nlines); - } - cinfo.next_scanline+=nlines; - } - - (*src->finish_input)(&cinfo, src); - - bailout: - jpeg_destroy_compress(&cinfo); - if(file) fclose(file); - if(retval<0 && buf && *buf) {free(*buf); *buf=NULL;} - return retval; -} - - -int savebmp(char *filename, unsigned char *buf, int w, int h, int srcpf, - int bottomup) -{ - int retval=0, srcps, dstpf; - struct jpeg_decompress_struct dinfo; - struct my_error_mgr jerr; - djpeg_dest_ptr dst; - FILE *file=NULL; - char *ptr=NULL; - - memset(&dinfo, 0, sizeof(struct jpeg_decompress_struct)); - - if(!filename || !buf || w<1 || h<1 || srcpf<0 || srcpf>=TJ_NUMPF) - _throw("savebmp(): Invalid argument"); - - if((file=fopen(filename, "wb"))==NULL) - _throwunix("savebmp(): Cannot open output file"); - - dinfo.err=jpeg_std_error(&jerr.pub); - jerr.pub.error_exit=my_error_exit; - jerr.pub.output_message=my_output_message; - - if(setjmp(jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. 
*/ - retval=-1; goto bailout; - } - - jpeg_create_decompress(&dinfo); - if(srcpf==TJPF_GRAY) - { - dinfo.out_color_components=dinfo.output_components=1; - dinfo.out_color_space=JCS_GRAYSCALE; - } - else - { - dinfo.out_color_components=dinfo.output_components=3; - dinfo.out_color_space=JCS_RGB; - } - dinfo.image_width=w; dinfo.image_height=h; - dinfo.global_state=DSTATE_READY; - dinfo.scale_num=dinfo.scale_denom=1; - - ptr=strrchr(filename, '.'); - if(ptr && !strcasecmp(ptr, ".bmp")) - { - if((dst=jinit_write_bmp(&dinfo, 0))==NULL) - _throw("savebmp(): Could not initialize bitmap writer"); - } - else - { - if((dst=jinit_write_ppm(&dinfo))==NULL) - _throw("savebmp(): Could not initialize PPM writer"); - } - - dst->output_file=file; - (*dst->start_output)(&dinfo, dst); - (*dinfo.mem->realize_virt_arrays)((j_common_ptr)&dinfo); - - if(srcpf==TJPF_GRAY) dstpf=srcpf; - else dstpf=TJPF_RGB; - srcps=tjPixelSize[srcpf]; - - while(dinfo.output_scanlinebuffer_height; - for(i=0; ibuffer[i], dstpf, 0, w, - nlines); - } - (*dst->put_pixel_rows)(&dinfo, dst, nlines); - dinfo.output_scanline+=nlines; - } - - (*dst->finish_output)(&dinfo, dst); - - bailout: - jpeg_destroy_decompress(&dinfo); - if(file) fclose(file); - return retval; -} - -const char *bmpgeterr(void) -{ - return errStr; -} diff --git a/bmp.h b/bmp.h deleted file mode 100644 index c50c260..0000000 --- a/bmp.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C)2011 D. R. Commander. All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. 
- * - Neither the name of the libjpeg-turbo Project nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __BMP_H__ -#define __BMP_H__ - -#include "./turbojpeg.h" - -int loadbmp(char *filename, unsigned char **buf, int *w, int *h, int pf, - int bottomup); - -int savebmp(char *filename, unsigned char *buf, int w, int h, int pf, - int bottomup); - -const char *bmpgeterr(void); - -#endif diff --git a/cderror.h b/cderror.h index 63de498..4f2c7a3 100644 --- a/cderror.h +++ b/cderror.h @@ -2,7 +2,7 @@ * cderror.h * * Copyright (C) 1994-1997, Thomas G. Lane. - * Modified 2009 by Guido Vollbeding. + * Modified 2009-2017 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README.ijg * file. 
@@ -26,7 +26,7 @@ #define JMAKE_ENUM_LIST #else /* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */ -#define JMESSAGE(code,string) +#define JMESSAGE(code, string) #endif /* CDERROR_H */ #endif /* JMESSAGE */ @@ -34,11 +34,11 @@ typedef enum { -#define JMESSAGE(code,string) code , +#define JMESSAGE(code, string) code, #endif /* JMAKE_ENUM_LIST */ -JMESSAGE(JMSG_FIRSTADDONCODE=1000, NULL) /* Must be first entry! */ +JMESSAGE(JMSG_FIRSTADDONCODE = 1000, NULL) /* Must be first entry! */ #ifdef BMP_SUPPORTED JMESSAGE(JERR_BMP_BADCMAP, "Unsupported BMP colormap format") @@ -49,6 +49,7 @@ JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB") JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported") JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image") JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM") +JMESSAGE(JERR_BMP_OUTOFRANGE, "Numeric value out of range in BMP file") JMESSAGE(JTRC_BMP, "%ux%u 24-bit BMP image") JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image") JMESSAGE(JTRC_BMP_OS2, "%ux%u 24-bit OS2 BMP image") @@ -75,8 +76,8 @@ JMESSAGE(JWRN_GIF_NOMOREDATA, "Ran out of GIF bits") #ifdef PPM_SUPPORTED JMESSAGE(JERR_PPM_COLORSPACE, "PPM output must be grayscale or RGB") JMESSAGE(JERR_PPM_NONNUMERIC, "Nonnumeric data in PPM file") -JMESSAGE(JERR_PPM_TOOLARGE, "Integer value too large in PPM file") JMESSAGE(JERR_PPM_NOT, "Not a PPM/PGM file") +JMESSAGE(JERR_PPM_OUTOFRANGE, "Numeric value out of range in PPM file") JMESSAGE(JTRC_PGM, "%ux%u PGM image") JMESSAGE(JTRC_PGM_TEXT, "%ux%u text PGM image") JMESSAGE(JTRC_PPM, "%ux%u PPM image") diff --git a/cdjpeg.c b/cdjpeg.c index 441d671..e0e382d 100644 --- a/cdjpeg.c +++ b/cdjpeg.c @@ -28,11 +28,12 @@ #ifdef PROGRESS_REPORT METHODDEF(void) -progress_monitor (j_common_ptr cinfo) +progress_monitor(j_common_ptr cinfo) { - cd_progress_ptr prog = (cd_progress_ptr) cinfo->progress; + cd_progress_ptr prog = (cd_progress_ptr)cinfo->progress; int total_passes 
= prog->pub.total_passes + prog->total_extra_passes; - int percent_done = (int) (prog->pub.pass_counter*100L/prog->pub.pass_limit); + int percent_done = + (int)(prog->pub.pass_counter * 100L / prog->pub.pass_limit); if (percent_done != prog->percent_done) { prog->percent_done = percent_done; @@ -49,7 +50,7 @@ progress_monitor (j_common_ptr cinfo) GLOBAL(void) -start_progress_monitor (j_common_ptr cinfo, cd_progress_ptr progress) +start_progress_monitor(j_common_ptr cinfo, cd_progress_ptr progress) { /* Enable progress display, unless trace output is on */ if (cinfo->err->trace_level == 0) { @@ -63,7 +64,7 @@ start_progress_monitor (j_common_ptr cinfo, cd_progress_ptr progress) GLOBAL(void) -end_progress_monitor (j_common_ptr cinfo) +end_progress_monitor(j_common_ptr cinfo) { /* Clear away progress display */ if (cinfo->err->trace_level == 0) { @@ -82,7 +83,7 @@ end_progress_monitor (j_common_ptr cinfo) */ GLOBAL(boolean) -keymatch (char *arg, const char *keyword, int minchars) +keymatch(char *arg, const char *keyword, int minchars) { register int ca, ck; register int nmatched = 0; @@ -109,9 +110,9 @@ keymatch (char *arg, const char *keyword, int minchars) */ GLOBAL(FILE *) -read_stdin (void) +read_stdin(void) { - FILE * input_file = stdin; + FILE *input_file = stdin; #ifdef USE_SETMODE /* need to hack file mode? */ setmode(fileno(stdin), O_BINARY); @@ -127,9 +128,9 @@ read_stdin (void) GLOBAL(FILE *) -write_stdout (void) +write_stdout(void) { - FILE * output_file = stdout; + FILE *output_file = stdout; #ifdef USE_SETMODE /* need to hack file mode? */ setmode(fileno(stdout), O_BINARY); diff --git a/cdjpeg.h b/cdjpeg.h index bb49fbf..9868a0b 100644 --- a/cdjpeg.h +++ b/cdjpeg.h @@ -96,41 +96,42 @@ typedef struct cdjpeg_progress_mgr *cd_progress_ptr; /* Module selection routines for I/O modules. 
*/ -EXTERN(cjpeg_source_ptr) jinit_read_bmp (j_compress_ptr cinfo); -EXTERN(djpeg_dest_ptr) jinit_write_bmp (j_decompress_ptr cinfo, - boolean is_os2); -EXTERN(cjpeg_source_ptr) jinit_read_gif (j_compress_ptr cinfo); -EXTERN(djpeg_dest_ptr) jinit_write_gif (j_decompress_ptr cinfo); -EXTERN(cjpeg_source_ptr) jinit_read_ppm (j_compress_ptr cinfo); -EXTERN(djpeg_dest_ptr) jinit_write_ppm (j_decompress_ptr cinfo); -EXTERN(cjpeg_source_ptr) jinit_read_rle (j_compress_ptr cinfo); -EXTERN(djpeg_dest_ptr) jinit_write_rle (j_decompress_ptr cinfo); -EXTERN(cjpeg_source_ptr) jinit_read_targa (j_compress_ptr cinfo); -EXTERN(djpeg_dest_ptr) jinit_write_targa (j_decompress_ptr cinfo); +EXTERN(cjpeg_source_ptr) jinit_read_bmp(j_compress_ptr cinfo, + boolean use_inversion_array); +EXTERN(djpeg_dest_ptr) jinit_write_bmp(j_decompress_ptr cinfo, boolean is_os2, + boolean use_inversion_array); +EXTERN(cjpeg_source_ptr) jinit_read_gif(j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_gif(j_decompress_ptr cinfo); +EXTERN(cjpeg_source_ptr) jinit_read_ppm(j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_ppm(j_decompress_ptr cinfo); +EXTERN(cjpeg_source_ptr) jinit_read_rle(j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_rle(j_decompress_ptr cinfo); +EXTERN(cjpeg_source_ptr) jinit_read_targa(j_compress_ptr cinfo); +EXTERN(djpeg_dest_ptr) jinit_write_targa(j_decompress_ptr cinfo); /* cjpeg support routines (in rdswitch.c) */ -EXTERN(boolean) read_quant_tables (j_compress_ptr cinfo, char *filename, - boolean force_baseline); -EXTERN(boolean) read_scan_script (j_compress_ptr cinfo, char *filename); -EXTERN(boolean) set_quality_ratings (j_compress_ptr cinfo, char *arg, - boolean force_baseline); -EXTERN(boolean) set_quant_slots (j_compress_ptr cinfo, char *arg); -EXTERN(boolean) set_sample_factors (j_compress_ptr cinfo, char *arg); +EXTERN(boolean) read_quant_tables(j_compress_ptr cinfo, char *filename, + boolean force_baseline); +EXTERN(boolean) 
read_scan_script(j_compress_ptr cinfo, char *filename); +EXTERN(boolean) set_quality_ratings(j_compress_ptr cinfo, char *arg, + boolean force_baseline); +EXTERN(boolean) set_quant_slots(j_compress_ptr cinfo, char *arg); +EXTERN(boolean) set_sample_factors(j_compress_ptr cinfo, char *arg); /* djpeg support routines (in rdcolmap.c) */ -EXTERN(void) read_color_map (j_decompress_ptr cinfo, FILE *infile); +EXTERN(void) read_color_map(j_decompress_ptr cinfo, FILE *infile); /* common support routines (in cdjpeg.c) */ -EXTERN(void) enable_signal_catcher (j_common_ptr cinfo); -EXTERN(void) start_progress_monitor (j_common_ptr cinfo, - cd_progress_ptr progress); -EXTERN(void) end_progress_monitor (j_common_ptr cinfo); -EXTERN(boolean) keymatch (char *arg, const char *keyword, int minchars); -EXTERN(FILE *) read_stdin (void); -EXTERN(FILE *) write_stdout (void); +EXTERN(void) enable_signal_catcher(j_common_ptr cinfo); +EXTERN(void) start_progress_monitor(j_common_ptr cinfo, + cd_progress_ptr progress); +EXTERN(void) end_progress_monitor(j_common_ptr cinfo); +EXTERN(boolean) keymatch(char *arg, const char *keyword, int minchars); +EXTERN(FILE *) read_stdin(void); +EXTERN(FILE *) write_stdout(void); /* miscellaneous useful macros */ @@ -151,3 +152,6 @@ EXTERN(FILE *) write_stdout (void); #ifndef EXIT_WARNING #define EXIT_WARNING 2 #endif + +#define IsExtRGB(cs) \ + (cs == JCS_RGB || (cs >= JCS_EXT_RGB && cs <= JCS_EXT_ARGB)) diff --git a/ci/keys.enc b/ci/keys.enc deleted file mode 100644 index 4cd333f5708397e18d4384182a4172f933e21d86..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4624 zcmV+r67TItaZ{fwFP>}$P7TZu8bz$Im)>M!@pZJRGvFw+X~JCT`!am>#06x2bl0^) z%IGCC>@l5pl0LF0jRDO}<_0K>o3L?%$$l5~Xnx)Q%F*lQj)O!l^S{QnNv^4{w5LP` z)RYpKd6Nryh~3q3Z>`B0`&s!3Ymsql35{v)vk>{m)~s2$hw{q)3Iety^`HM2cd`8N zV_RaYl4dckmP9Eud1Gnu{_+5LR^kimXvWyeYrl=hP2GYcN*Z6e$f-COo|X-8dUoPn zfFPv-IFo~sSC1Y6MpW~fArM!G)WNciN(bO}iQezC%kO@yihiK3Yd#%JSh0W6?=A~1 
zGl@jYi|CKPavL#xQ>svP3n!t7DEAv+00xYqiQMok;3`dhKD)k5uwx%GRsm%1h78#l zTx;kz+W&yMy>QId4zy)Aiivt}sWB1!`%YZITXjjnH0)uyK+<(UV6CDIAwgEcZ1Gbt zWKOMdf*Z=Mc<%ihP*STP%+HyP{=y>h0z6wa1l`))ehjR$gIoy43VP=aomDmM(mF@o z0vvpaVv-*u<`2WIyKh|HgU=_)Nt=JeWId$IegisYyd?^%oXxHY)~4TT5K~n9>N6c zlNMbCyZm2JF61~>QnVn`u5P}%Qz$GUV%EAKHq=0}pPb4rcWXF%nLfg5O%Yvgr+_jk zkm(_UCsX9f+V3*;6DGd+wY0LtCXcY-9^Q*gOyo!aJHYUt}dwSwymDuod{wJEy_ZIh`M>b$kZyR(BnO$n&SHJ{!#g5{$rEnT(sJmqGNr5{=Z z13Y=i;&;J?0)rg=e57&`JCO{m#dWQM?6CFG_zOO0WR6vif?6}@6IDLztjaNDdyqKCWwrN=L<0kay^v;G6Q4;HtJ86sW(55QY z|K3~TWbP%Gk=Qeiap^bD_54{E^L(?RYh6^=R0y;N`LbP6N;He=L9dE^d&C5MFy@8@ z6=il3VwuRePVO_Wkz#=dj3T^RfY-hy#+N~VfFs<7reL)W82fcXcL=!Co`z}*p}e%r z@JaY4emnb@`@@Fn0%Mf*NRVG*nK}cR(OJ~%gf@dtxVX>LZ9a)dpW@lIph(A65R2AY zig8veL`#y1t~#z`{8$!$LR5*}v4B+H(0-*mCS^NNNXpbAet^dcWDC?m1|Q67xf9Ii zSlE~rbCV8flC^Tx$AX&EuChT~{;th~$nnO8U=*MUwC*2o@Ja%v{^;)yG|KOBOwn{? zhmv3?uU?u2gfbQV41R&5-M)>@B=Sk}@k+aG{yqlKP^c@?v~3D>EDudPBfc`9ZQOeC z9m=}SEhc#jrWcIdw}}7g6F3U{J7#E~&GCcvLK*@ge`^d@q9I&yxjvxt@=bze#=cgf z7j~G6Rc#06p<)v^gdXUGJQ~$IjbZeI#!sO>D52uuQEUo2Z-`{EkrK~xoGG->%w@o8 zSc@7{b{0DOv~@OUXGvC^$+dA?&P{B}MnfQrto^D*5w6HA8hVCy4KiVeLMy@1OvqW<9XI0K#I$wN=jO?RKpT>3tPUaTs z=*BG#r~vhlv4AQw-hN%LH{e~h{E=(N*v3>H9o$@5dStqI88PL{b+AQxK1XBX4=a(1| z>0THqQt2%}5EFt#!}%{vRur*C64m_LuQ~6#7>a_$k%Mg~hYhj+Sbt%4k+|)RC}!_2 z6yrZ}>j)7pi$q%;ix_*vHcVB3;1n%#Dz>eec%>E~LYGcJ$qCkF&qJYx&6qHy&G7L1V+s-$ACM19uUeE#(Y8G?_k3VU}Cx^V{*3d9Z^lL+FKEYO5fvK8P{O`GCGygO*!H_Rdu4w~3ooBrnTPKdqaRoci-D8E zoCqDvO*iBT1Bf#}S;n{n7T*IE9KF_x`wuWBbO&(_BGu#t^2s_ z2touE>+shVQc1Flbr=`XranXBiKnRM^3wv>-Ny)DhhaQCRSR)lx<((AgQP?O=wsog zdOI_6e*K$JY6vPMd)`ALDddv)79o14hCdi6gI8T!7*0p3az)86K-L|KOCtrfpTCPU z7r)7?+|D{0&V1ox(MY9uU_g1^Nb%!;@@%Bj*r#}&VjT={@#*-U0RyQfY_S!i?{a&4 zHN}!MHGZ|_P!_eheN_{5OLCWo-(&ug)*lZxHU{bs@+y%4F#INqT{I3-80C5F2=@C) zlHY9s@B$AVbMn3pEp7Gq_Nog$@4Di#Ge7K!NP~fn(FP6$*@L;3i`lY78_UCr*~tlH zlQIgvig(k3wXLB@n%lun6Tr1erygGo-ePBBYc9}&=-gO5JmVW(*yAKFh|^$SW86}F zOc3G(17xSg!N*>_w@b6u=tu_ZPoFEop4AmY~l 
z70ttIh2LeCCE#mee^VR@@J7OkPc}hU%kgX^gHTp2Z>AFzPv_OLU~Fdt$_jP&bE82) zKV<#tgMf)8Q#QEnM^$P-IEd~UepIetu;h<{ZJV9C3_(~4u9!l!Ia#HrVy#Kyh}j>b zR`IOK%-pHxZrN$))}_q1G}c$pc|%etu$Ycd8Y{pDb)FY&Rx(}x3<#0J#!~~lCt9|J z@cK^a;HLsX6Y{*w*Ja>Bow|HCre894lr=i?qC-s%5RL0<#0w4#aC$^rbS&6%*Nyz# z`jB>MM3&jg4%_2%2l$%!3XmpiUlpkI4f2Q2Dt<{>%ETH^uXi;{VOvWj z>}az}_P-(vbRVYIcZsvo8QJfyKQSs)Q)ZfSmMe|XM>D-MYQv;mB#tib?5NMT1F>*= zo)m8a%fOO!%W~S;9DisGw^qFb?d4e6G3pC|xNR%Yj~o}&`=AM5b@ElA%cp(W%+RJM zt#@|;nLuU4+YX}3D@GEWMC(don3b67g6{9NbfG-ON|8B1%N}x%m1vLU7CsyqV8>Jd>#*gkZ>WaC zP_4eKd~O`Uhm!j1sj>7M=|+c<(@r||nf|em0V40{lbyU8bI7eifA`Ead~aa92WLsKZtGFm`F;LcHv41xHi#o^+m$(59VKQq;HD z!-7MqJqj6-v(Y+Porxh`XgdJmYzIhh(;;g5bpZexXwO|c_`SI5P2L6rjK z$zm5@eoOe%r(GF5Owq0e{g&PFVBpa5$b@i+xS}Y^P!_2Hn+&|LdWSKsC=_gW6Yl{@ zZ+!7~GFRe}%Pa(IQ;LGF7iO$mHx$oEjka>kJ1_zzpwrvyTZ`ZcwGGMqTQsOX&i-p% zs;#rkx53eRIgR$W`;nG^VfR*s1?O`ZAA75nt)y}jb#mkaDg|44x|N`17`&bOP8L?3 z3Bw>N{+Fv_U~6)5j(n&dLop6A6nh5+b!U^ZjR;__kx0^YQ2uKak}0V$Z8MNPsD(}| z&Zmq4xYR8vB&U!{ADKm^rrf|GugP`$QdNi|M`vK!@I>E6dg^tiy!Hl!JgEt8cRPh; zE&E~)N2yGinv0JSg4HO*g-xDXKw@t0EVnD4xs4=N`!N-SsB+oh zntD#;wzk9Xo_$IzGImgf5Lp|uFpG?~(1Am^uSW@9W!St|C}SbcdJc(WlU#gVD;MZJ zga*_(nN*sVTTE#nU@o~_6TtIf(_TgZg3eV!#ZwjSS1GU|;zLVR2`EQrWv{C@NDlc?2%Puv)uU=%YM%;&= zP}0;S#)-#Pu`#M}T>VysSNgsQTfIP4l*ThNd%HaYeieioeRQu7R#iZEhYZ`OCmfTq zTx0)DRt)6|cX{@#RiIse_@aG%a9@E?yCgz~(FMJ1qYD;3^<;<+tXNsY&CUonq0nq{ zT3|xV_)@*6VliK|jHe^;bi@#HeJXF%sX@yw((pDhdmv~TAzZ8BolkijzHx*%6<%Qi Gu5yFmO5$Yz diff --git a/cjpeg.1 b/cjpeg.1 index 283fc81..a3e47ba 100644 --- a/cjpeg.1 +++ b/cjpeg.1 @@ -46,7 +46,7 @@ compressing a grayscale BMP file, because .B cjpeg isn't bright enough to notice whether a BMP file uses only shades of gray. By saying -.BR \-grayscale , +.BR \-grayscale, you'll get a smaller JPEG file that takes less time to process. .TP .B \-rgb @@ -187,6 +187,9 @@ method may also give different results on different machines due to varying roundoff behavior, whereas the integer methods should give the same results on all machines. 
.TP +.BI \-icc " file" +Embed ICC color management profile contained in the specified file. +.TP .BI \-restart " N" Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is attached to the number. diff --git a/cjpeg.c b/cjpeg.c index 9d282b8..07e7db1 100644 --- a/cjpeg.c +++ b/cjpeg.c @@ -31,6 +31,11 @@ #include "jversion.h" /* for version message */ #include "jconfigint.h" +#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ +extern void *malloc(size_t size); +extern void free(void *ptr); +#endif + #ifdef USE_CCOMMAND /* command-line reader for Macintosh */ #ifdef __MWERKS__ #include /* Metrowerks needs this */ @@ -44,7 +49,7 @@ /* Create the add-on message string table. */ -#define JMESSAGE(code,string) string , +#define JMESSAGE(code, string) string, static const char * const cdjpeg_message_table[] = { #include "cderror.h" @@ -82,7 +87,7 @@ static boolean is_targa; /* records user -targa switch */ LOCAL(cjpeg_source_ptr) -select_file_type (j_compress_ptr cinfo, FILE *infile) +select_file_type(j_compress_ptr cinfo, FILE *infile) { int c; @@ -102,7 +107,7 @@ select_file_type (j_compress_ptr cinfo, FILE *infile) switch (c) { #ifdef BMP_SUPPORTED case 'B': - return jinit_read_bmp(cinfo); + return jinit_read_bmp(cinfo, TRUE); #endif #ifdef GIF_SUPPORTED case 'G': @@ -139,12 +144,13 @@ select_file_type (j_compress_ptr cinfo, FILE *infile) static const char *progname; /* program name for error messages */ +static char *icc_filename; /* for -icc switch */ static char *outfilename; /* for -outfile switch */ boolean memdst; /* for -memdst switch */ LOCAL(void) -usage (void) +usage(void) /* complain about bad command line */ { fprintf(stderr, "usage: %s [switches] ", progname); @@ -184,6 +190,7 @@ usage (void) fprintf(stderr, " -dct float Use floating-point DCT method%s\n", (JDCT_DEFAULT == JDCT_FLOAT ? 
" (default)" : "")); #endif + fprintf(stderr, " -icc FILE Embed ICC profile contained in FILE\n"); fprintf(stderr, " -restart N Set restart interval in rows, or in blocks with B\n"); #ifdef INPUT_SMOOTHING_SUPPORTED fprintf(stderr, " -smooth N Smooth dithered input (N=1..100 is strength)\n"); @@ -208,8 +215,8 @@ usage (void) LOCAL(int) -parse_switches (j_compress_ptr cinfo, int argc, char **argv, - int last_file_arg_seen, boolean for_real) +parse_switches(j_compress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) /* Parse optional switches. * Returns argv[] index of first file-name argument (== argc if none). * Any file names with indexes <= last_file_arg_seen are ignored; @@ -234,6 +241,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, force_baseline = FALSE; /* by default, allow 16-bit quantizers */ simple_progressive = FALSE; is_targa = FALSE; + icc_filename = NULL; outfilename = NULL; memdst = FALSE; cinfo->err->trace_level = 0; @@ -284,7 +292,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, /* On first -d, print version identification */ static boolean printed_version = FALSE; - if (! printed_version) { + if (!printed_version) { fprintf(stderr, "%s version %s (build %s)\n", PACKAGE_NAME, VERSION, BUILD); fprintf(stderr, "%s\n\n", JCOPYRIGHT); @@ -299,7 +307,8 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, PACKAGE_NAME, VERSION, BUILD); exit(EXIT_SUCCESS); - } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) { + } else if (keymatch(arg, "grayscale", 2) || + keymatch(arg, "greyscale", 2)) { /* Force a monochrome JPEG file to be generated. */ jpeg_set_colorspace(cinfo, JCS_GRAYSCALE); @@ -307,6 +316,12 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, /* Force an RGB JPEG file to be generated. */ jpeg_set_colorspace(cinfo, JCS_RGB); + } else if (keymatch(arg, "icc", 1)) { + /* Set ICC filename. 
*/ + if (++argn >= argc) /* advance to next argument */ + usage(); + icc_filename = argv[argn]; + } else if (keymatch(arg, "maxmemory", 3)) { /* Maximum memory in Kb (or Mb with 'm'). */ long lval; @@ -392,10 +407,10 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, if (lval < 0 || lval > 65535L) usage(); if (ch == 'b' || ch == 'B') { - cinfo->restart_interval = (unsigned int) lval; + cinfo->restart_interval = (unsigned int)lval; cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */ } else { - cinfo->restart_in_rows = (int) lval; + cinfo->restart_in_rows = (int)lval; /* restart_interval will be computed during startup */ } @@ -450,19 +465,19 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, /* Set quantization tables for selected quality. */ /* Some or all may be overridden if -qtables is present. */ if (qualityarg != NULL) /* process -quality if it was present */ - if (! set_quality_ratings(cinfo, qualityarg, force_baseline)) + if (!set_quality_ratings(cinfo, qualityarg, force_baseline)) usage(); if (qtablefile != NULL) /* process -qtables if it was present */ - if (! read_quant_tables(cinfo, qtablefile, force_baseline)) + if (!read_quant_tables(cinfo, qtablefile, force_baseline)) usage(); if (qslotsarg != NULL) /* process -qslots if it was present */ - if (! set_quant_slots(cinfo, qslotsarg)) + if (!set_quant_slots(cinfo, qslotsarg)) usage(); if (samplearg != NULL) /* process -sample if it was present */ - if (! set_sample_factors(cinfo, samplearg)) + if (!set_sample_factors(cinfo, samplearg)) usage(); #ifdef C_PROGRESSIVE_SUPPORTED @@ -472,7 +487,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, #ifdef C_MULTISCAN_FILES_SUPPORTED if (scansarg != NULL) /* process -scans if it was present */ - if (! 
read_scan_script(cinfo, scansarg)) + if (!read_scan_script(cinfo, scansarg)) usage(); #endif } @@ -486,7 +501,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, */ int -main (int argc, char **argv) +main(int argc, char **argv) { struct jpeg_compress_struct cinfo; struct jpeg_error_mgr jerr; @@ -496,6 +511,9 @@ main (int argc, char **argv) int file_index; cjpeg_source_ptr src_mgr; FILE *input_file; + FILE *icc_file; + JOCTET *icc_profile = NULL; + long icc_len = 0; FILE *output_file = NULL; unsigned char *outbuffer = NULL; unsigned long outsize = 0; @@ -539,14 +557,14 @@ main (int argc, char **argv) if (!memdst) { /* Must have either -outfile switch or explicit output file name */ if (outfilename == NULL) { - if (file_index != argc-2) { + if (file_index != argc - 2) { fprintf(stderr, "%s: must name one input and one output file\n", progname); usage(); } - outfilename = argv[file_index+1]; + outfilename = argv[file_index + 1]; } else { - if (file_index != argc-1) { + if (file_index != argc - 1) { fprintf(stderr, "%s: must name one input and one output file\n", progname); usage(); @@ -555,7 +573,7 @@ main (int argc, char **argv) } #else /* Unix style: expect zero or one file name */ - if (file_index < argc-1) { + if (file_index < argc - 1) { fprintf(stderr, "%s: only one input file\n", progname); usage(); } @@ -583,8 +601,35 @@ main (int argc, char **argv) output_file = write_stdout(); } + if (icc_filename != NULL) { + if ((icc_file = fopen(icc_filename, READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, icc_filename); + exit(EXIT_FAILURE); + } + if (fseek(icc_file, 0, SEEK_END) < 0 || + (icc_len = ftell(icc_file)) < 1 || + fseek(icc_file, 0, SEEK_SET) < 0) { + fprintf(stderr, "%s: can't determine size of %s\n", progname, + icc_filename); + exit(EXIT_FAILURE); + } + if ((icc_profile = (JOCTET *)malloc(icc_len)) == NULL) { + fprintf(stderr, "%s: can't allocate memory for ICC profile\n", progname); + fclose(icc_file); + 
exit(EXIT_FAILURE); + } + if (fread(icc_profile, icc_len, 1, icc_file) < 1) { + fprintf(stderr, "%s: can't read ICC profile from %s\n", progname, + icc_filename); + free(icc_profile); + fclose(icc_file); + exit(EXIT_FAILURE); + } + fclose(icc_file); + } + #ifdef PROGRESS_REPORT - start_progress_monitor((j_common_ptr) &cinfo, &progress); + start_progress_monitor((j_common_ptr)&cinfo, &progress); #endif /* Figure out the input file format, and set up to read it. */ @@ -611,10 +656,13 @@ main (int argc, char **argv) /* Start compressor */ jpeg_start_compress(&cinfo, TRUE); + if (icc_profile != NULL) + jpeg_write_icc_profile(&cinfo, icc_profile, (unsigned int)icc_len); + /* Process data */ while (cinfo.next_scanline < cinfo.image_height) { num_scanlines = (*src_mgr->get_pixel_rows) (&cinfo, src_mgr); - (void) jpeg_write_scanlines(&cinfo, src_mgr->buffer, num_scanlines); + (void)jpeg_write_scanlines(&cinfo, src_mgr->buffer, num_scanlines); } /* Finish compression and release memory */ @@ -629,7 +677,7 @@ main (int argc, char **argv) fclose(output_file); #ifdef PROGRESS_REPORT - end_progress_monitor((j_common_ptr) &cinfo); + end_progress_monitor((j_common_ptr)&cinfo); #endif if (memdst) { @@ -638,6 +686,9 @@ main (int argc, char **argv) free(outbuffer); } + if (icc_profile != NULL) + free(icc_profile); + /* All done. */ exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS); return 0; /* suppress no-return-value warnings */ diff --git a/cmakescripts/BuildPackages.cmake b/cmakescripts/BuildPackages.cmake new file mode 100644 index 0000000..57f0672 --- /dev/null +++ b/cmakescripts/BuildPackages.cmake @@ -0,0 +1,177 @@ +# This file is included from the top-level CMakeLists.txt. We just store it +# here to avoid cluttering up that file. 
+ +set(PKGNAME ${CMAKE_PROJECT_NAME} CACHE STRING + "Distribution package name (default: ${CMAKE_PROJECT_NAME})") +set(PKGVENDOR "The ${CMAKE_PROJECT_NAME} Project" CACHE STRING + "Vendor name to be included in distribution package descriptions (default: The ${CMAKE_PROJECT_NAME} Project)") +set(PKGURL "http://www.${CMAKE_PROJECT_NAME}.org" CACHE STRING + "URL of project web site to be included in distribution package descriptions (default: http://www.${CMAKE_PROJECT_NAME}.org)") +set(PKGEMAIL "information@${CMAKE_PROJECT_NAME}.org" CACHE STRING + "E-mail of project maintainer to be included in distribution package descriptions (default: information@${CMAKE_PROJECT_NAME}.org") +set(PKGID "com.${CMAKE_PROJECT_NAME}.${PKGNAME}" CACHE STRING + "Globally unique package identifier (reverse DNS notation) (default: com.${CMAKE_PROJECT_NAME}.${PKGNAME})") + + +############################################################################### +# Linux RPM and DEB +############################################################################### + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + +set(RPMARCH ${CMAKE_SYSTEM_PROCESSOR}) +if(CPU_TYPE STREQUAL "x86_64") + set(DEBARCH amd64) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "armv7*") + set(DEBARCH armhf) +elseif(CPU_TYPE STREQUAL "arm64") + set(DEBARCH ${CPU_TYPE}) +elseif(CPU_TYPE STREQUAL "arm") + set(DEBARCH armel) +elseif(CMAKE_SYSTEM_PROCESSOR_LC STREQUAL "ppc64le") + set(DEBARCH ppc64el) +elseif(CPU_TYPE STREQUAL "powerpc" AND BITS EQUAL 32) + set(RPMARCH ppc) + set(DEBARCH ppc) +else() + set(DEBARCH ${CMAKE_SYSTEM_PROCESSOR}) +endif() +message(STATUS "RPM architecture = ${RPMARCH}, DEB architecture = ${DEBARCH}") + +# Re-set CMAKE_POSITION_INDEPENDENT_CODE so that the RPM spec file works +# properly +boolean_number(CMAKE_POSITION_INDEPENDENT_CODE) + +configure_file(release/makerpm.in pkgscripts/makerpm) +configure_file(release/rpm.spec.in pkgscripts/rpm.spec @ONLY) + +add_custom_target(rpm sh pkgscripts/makerpm + SOURCES 
pkgscripts/makerpm) + +configure_file(release/makesrpm.in pkgscripts/makesrpm) + +add_custom_target(srpm sh pkgscripts/makesrpm + SOURCES pkgscripts/makesrpm + DEPENDS dist) + +configure_file(release/makedpkg.in pkgscripts/makedpkg) +configure_file(release/deb-control.in pkgscripts/deb-control) + +add_custom_target(deb sh pkgscripts/makedpkg + SOURCES pkgscripts/makedpkg) + +endif() # Linux + + +############################################################################### +# Windows installer (NullSoft Installer) +############################################################################### + +if(WIN32) + +if(MSVC) + set(INST_PLATFORM "Visual C++") + set(INST_NAME ${CMAKE_PROJECT_NAME}-${VERSION}-vc) + set(INST_REG_NAME ${CMAKE_PROJECT_NAME}) +elseif(MINGW) + set(INST_PLATFORM GCC) + set(INST_NAME ${CMAKE_PROJECT_NAME}-${VERSION}-gcc) + set(INST_REG_NAME ${CMAKE_PROJECT_NAME}-gcc) + set(INST_DEFS -DGCC) +endif() + +if(BITS EQUAL 64) + set(INST_PLATFORM "${INST_PLATFORM} 64-bit") + set(INST_NAME ${INST_NAME}64) + set(INST_REG_NAME ${INST_DIR}64) + set(INST_DEFS ${INST_DEFS} -DWIN64) +endif() + +if(WITH_JAVA) + set(INST_DEFS ${INST_DEFS} -DJAVA) +endif() + +if(MSVC_IDE) + set(INST_DEFS ${INST_DEFS} "-DBUILDDIR=${CMAKE_CFG_INTDIR}\\") +else() + set(INST_DEFS ${INST_DEFS} "-DBUILDDIR=") +endif() + +string(REGEX REPLACE "/" "\\\\" INST_DIR ${CMAKE_INSTALL_PREFIX}) + +configure_file(release/installer.nsi.in installer.nsi @ONLY) + +if(WITH_JAVA) + set(JAVA_DEPEND turbojpeg-java) +endif() +add_custom_target(installer + makensis -nocd ${INST_DEFS} installer.nsi + DEPENDS jpeg jpeg-static turbojpeg turbojpeg-static rdjpgcom wrjpgcom + cjpeg djpeg jpegtran tjbench ${JAVA_DEPEND} + SOURCES installer.nsi) + +endif() # WIN32 + + +############################################################################### +# Cygwin Package +############################################################################### + +if(CYGWIN) + +configure_file(release/makecygwinpkg.in 
pkgscripts/makecygwinpkg) + +add_custom_target(cygwinpkg sh pkgscripts/makecygwinpkg) + +endif() # CYGWIN + + +############################################################################### +# Mac DMG +############################################################################### + +if(APPLE) + +set(DEFAULT_OSX_32BIT_BUILD ${CMAKE_SOURCE_DIR}/osxx86) +set(OSX_32BIT_BUILD ${DEFAULT_OSX_32BIT_BUILD} CACHE PATH + "Directory containing 32-bit (i386) Mac build to include in universal binaries (default: ${DEFAULT_OSX_32BIT_BUILD})") +set(DEFAULT_IOS_ARMV7_BUILD ${CMAKE_SOURCE_DIR}/iosarmv7) +set(IOS_ARMV7_BUILD ${DEFAULT_IOS_ARMV7_BUILD} CACHE PATH + "Directory containing ARMv7 iOS build to include in universal binaries (default: ${DEFAULT_IOS_ARMV7_BUILD})") +set(DEFAULT_IOS_ARMV7S_BUILD ${CMAKE_SOURCE_DIR}/iosarmv7s) +set(IOS_ARMV7S_BUILD ${DEFAULT_IOS_ARMV7S_BUILD} CACHE PATH + "Directory containing ARMv7s iOS build to include in universal binaries (default: ${DEFAULT_IOS_ARMV7S_BUILD})") +set(DEFAULT_IOS_ARMV8_BUILD ${CMAKE_SOURCE_DIR}/iosarmv8) +set(IOS_ARMV8_BUILD ${DEFAULT_IOS_ARMV8_BUILD} CACHE PATH + "Directory containing ARMv8 iOS build to include in universal binaries (default: ${DEFAULT_IOS_ARMV8_BUILD})") + +configure_file(release/makemacpkg.in pkgscripts/makemacpkg) +configure_file(release/Distribution.xml.in pkgscripts/Distribution.xml) +configure_file(release/uninstall.in pkgscripts/uninstall) + +add_custom_target(dmg sh pkgscripts/makemacpkg + SOURCES pkgscripts/makemacpkg) + +add_custom_target(udmg sh pkgscripts/makemacpkg universal + SOURCES pkgscripts/makemacpkg) + +endif() # APPLE + + +############################################################################### +# Generic +############################################################################### + +add_custom_target(dist + COMMAND git archive --prefix=${CMAKE_PROJECT_NAME}-${VERSION}/ HEAD | + gzip > ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_PROJECT_NAME}-${VERSION}.tar.gz + WORKING_DIRECTORY 
${CMAKE_CURRENT_SOURCE_DIR}) + +configure_file(release/maketarball.in pkgscripts/maketarball) + +add_custom_target(tarball sh pkgscripts/maketarball + SOURCES pkgscripts/maketarball) + +configure_file(release/libjpeg.pc.in pkgscripts/libjpeg.pc @ONLY) + +configure_file(release/libturbojpeg.pc.in pkgscripts/libturbojpeg.pc @ONLY) diff --git a/cmakescripts/GNUInstallDirs.cmake b/cmakescripts/GNUInstallDirs.cmake new file mode 100644 index 0000000..ef564bb --- /dev/null +++ b/cmakescripts/GNUInstallDirs.cmake @@ -0,0 +1,416 @@ +#.rst: +# GNUInstallDirs +# -------------- +# +# Define GNU standard installation directories +# +# Provides install directory variables as defined by the +# `GNU Coding Standards`_. +# +# .. _`GNU Coding Standards`: https://www.gnu.org/prep/standards/html_node/Directory-Variables.html +# +# Result Variables +# ^^^^^^^^^^^^^^^^ +# +# Inclusion of this module defines the following variables: +# +# ``CMAKE_INSTALL_`` +# +# Destination for files of a given type. This value may be passed to +# the ``DESTINATION`` options of :command:`install` commands for the +# corresponding file type. +# +# ``CMAKE_INSTALL_FULL_`` +# +# The absolute path generated from the corresponding ``CMAKE_INSTALL_`` +# value. If the value is not already an absolute path, an absolute path +# is constructed typically by prepending the value of the +# :variable:`CMAKE_INSTALL_PREFIX` variable. However, there are some +# `special cases`_ as documented below. 
+# +# where ```` is one of: +# +# ``BINDIR`` +# user executables (``bin``) +# ``SBINDIR`` +# system admin executables (``sbin``) +# ``LIBEXECDIR`` +# program executables (``libexec``) +# ``SYSCONFDIR`` +# read-only single-machine data (``etc``) +# ``SHAREDSTATEDIR`` +# modifiable architecture-independent data (``com``) +# ``LOCALSTATEDIR`` +# modifiable single-machine data (``var``) +# ``LIBDIR`` +# object code libraries (``lib`` or ``lib64`` +# or ``lib/`` on Debian) +# ``INCLUDEDIR`` +# C header files (``include``) +# ``OLDINCLUDEDIR`` +# C header files for non-gcc (``/usr/include``) +# ``DATAROOTDIR`` +# read-only architecture-independent data root (``share``) +# ``DATADIR`` +# read-only architecture-independent data (``DATAROOTDIR``) +# ``INFODIR`` +# info documentation (``DATAROOTDIR/info``) +# ``LOCALEDIR`` +# locale-dependent data (``DATAROOTDIR/locale``) +# ``MANDIR`` +# man documentation (``DATAROOTDIR/man``) +# ``DOCDIR`` +# documentation root (``DATAROOTDIR/doc/PROJECT_NAME``) +# +# If the includer does not define a value the above-shown default will be +# used and the value will appear in the cache for editing by the user. +# +# Special Cases +# ^^^^^^^^^^^^^ +# +# The following values of :variable:`CMAKE_INSTALL_PREFIX` are special: +# +# ``/`` +# +# For ```` other than the ``SYSCONFDIR`` and ``LOCALSTATEDIR``, +# the value of ``CMAKE_INSTALL_`` is prefixed with ``usr/`` if +# it is not user-specified as an absolute path. For example, the +# ``INCLUDEDIR`` value ``include`` becomes ``usr/include``. +# This is required by the `GNU Coding Standards`_, which state: +# +# When building the complete GNU system, the prefix will be empty +# and ``/usr`` will be a symbolic link to ``/``. +# +# ``/usr`` +# +# For ```` equal to ``SYSCONFDIR`` or ``LOCALSTATEDIR``, the +# ``CMAKE_INSTALL_FULL_`` is computed by prepending just ``/`` +# to the value of ``CMAKE_INSTALL_`` if it is not user-specified +# as an absolute path. 
For example, the ``SYSCONFDIR`` value ``etc`` +# becomes ``/etc``. This is required by the `GNU Coding Standards`_. +# +# ``/opt/...`` +# +# For ```` equal to ``SYSCONFDIR`` or ``LOCALSTATEDIR``, the +# ``CMAKE_INSTALL_FULL_`` is computed by *appending* the prefix +# to the value of ``CMAKE_INSTALL_`` if it is not user-specified +# as an absolute path. For example, the ``SYSCONFDIR`` value ``etc`` +# becomes ``/etc/opt/...``. This is defined by the +# `Filesystem Hierarchy Standard`_. +# +# .. _`Filesystem Hierarchy Standard`: https://refspecs.linuxfoundation.org/FHS_3.0/fhs/index.html +# +# Macros +# ^^^^^^ +# +# .. command:: GNUInstallDirs_get_absolute_install_dir +# +# :: +# +# GNUInstallDirs_get_absolute_install_dir(absvar var) +# +# Set the given variable ``absvar`` to the absolute path contained +# within the variable ``var``. This is to allow the computation of an +# absolute path, accounting for all the special cases documented +# above. While this macro is used to compute the various +# ``CMAKE_INSTALL_FULL_`` variables, it is exposed publicly to +# allow users who create additional path variables to also compute +# absolute paths where necessary, using the same logic. + +#============================================================================= +# Copyright 2016 D. R. Commander +# Copyright 2016 Dmitry Marakasov +# Copyright 2016 Roger Leigh +# Copyright 2015 Alex Turbov +# Copyright 2014 Rolf Eike Beer +# Copyright 2014 Daniele E. Domenichelli +# Copyright 2013 Dimitri John Ledkov +# Copyright 2011 Alex Neundorf +# Copyright 2011 Eric NOULARD +# Copyright 2011, 2013-2015 Kitware, Inc. +# Copyright 2011 Nikita Krupen'ko +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the names of Kitware, Inc., the Insight Software Consortium, +# nor the names of their contributors may be used to endorse or promote +# products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#============================================================================= + +# Installation directories +# + +macro(GNUInstallDirs_set_install_dir var docstring) + # If CMAKE_INSTALL_PREFIX changes and CMAKE_INSTALL_*DIR is still set to the + # default value, then modify it accordingly. This presumes that the default + # value may change based on the prefix. 
+ + set(_GNUInstallDirs_CMAKE_INSTALL_FORCE_${var} "") + if(NOT DEFINED CMAKE_INSTALL_${var}) + set(_GNUInstallDirs_CMAKE_INSTALL_DEFAULT_${var} 1 CACHE INTERNAL + "CMAKE_INSTALL_${var} has default value") + elseif(DEFINED _GNUInstallDirs_CMAKE_INSTALL_LAST_DEFAULT_${var} AND + NOT "${_GNUInstallDirs_CMAKE_INSTALL_LAST_DEFAULT_${var}}" STREQUAL + "${CMAKE_INSTALL_DEFAULT_${var}}" AND + _GNUInstallDirs_CMAKE_INSTALL_DEFAULT_${var} AND + "${_GNUInstallDirs_CMAKE_INSTALL_LAST_${var}}" STREQUAL + "${CMAKE_INSTALL_${var}}") + set(_GNUInstallDirs_CMAKE_INSTALL_FORCE_${var} "FORCE") + endif() + + set(CMAKE_INSTALL_${var} "${CMAKE_INSTALL_DEFAULT_${var}}" CACHE PATH + "${docstring} (Default: ${CMAKE_INSTALL_DEFAULT_${var}})" + ${_GNUInstallDirs_CMAKE_INSTALL_FORCE_${var}}) + + if(NOT "${CMAKE_INSTALL_${var}}" STREQUAL "${CMAKE_INSTALL_DEFAULT_${var}}") + unset(_GNUInstallDirs_CMAKE_INSTALL_DEFAULT_${var} CACHE) + endif() + + # Save for next run + set(_GNUInstallDirs_CMAKE_INSTALL_LAST_${var} "${CMAKE_INSTALL_${var}}" + CACHE INTERNAL "CMAKE_INSTALL_${var} during last run") + set(_GNUInstallDirs_CMAKE_INSTALL_LAST_DEFAULT_${var} + "${CMAKE_INSTALL_DEFAULT_${var}}" CACHE INTERNAL + "CMAKE_INSTALL_DEFAULT_${var} during last run") +endmacro() + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_BINDIR) + set(CMAKE_INSTALL_DEFAULT_BINDIR "bin") +endif() +GNUInstallDirs_set_install_dir(BINDIR + "Directory into which user executables should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_SBINDIR) + set(CMAKE_INSTALL_DEFAULT_SBINDIR "sbin") +endif() +GNUInstallDirs_set_install_dir(SBINDIR + "Directory into which system admin executables should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_LIBEXECDIR) + set(CMAKE_INSTALL_DEFAULT_LIBEXECDIR "libexec") +endif() +GNUInstallDirs_set_install_dir(LIBEXECDIR + "Directory under which executables run by other programs should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_SYSCONFDIR) + set(CMAKE_INSTALL_DEFAULT_SYSCONFDIR "etc") 
+endif() +GNUInstallDirs_set_install_dir(SYSCONFDIR + "Directory into which machine-specific read-only ASCII data and configuration files should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_SHAREDSTATEDIR) + set(CMAKE_INSTALL_DEFAULT_SHAREDSTATEDIR "com") +endif() +GNUInstallDirs_set_install_dir(SHAREDSTATEDIR + "Directory into which architecture-independent run-time-modifiable data files should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_LOCALSTATEDIR) + set(CMAKE_INSTALL_DEFAULT_LOCALSTATEDIR "var") +endif() +GNUInstallDirs_set_install_dir(LOCALSTATEDIR + "Directory into which machine-specific run-time-modifiable data files should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_LIBDIR) + set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib") + # Override this default 'lib' with 'lib64' iff: + # - we are on Linux system but NOT cross-compiling + # - we are NOT on debian + # - we are on a 64 bits system + # reason is: amd64 ABI: http://www.x86-64.org/documentation/abi.pdf + # For Debian with multiarch, use 'lib/${CMAKE_LIBRARY_ARCHITECTURE}' if + # CMAKE_LIBRARY_ARCHITECTURE is set (which contains e.g. "i386-linux-gnu" + # and CMAKE_INSTALL_PREFIX is "/usr" + # See http://wiki.debian.org/Multiarch + if(CMAKE_SYSTEM_NAME MATCHES "^(Linux|kFreeBSD|GNU)$" + AND NOT CMAKE_CROSSCOMPILING) + if (EXISTS "/etc/debian_version") # is this a debian system ? + if(CMAKE_LIBRARY_ARCHITECTURE) + if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/?$") + set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib/${CMAKE_LIBRARY_ARCHITECTURE}") + endif() + endif() + else() # not debian, rely on CMAKE_SIZEOF_VOID_P: + if(NOT DEFINED CMAKE_SIZEOF_VOID_P) + message(AUTHOR_WARNING + "Unable to determine default CMAKE_INSTALL_LIBDIR directory because no target architecture is known. 
" + "Please enable at least one language before including GNUInstallDirs.") + else() + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + set(CMAKE_INSTALL_DEFAULT_LIBDIR "lib64") + endif() + endif() + endif() + endif() +endif() +GNUInstallDirs_set_install_dir(LIBDIR + "Directory into which object files and object code libraries should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_INCLUDEDIR) + set(CMAKE_INSTALL_DEFAULT_INCLUDEDIR "include") +endif() +GNUInstallDirs_set_install_dir(INCLUDEDIR + "Directory into which C header files should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_OLDINCLUDEDIR) + set(CMAKE_INSTALL_DEFAULT_OLDINCLUDEDIR "/usr/include") +endif() +GNUInstallDirs_set_install_dir(OLDINCLUDEDIR + PATH "Directory into which C header files for non-GCC compilers should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_DATAROOTDIR) + set(CMAKE_INSTALL_DEFAULT_DATAROOTDIR "share") +endif() +GNUInstallDirs_set_install_dir(DATAROOTDIR + "The root of the directory tree for read-only architecture-independent data files") + +#----------------------------------------------------------------------------- +# Values whose defaults are relative to DATAROOTDIR. Store empty values in +# the cache and store the defaults in local variables if the cache values are +# not set explicitly. This auto-updates the defaults as DATAROOTDIR changes. 
+ +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_DATADIR) + set(CMAKE_INSTALL_DEFAULT_DATADIR "") +endif() +GNUInstallDirs_set_install_dir(DATADIR + "The directory under which read-only architecture-independent data files should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_INFODIR) + if(CMAKE_SYSTEM_NAME MATCHES "^(.*BSD|DragonFly)$") + set(CMAKE_INSTALL_DEFAULT_INFODIR "info") + else() + set(CMAKE_INSTALL_DEFAULT_INFODIR "/info") + endif() +endif() +GNUInstallDirs_set_install_dir(INFODIR + "The directory into which info documentation files should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_MANDIR) + if(CMAKE_SYSTEM_NAME MATCHES "^(.*BSD|DragonFly)$") + set(CMAKE_INSTALL_DEFAULT_MANDIR "man") + else() + set(CMAKE_INSTALL_DEFAULT_MANDIR "/man") + endif() +endif() +GNUInstallDirs_set_install_dir(MANDIR + "The directory under which man pages should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_LOCALEDIR) + set(CMAKE_INSTALL_DEFAULT_LOCALEDIR "/locale") +endif() +GNUInstallDirs_set_install_dir(LOCALEDIR + "The directory under which locale-specific message catalogs should be installed") + +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_DOCDIR) + set(CMAKE_INSTALL_DEFAULT_DOCDIR "/doc/${PROJECT_NAME}") +endif() +GNUInstallDirs_set_install_dir(DOCDIR + "The directory into which documentation files (other than info files) should be installed") + +#----------------------------------------------------------------------------- + +mark_as_advanced( + CMAKE_INSTALL_BINDIR + CMAKE_INSTALL_SBINDIR + CMAKE_INSTALL_LIBEXECDIR + CMAKE_INSTALL_SYSCONFDIR + CMAKE_INSTALL_SHAREDSTATEDIR + CMAKE_INSTALL_LOCALSTATEDIR + CMAKE_INSTALL_LIBDIR + CMAKE_INSTALL_INCLUDEDIR + CMAKE_INSTALL_OLDINCLUDEDIR + CMAKE_INSTALL_DATAROOTDIR + CMAKE_INSTALL_DATADIR + CMAKE_INSTALL_INFODIR + CMAKE_INSTALL_LOCALEDIR + CMAKE_INSTALL_MANDIR + CMAKE_INSTALL_DOCDIR + ) + +macro(GNUInstallDirs_get_absolute_install_dir absvar var) + string(REGEX REPLACE "[<>]" "@" ${var} "${${var}}") + # Handle the 
specific case of an empty CMAKE_INSTALL_DATAROOTDIR + if(NOT CMAKE_INSTALL_DATAROOTDIR AND + ${var} MATCHES "\@CMAKE_INSTALL_DATAROOTDIR\@/") + string(CONFIGURE "${${var}}" ${var} @ONLY) + string(REGEX REPLACE "^/" "" ${var} "${${var}}") + else() + string(CONFIGURE "${${var}}" ${var} @ONLY) + endif() + if(NOT IS_ABSOLUTE "${${var}}") + # Handle special cases: + # - CMAKE_INSTALL_PREFIX == / + # - CMAKE_INSTALL_PREFIX == /usr + # - CMAKE_INSTALL_PREFIX == /opt/... + if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/") + if("${dir}" STREQUAL "SYSCONFDIR" OR "${dir}" STREQUAL "LOCALSTATEDIR") + set(${absvar} "/${${var}}") + else() + if (NOT "${${var}}" MATCHES "^usr/") + set(${var} "usr/${${var}}") + endif() + set(${absvar} "/${${var}}") + endif() + elseif("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/?$") + if("${dir}" STREQUAL "SYSCONFDIR" OR "${dir}" STREQUAL "LOCALSTATEDIR") + set(${absvar} "/${${var}}") + else() + set(${absvar} "${CMAKE_INSTALL_PREFIX}/${${var}}") + endif() + elseif("${CMAKE_INSTALL_PREFIX}" MATCHES "^/opt/.*") + if("${dir}" STREQUAL "SYSCONFDIR" OR "${dir}" STREQUAL "LOCALSTATEDIR") + set(${absvar} "/${${var}}${CMAKE_INSTALL_PREFIX}") + else() + set(${absvar} "${CMAKE_INSTALL_PREFIX}/${${var}}") + endif() + else() + set(${absvar} "${CMAKE_INSTALL_PREFIX}/${${var}}") + endif() + else() + set(${absvar} "${${var}}") + endif() + string(REGEX REPLACE "/$" "" ${absvar} "${${absvar}}") +endmacro() + +# Result directories +# +foreach(dir + BINDIR + SBINDIR + LIBEXECDIR + SYSCONFDIR + SHAREDSTATEDIR + LOCALSTATEDIR + LIBDIR + INCLUDEDIR + OLDINCLUDEDIR + DATAROOTDIR + DATADIR + INFODIR + LOCALEDIR + MANDIR + DOCDIR + ) + GNUInstallDirs_get_absolute_install_dir(CMAKE_INSTALL_FULL_${dir} CMAKE_INSTALL_${dir}) +endforeach() diff --git a/cmakescripts/cmake_uninstall.cmake.in b/cmakescripts/cmake_uninstall.cmake.in index b35d100..6726a0d 100644 --- a/cmakescripts/cmake_uninstall.cmake.in +++ b/cmakescripts/cmake_uninstall.cmake.in @@ -1,10 +1,10 @@ # This code is from the 
CMake FAQ -if (NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") - message(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"") -endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") +if (NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") + message(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_BINARY_DIR@/install_manifest.txt\"") +endif(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt") -file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) +file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files) string(REGEX REPLACE "\n" ";" files "${files}") list(REVERSE files) foreach (file ${files}) diff --git a/cmakescripts/testclean.cmake b/cmakescripts/testclean.cmake index 38bb03b..fc3fc25 100644 --- a/cmakescripts/testclean.cmake +++ b/cmakescripts/testclean.cmake @@ -29,7 +29,9 @@ file(GLOB FILES *_411_*.png *_411_*.ppm *_411_*.jpg - *_411.yuv) + *_411.yuv + tjbenchtest*.log + tjexampletest*.log) if(NOT FILES STREQUAL "") message(STATUS "Removing test files") diff --git a/cmyk.h b/cmyk.h new file mode 100644 index 0000000..48187a8 --- /dev/null +++ b/cmyk.h @@ -0,0 +1,61 @@ +/* + * cmyk.h + * + * Copyright (C) 2017-2018, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains convenience functions for performing quick & dirty + * CMYK<->RGB conversion. This algorithm is suitable for testing purposes + * only. Properly converting between CMYK and RGB requires a color management + * system. 
+ */ + +#ifndef CMYK_H +#define CMYK_H + +#include +#define JPEG_INTERNALS +#include +#include "jconfigint.h" + + +/* Fully reversible */ + +INLINE +LOCAL(void) +rgb_to_cmyk(JSAMPLE r, JSAMPLE g, JSAMPLE b, JSAMPLE *c, JSAMPLE *m, + JSAMPLE *y, JSAMPLE *k) +{ + double ctmp = 1.0 - ((double)r / 255.0); + double mtmp = 1.0 - ((double)g / 255.0); + double ytmp = 1.0 - ((double)b / 255.0); + double ktmp = MIN(MIN(ctmp, mtmp), ytmp); + + if (ktmp == 1.0) ctmp = mtmp = ytmp = 0.0; + else { + ctmp = (ctmp - ktmp) / (1.0 - ktmp); + mtmp = (mtmp - ktmp) / (1.0 - ktmp); + ytmp = (ytmp - ktmp) / (1.0 - ktmp); + } + *c = (JSAMPLE)(255.0 - ctmp * 255.0 + 0.5); + *m = (JSAMPLE)(255.0 - mtmp * 255.0 + 0.5); + *y = (JSAMPLE)(255.0 - ytmp * 255.0 + 0.5); + *k = (JSAMPLE)(255.0 - ktmp * 255.0 + 0.5); +} + + +/* Fully reversible only for C/M/Y/K values generated with rgb_to_cmyk() */ + +INLINE +LOCAL(void) +cmyk_to_rgb(JSAMPLE c, JSAMPLE m, JSAMPLE y, JSAMPLE k, JSAMPLE *r, JSAMPLE *g, + JSAMPLE *b) +{ + *r = (JSAMPLE)((double)c * (double)k / 255.0 + 0.5); + *g = (JSAMPLE)((double)m * (double)k / 255.0 + 0.5); + *b = (JSAMPLE)((double)y * (double)k / 255.0 + 0.5); +} + + +#endif /* CMYK_H */ diff --git a/configure.ac b/configure.ac deleted file mode 100644 index af80ee5..0000000 --- a/configure.ac +++ /dev/null @@ -1,616 +0,0 @@ -# -*- Autoconf -*- -# Process this file with autoconf to produce a configure script. - -AC_PREREQ([2.56]) -AC_INIT([libjpeg-turbo], [1.5.3]) - -AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2]) -AC_PREFIX_DEFAULT(/opt/libjpeg-turbo) - -m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) - -# Checks for programs. 
-SAVED_CFLAGS=${CFLAGS} -SAVED_CPPFLAGS=${CPPFLAGS} -AC_PROG_CPP -AC_PROG_CC -m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) -AM_PROG_AS -AM_PROG_CC_C_O -AC_PROG_INSTALL -AC_PROG_LIBTOOL -AC_PROG_LN_S - -AC_ARG_WITH([build-date], [Use custom build string to enable reproducible builds (default: YYMMDD)], - [BUILD="$with_build_date"], - [BUILD=`date +%Y%m%d`]) - -PKG_PROG_PKG_CONFIG - -# When the prefix is /opt/libjpeg-turbo, we assume that an "official" binary is -# being created, and thus we install things into specific locations. - -old_prefix=${prefix} -if test "x$prefix" = "xNONE" -a "x$ac_default_prefix" != "x"; then - prefix=$ac_default_prefix -fi -DATADIR=`eval echo ${datadir}` -DATADIR=`eval echo $DATADIR` -if test "$DATADIR" = "/opt/libjpeg-turbo/share"; then - datadir='${prefix}' -fi -DATADIR=`eval echo ${datarootdir}` -DATADIR=`eval echo $DATADIR` -if test "$DATADIR" = "/opt/libjpeg-turbo/share"; then - datarootdir='${prefix}' -fi -DOCDIR=`eval echo ${docdir}` -DOCDIR=`eval echo $DOCDIR` -if test "$DOCDIR" = "/opt/libjpeg-turbo/doc/libjpeg-turbo"; then - docdir='${datadir}/doc' -fi - -old_exec_prefix=${exec_prefix} -if test "x$exec_prefix" = "xNONE"; then - exec_prefix=${prefix} -fi - -AC_CHECK_SIZEOF(size_t) - -if test "x${libdir}" = 'x${exec_prefix}/lib' -o "x${libdir}" = 'x${prefix}/lib'; then - LIBDIR=`eval echo ${libdir}` - LIBDIR=`eval echo $LIBDIR` - if test "$LIBDIR" = "/opt/libjpeg-turbo/lib"; then - case $host_os in - darwin*) - ;; - *) - if test "${ac_cv_sizeof_size_t}" = "8"; then - libdir='${exec_prefix}/lib64' - elif test "${ac_cv_sizeof_size_t}" = "4"; then - libdir='${exec_prefix}/lib32' - fi - ;; - esac - fi -fi -exec_prefix=${old_exec_prefix} -prefix=${old_prefix} - -# Check whether compiler supports pointers to undefined structures -AC_MSG_CHECKING(whether compiler supports pointers to undefined structures) -AC_TRY_COMPILE([ typedef struct undefined_structure *undef_struct_ptr; ], , - AC_MSG_RESULT(yes), - [AC_MSG_RESULT(no) - 
AC_DEFINE([INCOMPLETE_TYPES_BROKEN], [1], - [Compiler does not support pointers to undefined structures.])]) - -if test "x${GCC}" = "xyes"; then - if test "x${SAVED_CFLAGS}" = "x"; then - CFLAGS=-O3 - fi - if test "x${SAVED_CPPFLAGS}" = "x"; then - CPPFLAGS=-Wall - fi -fi - -AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"]) -if test "x${SUNCC}" = "xyes"; then - if test "x${SAVED_CFLAGS}" = "x"; then - CFLAGS=-xO5 - fi -fi - -# Checks for libraries. - -# Checks for header files. -AC_HEADER_STDC -AC_CHECK_HEADERS([stddef.h stdlib.h locale.h string.h]) -AC_CHECK_HEADER([sys/types.h], - AC_DEFINE([NEED_SYS_TYPES_H], 1, [Define if you need to include to get size_t.])) - -# Checks for typedefs, structures, and compiler characteristics. -AC_C_CONST -AC_C_CHAR_UNSIGNED -AC_C_INLINE -AC_TYPE_SIZE_T -AC_CHECK_TYPES([unsigned char, unsigned short]) - -AC_MSG_CHECKING([if right shift is signed]) -AC_TRY_RUN( - [#include - int is_shifting_signed (long arg) { - long res = arg >> 4; - - if (res == -0x7F7E80CL) - return 1; /* right shift is signed */ - - /* see if unsigned-shift hack will fix it. */ - /* we can't just test exact value since it depends on width of long... */ - res |= (~0L) << (32-4); - if (res == -0x7F7E80CL) - return 0; /* right shift is unsigned */ - - printf("Right shift isn't acting as I expect it to.\n"); - printf("I fear the JPEG software will not work at all.\n\n"); - return 0; /* try it with unsigned anyway */ - } - int main (void) { - exit(is_shifting_signed(-0x7F7E80B1L)); - }], - [AC_MSG_RESULT(no) - AC_DEFINE([RIGHT_SHIFT_IS_UNSIGNED], 1, - [Define if your (broken) compiler shifts signed values as if they were unsigned.])], - [AC_MSG_RESULT(yes)], - [AC_MSG_RESULT(Assuming that right shift is signed on target machine.)]) - -# Checks for library functions. 
-AC_CHECK_FUNCS([memset memcpy], [], - [AC_DEFINE([NEED_BSD_STRINGS], 1, - [Define if you have BSD-like bzero and bcopy in rather than memset/memcpy in .])]) - -AC_MSG_CHECKING([libjpeg API version]) -AC_ARG_VAR(JPEG_LIB_VERSION, [libjpeg API version (62, 70, or 80)]) -if test "x$JPEG_LIB_VERSION" = "x"; then - AC_ARG_WITH([jpeg7], - AC_HELP_STRING([--with-jpeg7], - [Emulate libjpeg v7 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)])) - AC_ARG_WITH([jpeg8], - AC_HELP_STRING([--with-jpeg8], - [Emulate libjpeg v8 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)])) - if test "x${with_jpeg8}" = "xyes"; then - JPEG_LIB_VERSION=80 - else - if test "x${with_jpeg7}" = "xyes"; then - JPEG_LIB_VERSION=70 - else - JPEG_LIB_VERSION=62 - fi - fi -fi -JPEG_LIB_VERSION_DECIMAL=`expr $JPEG_LIB_VERSION / 10`.`expr $JPEG_LIB_VERSION % 10` -AC_SUBST(JPEG_LIB_VERSION_DECIMAL) -AC_MSG_RESULT([$JPEG_LIB_VERSION_DECIMAL]) -AC_DEFINE_UNQUOTED(JPEG_LIB_VERSION, [$JPEG_LIB_VERSION], - [libjpeg API version]) - -AC_ARG_VAR(SO_MAJOR_VERSION, - [Major version of the libjpeg-turbo shared library (default is determined by the API version)]) -AC_ARG_VAR(SO_MINOR_VERSION, - [Minor version of the libjpeg-turbo shared library (default is determined by the API version)]) -if test "x$SO_MAJOR_VERSION" = "x"; then - case "$JPEG_LIB_VERSION" in - 62) SO_MAJOR_VERSION=$JPEG_LIB_VERSION ;; - *) SO_MAJOR_VERSION=`expr $JPEG_LIB_VERSION / 10` ;; - esac -fi -if test "x$SO_MINOR_VERSION" = "x"; then - case "$JPEG_LIB_VERSION" in - 80) SO_MINOR_VERSION=2 ;; - *) SO_MINOR_VERSION=0 ;; - esac -fi - -RPM_CONFIG_ARGS= - -# Memory source/destination managers -SO_AGE=1 -MEM_SRCDST_FUNCTIONS= -if test "x${with_jpeg8}" != "xyes"; then - AC_MSG_CHECKING([whether to include in-memory source/destination managers]) - AC_ARG_WITH([mem-srcdst], - AC_HELP_STRING([--without-mem-srcdst], - [Do not include in-memory source/destination manager functions when emulating the 
libjpeg v6b or v7 API/ABI])) - if test "x$with_mem_srcdst" != "xno"; then - AC_MSG_RESULT(yes) - AC_DEFINE([MEM_SRCDST_SUPPORTED], [1], - [Support in-memory source/destination managers]) - SO_AGE=2 - MEM_SRCDST_FUNCTIONS="global: jpeg_mem_dest; jpeg_mem_src;"; - else - AC_MSG_RESULT(no) - RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-mem-srcdst" - fi -fi - -AC_MSG_CHECKING([libjpeg shared library version]) -AC_MSG_RESULT([$SO_MAJOR_VERSION.$SO_AGE.$SO_MINOR_VERSION]) -LIBTOOL_CURRENT=`expr $SO_MAJOR_VERSION + $SO_AGE` -AC_SUBST(LIBTOOL_CURRENT) -AC_SUBST(SO_MAJOR_VERSION) -AC_SUBST(SO_MINOR_VERSION) -AC_SUBST(SO_AGE) -AC_SUBST(MEM_SRCDST_FUNCTIONS) - -AC_DEFINE_UNQUOTED(LIBJPEG_TURBO_VERSION, [$VERSION], [libjpeg-turbo version]) - -m4_define(version_triplet,m4_split(AC_PACKAGE_VERSION,[[.]])) -m4_define(version_major,m4_car(m4_shiftn(1,[],version_triplet))) -m4_define(version_minor,m4_car(m4_shiftn(2,[],version_triplet))) -m4_define(version_revision,m4_car(m4_shiftn(3,[],version_triplet))) -VERSION_MAJOR=version_major -VERSION_MINOR=version_minor -VERSION_REVISION=version_revision -LIBJPEG_TURBO_VERSION_NUMBER=`printf "%d%03d%03d" $VERSION_MAJOR $VERSION_MINOR $VERSION_REVISION` -AC_DEFINE_UNQUOTED(LIBJPEG_TURBO_VERSION_NUMBER, [$LIBJPEG_TURBO_VERSION_NUMBER], [libjpeg-turbo version in integer form]) - -VERSION_SCRIPT=yes -AC_ARG_ENABLE([ld-version-script], - AS_HELP_STRING([--disable-ld-version-script], - [Disable linker version script for libjpeg-turbo (default is to use linker version script if the linker supports it)]), - [VERSION_SCRIPT=$enableval], []) - -AC_MSG_CHECKING([whether the linker supports version scripts]) -SAVED_LDFLAGS="$LDFLAGS" -LDFLAGS="$LDFLAGS -Wl,--version-script,conftest.map" -cat > conftest.map < should declare free() */ +extern void free(void *ptr); +#endif + #include /* to declare isprint() */ #ifdef USE_CCOMMAND /* command-line reader for Macintosh */ @@ -47,7 +51,7 @@ /* Create the add-on message string table. 
*/ -#define JMESSAGE(code,string) string , +#define JMESSAGE(code, string) string, static const char * const cdjpeg_message_table[] = { #include "cderror.h" @@ -63,13 +67,13 @@ static const char * const cdjpeg_message_table[] = { */ typedef enum { - FMT_BMP, /* BMP format (Windows flavor) */ - FMT_GIF, /* GIF format */ - FMT_OS2, /* BMP format (OS/2 flavor) */ - FMT_PPM, /* PPM/PGM (PBMPLUS formats) */ - FMT_RLE, /* RLE format */ - FMT_TARGA, /* Targa format */ - FMT_TIFF /* TIFF format */ + FMT_BMP, /* BMP format (Windows flavor) */ + FMT_GIF, /* GIF format */ + FMT_OS2, /* BMP format (OS/2 flavor) */ + FMT_PPM, /* PPM/PGM (PBMPLUS formats) */ + FMT_RLE, /* RLE format */ + FMT_TARGA, /* Targa format */ + FMT_TIFF /* TIFF format */ } IMAGE_FORMATS; #ifndef DEFAULT_FMT /* so can override from CFLAGS in Makefile */ @@ -89,6 +93,7 @@ static IMAGE_FORMATS requested_fmt; static const char *progname; /* program name for error messages */ +static char *icc_filename; /* for -icc switch */ static char *outfilename; /* for -outfile switch */ boolean memsrc; /* for -memsrc switch */ boolean skip, crop; @@ -98,7 +103,7 @@ JDIMENSION crop_x, crop_y, crop_width, crop_height; LOCAL(void) -usage (void) +usage(void) /* complain about bad command line */ { fprintf(stderr, "usage: %s [switches] ", progname); @@ -157,6 +162,7 @@ usage (void) fprintf(stderr, " -dither fs Use F-S dithering (default)\n"); fprintf(stderr, " -dither none Don't use dithering in quantization\n"); fprintf(stderr, " -dither ordered Use ordered dither (medium speed, quality)\n"); + fprintf(stderr, " -icc FILE Extract ICC profile to FILE\n"); #ifdef QUANT_2PASS_SUPPORTED fprintf(stderr, " -map FILE Map to colors used in named image file\n"); #endif @@ -180,8 +186,8 @@ usage (void) LOCAL(int) -parse_switches (j_decompress_ptr cinfo, int argc, char **argv, - int last_file_arg_seen, boolean for_real) +parse_switches(j_decompress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) /* Parse 
optional switches. * Returns argv[] index of first file-name argument (== argc if none). * Any file names with indexes <= last_file_arg_seen are ignored; @@ -196,6 +202,7 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv, /* Set up default JPEG parameters. */ requested_fmt = DEFAULT_FMT; /* set default output file format */ + icc_filename = NULL; outfilename = NULL; memsrc = FALSE; skip = FALSE; @@ -263,7 +270,7 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv, /* On first -d, print version identification */ static boolean printed_version = FALSE; - if (! printed_version) { + if (!printed_version) { fprintf(stderr, "%s version %s (build %s)\n", PACKAGE_NAME, VERSION, BUILD); fprintf(stderr, "%s\n\n", JCOPYRIGHT); @@ -282,7 +289,7 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv, /* Select recommended processing options for quick-and-dirty output. */ cinfo->two_pass_quantize = FALSE; cinfo->dither_mode = JDITHER_ORDERED; - if (! cinfo->quantize_colors) /* don't override an earlier -colors */ + if (!cinfo->quantize_colors) /* don't override an earlier -colors */ cinfo->desired_number_of_colors = 216; cinfo->dct_method = JDCT_FASTEST; cinfo->do_fancy_upsampling = FALSE; @@ -291,7 +298,8 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv, /* GIF output format. */ requested_fmt = FMT_GIF; - } else if (keymatch(arg, "grayscale", 2) || keymatch(arg, "greyscale",2)) { + } else if (keymatch(arg, "grayscale", 2) || + keymatch(arg, "greyscale", 2)) { /* Force monochrome output. */ cinfo->out_color_space = JCS_GRAYSCALE; @@ -303,6 +311,13 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv, /* Force RGB565 output. */ cinfo->out_color_space = JCS_RGB565; + } else if (keymatch(arg, "icc", 1)) { + /* Set ICC filename. 
*/ + if (++argn >= argc) /* advance to next argument */ + usage(); + icc_filename = argv[argn]; + jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xFFFF); + } else if (keymatch(arg, "map", 3)) { /* Quantize to a color map taken from an input file. */ if (++argn >= argc) /* advance to next argument */ @@ -419,13 +434,13 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv, */ LOCAL(unsigned int) -jpeg_getc (j_decompress_ptr cinfo) +jpeg_getc(j_decompress_ptr cinfo) /* Read next byte */ { struct jpeg_source_mgr *datasrc = cinfo->src; if (datasrc->bytes_in_buffer == 0) { - if (! (*datasrc->fill_input_buffer) (cinfo)) + if (!(*datasrc->fill_input_buffer) (cinfo)) ERREXIT(cinfo, JERR_CANT_SUSPEND); } datasrc->bytes_in_buffer--; @@ -434,7 +449,7 @@ jpeg_getc (j_decompress_ptr cinfo) METHODDEF(boolean) -print_text_marker (j_decompress_ptr cinfo) +print_text_marker(j_decompress_ptr cinfo) { boolean traceit = (cinfo->err->trace_level >= 1); long length; @@ -447,10 +462,10 @@ print_text_marker (j_decompress_ptr cinfo) if (traceit) { if (cinfo->unread_marker == JPEG_COM) - fprintf(stderr, "Comment, length %ld:\n", (long) length); + fprintf(stderr, "Comment, length %ld:\n", (long)length); else /* assume it is an APPn otherwise */ fprintf(stderr, "APP%d, length %ld:\n", - cinfo->unread_marker - JPEG_APP0, (long) length); + cinfo->unread_marker - JPEG_APP0, (long)length); } while (--length >= 0) { @@ -489,7 +504,7 @@ print_text_marker (j_decompress_ptr cinfo) */ int -main (int argc, char **argv) +main(int argc, char **argv) { struct jpeg_decompress_struct cinfo; struct jpeg_error_mgr jerr; @@ -528,7 +543,7 @@ main (int argc, char **argv) * but don't try to override APP0 or APP14 this way (see libjpeg.txt). */ jpeg_set_marker_processor(&cinfo, JPEG_COM, print_text_marker); - jpeg_set_marker_processor(&cinfo, JPEG_APP0+12, print_text_marker); + jpeg_set_marker_processor(&cinfo, JPEG_APP0 + 12, print_text_marker); /* Scan command line to find file names. 
*/ /* It is convenient to use just one switch-parsing routine, but the switch @@ -543,14 +558,14 @@ main (int argc, char **argv) #ifdef TWO_FILE_COMMANDLINE /* Must have either -outfile switch or explicit output file name */ if (outfilename == NULL) { - if (file_index != argc-2) { + if (file_index != argc - 2) { fprintf(stderr, "%s: must name one input and one output file\n", progname); usage(); } - outfilename = argv[file_index+1]; + outfilename = argv[file_index + 1]; } else { - if (file_index != argc-1) { + if (file_index != argc - 1) { fprintf(stderr, "%s: must name one input and one output file\n", progname); usage(); @@ -558,7 +573,7 @@ main (int argc, char **argv) } #else /* Unix style: expect zero or one file name */ - if (file_index < argc-1) { + if (file_index < argc - 1) { fprintf(stderr, "%s: only one input file\n", progname); usage(); } @@ -587,7 +602,7 @@ main (int argc, char **argv) } #ifdef PROGRESS_REPORT - start_progress_monitor((j_common_ptr) &cinfo, &progress); + start_progress_monitor((j_common_ptr)&cinfo, &progress); #endif /* Specify data source for decompression */ @@ -617,7 +632,7 @@ main (int argc, char **argv) jpeg_stdio_src(&cinfo, input_file); /* Read file header, set default decompression parameters */ - (void) jpeg_read_header(&cinfo, TRUE); + (void)jpeg_read_header(&cinfo, TRUE); /* Adjust default decompression parameters by re-parsing the options */ file_index = parse_switches(&cinfo, argc, argv, 0, TRUE); @@ -628,10 +643,10 @@ main (int argc, char **argv) switch (requested_fmt) { #ifdef BMP_SUPPORTED case FMT_BMP: - dest_mgr = jinit_write_bmp(&cinfo, FALSE); + dest_mgr = jinit_write_bmp(&cinfo, FALSE, TRUE); break; case FMT_OS2: - dest_mgr = jinit_write_bmp(&cinfo, TRUE); + dest_mgr = jinit_write_bmp(&cinfo, TRUE, TRUE); break; #endif #ifdef GIF_SUPPORTED @@ -661,7 +676,7 @@ main (int argc, char **argv) dest_mgr->output_file = output_file; /* Start decompressor */ - (void) jpeg_start_decompress(&cinfo); + 
(void)jpeg_start_decompress(&cinfo); /* Skip rows */ if (skip) { @@ -755,12 +770,35 @@ main (int argc, char **argv) progress.pub.completed_passes = progress.pub.total_passes; #endif + if (icc_filename != NULL) { + FILE *icc_file; + JOCTET *icc_profile; + unsigned int icc_len; + + if ((icc_file = fopen(icc_filename, WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, icc_filename); + exit(EXIT_FAILURE); + } + if (jpeg_read_icc_profile(&cinfo, &icc_profile, &icc_len)) { + if (fwrite(icc_profile, icc_len, 1, icc_file) < 1) { + fprintf(stderr, "%s: can't read ICC profile from %s\n", progname, + icc_filename); + free(icc_profile); + fclose(icc_file); + exit(EXIT_FAILURE); + } + free(icc_profile); + fclose(icc_file); + } else if (cinfo.err->msg_code != JWRN_BOGUS_ICC) + fprintf(stderr, "%s: no ICC profile data in JPEG file\n", progname); + } + /* Finish decompression and release memory. * I must do it in this order because output module has allocated memory * of lifespan JPOOL_IMAGE; it needs to finish before releasing memory. */ (*dest_mgr->finish_output) (&cinfo, dest_mgr); - (void) jpeg_finish_decompress(&cinfo); + (void)jpeg_finish_decompress(&cinfo); jpeg_destroy_decompress(&cinfo); /* Close files, if we opened them */ @@ -770,7 +808,7 @@ main (int argc, char **argv) fclose(output_file); #ifdef PROGRESS_REPORT - end_progress_monitor((j_common_ptr) &cinfo); + end_progress_monitor((j_common_ptr)&cinfo); #endif if (memsrc && inbuffer != NULL) diff --git a/doc/html/annotated.html b/doc/html/annotated.html index d0b0e1e..50286d2 100644 --- a/doc/html/annotated.html +++ b/doc/html/annotated.html @@ -24,7 +24,7 @@
TurboJPEG -  1.5 +  2.0
diff --git a/doc/html/classes.html b/doc/html/classes.html index 275e96d..41a2811 100644 --- a/doc/html/classes.html +++ b/doc/html/classes.html @@ -24,7 +24,7 @@
TurboJPEG -  1.5 +  2.0
diff --git a/doc/html/doxygen-extra.css b/doc/html/doxygen-extra.css index 5abbcc2..f1bd4c2 100644 --- a/doc/html/doxygen-extra.css +++ b/doc/html/doxygen-extra.css @@ -1,3 +1,3 @@ code { - color: #4665A2; + color: #4665A2; } diff --git a/doc/html/functions.html b/doc/html/functions.html index 31d78f5..1042ae7 100644 --- a/doc/html/functions.html +++ b/doc/html/functions.html @@ -24,7 +24,7 @@
TurboJPEG -  1.5 +  2.0
diff --git a/doc/html/functions_vars.html b/doc/html/functions_vars.html index 8373eac..e0a7157 100644 --- a/doc/html/functions_vars.html +++ b/doc/html/functions_vars.html @@ -24,7 +24,7 @@
TurboJPEG -  1.5 +  2.0
diff --git a/doc/html/group___turbo_j_p_e_g.html b/doc/html/group___turbo_j_p_e_g.html index 89780d4..cef856a 100644 --- a/doc/html/group___turbo_j_p_e_g.html +++ b/doc/html/group___turbo_j_p_e_g.html @@ -24,7 +24,7 @@
TurboJPEG -  1.5 +  2.0
@@ -128,14 +128,23 @@ Macros #define TJFLAG_ACCURATEDCT  Use the most accurate DCT/IDCT algorithm available in the underlying codec. More...
  +#define TJFLAG_STOPONWARNING + Immediately discontinue the current compression/decompression/transform operation if the underlying codec throws a warning (non-fatal error). More...
+  +#define TJFLAG_PROGRESSIVE + Use progressive entropy coding in JPEG images generated by the compression and transform functions. More...
+  +#define TJ_NUMERR + The number of error codes. More...
+  #define TJ_NUMXOP  The number of transform operations. More...
  #define TJXOPT_PERFECT - This option will cause tjTransform() to return an error if the transform is not perfect. More...
+ This option will cause tjTransform() to return an error if the transform is not perfect. More...
  #define TJXOPT_TRIM - This option will cause tjTransform() to discard any partial MCU blocks that cannot be transformed. More...
+ This option will cause tjTransform() to discard any partial MCU blocks that cannot be transformed. More...
  #define TJXOPT_CROP  This option will enable lossless cropping. More...
@@ -144,8 +153,14 @@ Macros  This option will discard the color data in the input image and produce a grayscale output image. More...
  #define TJXOPT_NOOUTPUT - This option will prevent tjTransform() from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them.) More...
+ This option will prevent tjTransform() from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them.) More...
  +#define TJXOPT_PROGRESSIVE + This option will enable progressive entropy coding in the output image generated by this particular transform. More...
+  +#define TJXOPT_COPYNONE + This option will prevent tjTransform() from copying any extra markers (including EXIF and ICC profile data) from the source image to the output image. More...
+  #define TJPAD(width)  Pad the given width to the nearest 32-bit boundary. More...
  @@ -190,7 +205,9 @@ Enumerations   TJPF_BGRA, TJPF_ABGR, TJPF_ARGB, -TJPF_CMYK +TJPF_CMYK, +
+  TJPF_UNKNOWN
}  Pixel formats. More...
@@ -206,6 +223,11 @@ Enumerations }  JPEG colorspaces. More...
  +enum  TJERR { TJERR_WARNING, +TJERR_FATAL + } + Error codes. More...
+  enum  TJXOP {
  TJXOP_NONE, TJXOP_HFLIP, @@ -218,86 +240,95 @@ Enumerations TJXOP_ROT270
} - Transform operations for tjTransform() More...
+ Transform operations for tjTransform() More...
  - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Functions

DLLEXPORT tjhandle DLLCALL tjInitCompress (void)
 Create a TurboJPEG compressor instance. More...
 
DLLEXPORT int DLLCALL tjCompress2 (tjhandle handle, const unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)
 Compress an RGB, grayscale, or CMYK image into a JPEG image. More...
 
DLLEXPORT int DLLCALL tjCompressFromYUV (tjhandle handle, const unsigned char *srcBuf, int width, int pad, int height, int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags)
 Compress a YUV planar image into a JPEG image. More...
 
DLLEXPORT int DLLCALL tjCompressFromYUVPlanes (tjhandle handle, const unsigned char **srcPlanes, int width, const int *strides, int height, int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags)
 Compress a set of Y, U (Cb), and V (Cr) image planes into a JPEG image. More...
 
DLLEXPORT unsigned long DLLCALL tjBufSize (int width, int height, int jpegSubsamp)
 The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters. More...
 
DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2 (int width, int pad, int height, int subsamp)
 The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters. More...
 
DLLEXPORT unsigned long DLLCALL tjPlaneSizeYUV (int componentID, int width, int stride, int height, int subsamp)
 The size of the buffer (in bytes) required to hold a YUV image plane with the given parameters. More...
 
DLLEXPORT tjhandle tjInitCompress (void)
 Create a TurboJPEG compressor instance. More...
 
DLLEXPORT int tjCompress2 (tjhandle handle, const unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)
 Compress an RGB, grayscale, or CMYK image into a JPEG image. More...
 
DLLEXPORT int tjCompressFromYUV (tjhandle handle, const unsigned char *srcBuf, int width, int pad, int height, int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags)
 Compress a YUV planar image into a JPEG image. More...
 
DLLEXPORT int tjCompressFromYUVPlanes (tjhandle handle, const unsigned char **srcPlanes, int width, const int *strides, int height, int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags)
 Compress a set of Y, U (Cb), and V (Cr) image planes into a JPEG image. More...
 
DLLEXPORT unsigned long tjBufSize (int width, int height, int jpegSubsamp)
 The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters. More...
 
DLLEXPORT unsigned long tjBufSizeYUV2 (int width, int pad, int height, int subsamp)
 The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters. More...
 
DLLEXPORT unsigned long tjPlaneSizeYUV (int componentID, int width, int stride, int height, int subsamp)
 The size of the buffer (in bytes) required to hold a YUV image plane with the given parameters. More...
 
DLLEXPORT int tjPlaneWidth (int componentID, int width, int subsamp)
 The plane width of a YUV image plane with the given parameters. More...
 
DLLEXPORT int tjPlaneHeight (int componentID, int height, int subsamp)
 The plane height of a YUV image plane with the given parameters. More...
 
DLLEXPORT int DLLCALL tjEncodeYUV3 (tjhandle handle, const unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags)
 Encode an RGB or grayscale image into a YUV planar image. More...
 
DLLEXPORT int DLLCALL tjEncodeYUVPlanes (tjhandle handle, const unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **dstPlanes, int *strides, int subsamp, int flags)
 Encode an RGB or grayscale image into separate Y, U (Cb), and V (Cr) image planes. More...
 
DLLEXPORT tjhandle DLLCALL tjInitDecompress (void)
 Create a TurboJPEG decompressor instance. More...
 
DLLEXPORT int DLLCALL tjDecompressHeader3 (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, int *jpegSubsamp, int *jpegColorspace)
 Retrieve information about a JPEG image without decompressing it. More...
 
DLLEXPORT tjscalingfactor *DLLCALL tjGetScalingFactors (int *numscalingfactors)
 Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports. More...
 
DLLEXPORT int DLLCALL tjDecompress2 (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)
 Decompress a JPEG image to an RGB, grayscale, or CMYK image. More...
 
DLLEXPORT int DLLCALL tjDecompressToYUV2 (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pad, int height, int flags)
 Decompress a JPEG image to a YUV planar image. More...
 
DLLEXPORT int DLLCALL tjDecompressToYUVPlanes (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char **dstPlanes, int width, int *strides, int height, int flags)
 Decompress a JPEG image into separate Y, U (Cb), and V (Cr) image planes. More...
 
DLLEXPORT int DLLCALL tjDecodeYUV (tjhandle handle, const unsigned char *srcBuf, int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)
 Decode a YUV planar image into an RGB or grayscale image. More...
 
DLLEXPORT int DLLCALL tjDecodeYUVPlanes (tjhandle handle, const unsigned char **srcPlanes, const int *strides, int subsamp, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)
 Decode a set of Y, U (Cb), and V (Cr) image planes into an RGB or grayscale image. More...
 
DLLEXPORT tjhandle DLLCALL tjInitTransform (void)
 Create a new TurboJPEG transformer instance. More...
 
DLLEXPORT int DLLCALL tjTransform (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int flags)
 Losslessly transform a JPEG image into another JPEG image. More...
 
DLLEXPORT int DLLCALL tjDestroy (tjhandle handle)
 Destroy a TurboJPEG compressor, decompressor, or transformer instance. More...
 
DLLEXPORT unsigned char *DLLCALL tjAlloc (int bytes)
 Allocate an image buffer for use with TurboJPEG. More...
 
DLLEXPORT void DLLCALL tjFree (unsigned char *buffer)
 Free an image buffer previously allocated by TurboJPEG. More...
 
DLLEXPORT char *DLLCALL tjGetErrorStr (void)
 Returns a descriptive error message explaining why the last command failed. More...
 
DLLEXPORT int tjEncodeYUV3 (tjhandle handle, const unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags)
 Encode an RGB or grayscale image into a YUV planar image. More...
 
DLLEXPORT int tjEncodeYUVPlanes (tjhandle handle, const unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **dstPlanes, int *strides, int subsamp, int flags)
 Encode an RGB or grayscale image into separate Y, U (Cb), and V (Cr) image planes. More...
 
DLLEXPORT tjhandle tjInitDecompress (void)
 Create a TurboJPEG decompressor instance. More...
 
DLLEXPORT int tjDecompressHeader3 (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, int *jpegSubsamp, int *jpegColorspace)
 Retrieve information about a JPEG image without decompressing it. More...
 
DLLEXPORT tjscalingfactor * tjGetScalingFactors (int *numscalingfactors)
 Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports. More...
 
DLLEXPORT int tjDecompress2 (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)
 Decompress a JPEG image to an RGB, grayscale, or CMYK image. More...
 
DLLEXPORT int tjDecompressToYUV2 (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pad, int height, int flags)
 Decompress a JPEG image to a YUV planar image. More...
 
DLLEXPORT int tjDecompressToYUVPlanes (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char **dstPlanes, int width, int *strides, int height, int flags)
 Decompress a JPEG image into separate Y, U (Cb), and V (Cr) image planes. More...
 
DLLEXPORT int tjDecodeYUV (tjhandle handle, const unsigned char *srcBuf, int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)
 Decode a YUV planar image into an RGB or grayscale image. More...
 
DLLEXPORT int tjDecodeYUVPlanes (tjhandle handle, const unsigned char **srcPlanes, const int *strides, int subsamp, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)
 Decode a set of Y, U (Cb), and V (Cr) image planes into an RGB or grayscale image. More...
 
DLLEXPORT tjhandle tjInitTransform (void)
 Create a new TurboJPEG transformer instance. More...
 
DLLEXPORT int tjTransform (tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int flags)
 Losslessly transform a JPEG image into another JPEG image. More...
 
DLLEXPORT int tjDestroy (tjhandle handle)
 Destroy a TurboJPEG compressor, decompressor, or transformer instance. More...
 
DLLEXPORT unsigned char * tjAlloc (int bytes)
 Allocate an image buffer for use with TurboJPEG. More...
 
DLLEXPORT unsigned char * tjLoadImage (const char *filename, int *width, int align, int *height, int *pixelFormat, int flags)
 Load an uncompressed image from disk into memory. More...
 
DLLEXPORT int tjSaveImage (const char *filename, unsigned char *buffer, int width, int pitch, int height, int pixelFormat, int flags)
 Save an uncompressed image from memory to disk. More...
 
DLLEXPORT void tjFree (unsigned char *buffer)
 Free an image buffer previously allocated by TurboJPEG. More...
 
DLLEXPORT char * tjGetErrorStr2 (tjhandle handle)
 Returns a descriptive error message explaining why the last command failed. More...
 
DLLEXPORT int tjGetErrorCode (tjhandle handle)
 Returns a code indicating the severity of the last error. More...
 
@@ -316,6 +347,9 @@ Variables + + + @@ -343,6 +377,20 @@ Variables + +
+
+

Variables

static const int tjBlueOffset [TJ_NUMPF]
 Blue offset (in bytes) for a given pixel format. More...
 
static const int tjAlphaOffset [TJ_NUMPF]
 Alpha offset (in bytes) for a given pixel format. More...
 
static const int tjPixelSize [TJ_NUMPF]
 Pixel size (in bytes) for a given pixel format. More...
 
+ + + +
#define TJ_NUMERR
+
+ +

The number of error codes.

+ +
+
@@ -459,6 +507,36 @@ Variables
+ +
+
+ + + + +
#define TJFLAG_PROGRESSIVE
+
+ +

Use progressive entropy coding in JPEG images generated by the compression and transform functions.

+

Progressive entropy coding will generally improve compression relative to baseline entropy coding (the default), but it will reduce compression and decompression performance considerably.

+ +
+
+ +
+
+ + + + +
#define TJFLAG_STOPONWARNING
+
+ +

Immediately discontinue the current compression/decompression/transform operation if the underlying codec throws a warning (non-fatal error).

+

The default behavior is to allow the operation to complete unless a fatal error is encountered.

+ +
+
@@ -506,6 +584,20 @@ Variables
+ +
+
+ + + + +
#define TJXOPT_COPYNONE
+
+ +

This option will prevent tjTransform() from copying any extra markers (including EXIF and ICC profile data) from the source image to the output image.

+ +
+
@@ -517,7 +609,7 @@ Variables

This option will enable lossless cropping.

-

See tjTransform() for more information.

+

See tjTransform() for more information.

@@ -545,7 +637,7 @@ Variables
-

This option will prevent tjTransform() from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them.)

+

This option will prevent tjTransform() from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them.)

@@ -559,11 +651,26 @@ Variables
-

This option will cause tjTransform() to return an error if the transform is not perfect.

+

This option will cause tjTransform() to return an error if the transform is not perfect.

Lossless transforms operate on MCU blocks, whose size depends on the level of chrominance subsampling used (see tjMCUWidth and tjMCUHeight.) If the image's width or height is not evenly divisible by the MCU block size, then there will be partial MCU blocks on the right and/or bottom edges. It is not possible to move these partial MCU blocks to the top or left of the image, so any transform that would require that is "imperfect." If this option is not specified, then any partial MCU blocks that cannot be transformed will be left in place, which will create odd-looking strips on the right or bottom edge of the image.

+ +
+
+ + + + +
#define TJXOPT_PROGRESSIVE
+
+ +

This option will enable progressive entropy coding in the output image generated by this particular transform.

+

Progressive entropy coding will generally improve compression relative to baseline entropy coding (the default), but it will reduce compression and decompression performance considerably.

+ +
+
@@ -574,7 +681,7 @@ Variables
-

This option will cause tjTransform() to discard any partial MCU blocks that cannot be transformed.

+

This option will cause tjTransform() to discard any partial MCU blocks that cannot be transformed.

@@ -644,6 +751,28 @@ Variables + +
+
+ + + + +
enum TJERR
+
+ +

Error codes.

+ + + +
Enumerator
TJERR_WARNING  +

The error was non-fatal and recoverable, but the image may still be corrupt.

+
TJERR_FATAL  +

The error was fatal and non-recoverable.

+
+ +
+
@@ -704,6 +833,10 @@ Variables

CMYK pixel format.

Unlike RGB, which is an additive color model used primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive color model used primarily for printing. In the CMYK color model, the value of each color component typically corresponds to an amount of cyan, magenta, yellow, or black ink that is applied to a white background. In order to convert between CMYK and RGB, it is necessary to use a color management system (CMS.) A CMS will attempt to map colors within the printer's gamut to perceptually similar colors in the display's gamut and vice versa, but the mapping is typically not 1:1 or reversible, nor can it be defined with a simple formula. Thus, such a conversion is out of scope for a codec library. However, the TurboJPEG API allows for compressing CMYK pixels into a YCCK JPEG image (see TJCS_YCCK) and decompressing YCCK JPEG images into CMYK pixels.

+TJPF_UNKNOWN  +

Unknown pixel format.

+

Currently this is only used by tjLoadImage().

+
@@ -761,7 +894,7 @@ Variables
-

Transform operations for tjTransform()

+

Transform operations for tjTransform()

Function Documentation

- +
Enumerator
TJXOP_NONE 

Do not transform the position of the image pixels.

@@ -797,12 +930,12 @@ Variables
- + @@ -820,16 +953,16 @@ Variables
Returns
a pointer to a newly-allocated buffer with the specified number of bytes.
-
See Also
tjFree()
+
See Also
tjFree()
- +
DLLEXPORT unsigned char* DLLCALL tjAlloc DLLEXPORT unsigned char* tjAlloc ( int  bytes)
- + @@ -868,12 +1001,12 @@ Variables - +
DLLEXPORT unsigned long DLLCALL tjBufSize DLLEXPORT unsigned long tjBufSize ( int  width,
- + @@ -918,12 +1051,12 @@ Variables - +
DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2 DLLEXPORT unsigned long tjBufSizeYUV2 ( int  width,
- + @@ -1006,9 +1139,9 @@ Variables @@ -1018,16 +1151,16 @@ If you choose option 1, *jpegSize should be set to the size of your
DLLEXPORT int DLLCALL tjCompress2 DLLEXPORT int tjCompress2 ( tjhandle  handle,
heightheight (in pixels) of the source image
pixelFormatpixel format of the source image (see Pixel formats.)
jpegBufaddress of a pointer to an image buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    -
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. +
  3. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  4. set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer for you, or
  5. -
  6. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.)
  7. +
  8. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.)
If you choose option 1, *jpegSize should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check *jpegBuf upon return from this function, as it may have changed.
jpegSizepointer to an unsigned long variable that holds the size of the JPEG image buffer. If *jpegBuf points to a pre-allocated buffer, then *jpegSize should be set to the size of the buffer. Upon return, *jpegSize will contain the size of the JPEG image (in bytes.) If *jpegBuf points to a JPEG image buffer that is being reused from a previous call to one of the JPEG compression functions, then *jpegSize is ignored.
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1098,15 +1231,15 @@ If you choose option 1, *jpegSize should be set to the size of your
Parameters
DLLEXPORT int DLLCALL tjCompressFromYUV DLLEXPORT int tjCompressFromYUV ( tjhandle  handle,
- + @@ -1115,16 +1248,16 @@ If you choose option 1, *jpegSize should be set to the size of your
handlea handle to a TurboJPEG compressor or transformer instance
srcBufpointer to an image buffer containing a YUV planar image to be compressed. The size of this buffer should match the value returned by tjBufSizeYUV2() for the given image width, height, padding, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the source buffer (refer to YUV Image Format Notes.)
srcBufpointer to an image buffer containing a YUV planar image to be compressed. The size of this buffer should match the value returned by tjBufSizeYUV2() for the given image width, height, padding, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the source buffer (refer to YUV Image Format Notes.)
widthwidth (in pixels) of the source image. If the width is not an even multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed within TurboJPEG.
padthe line padding used in the source image. For instance, if each line in each plane of the YUV image is padded to the nearest multiple of 4 bytes, then pad should be set to 4.
heightheight (in pixels) of the source image. If the height is not an even multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed within TurboJPEG.
subsampthe level of chrominance subsampling used in the source image (see Chrominance subsampling options.)
jpegBufaddress of a pointer to an image buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    -
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. +
  3. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  4. set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer for you, or
  5. -
  6. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.)
  7. +
  8. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.)
If you choose option 1, *jpegSize should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check *jpegBuf upon return from this function, as it may have changed.
jpegSizepointer to an unsigned long variable that holds the size of the JPEG image buffer. If *jpegBuf points to a pre-allocated buffer, then *jpegSize should be set to the size of the buffer. Upon return, *jpegSize will contain the size of the JPEG image (in bytes.) If *jpegBuf points to a JPEG image buffer that is being reused from a previous call to one of the JPEG compression functions, then *jpegSize is ignored.
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1195,15 +1328,15 @@ If you choose option 1, *jpegSize should be set to the size of your
Parameters
DLLEXPORT int DLLCALL tjCompressFromYUVPlanes DLLEXPORT int tjCompressFromYUVPlanes ( tjhandle  handle,
- + @@ -1212,16 +1345,16 @@ If you choose option 1, *jpegSize should be set to the size of your
handlea handle to a TurboJPEG compressor or transformer instance
srcPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if compressing a grayscale image) that contain a YUV image to be compressed. These planes can be contiguous or non-contiguous in memory. The size of each plane should match the value returned by tjPlaneSizeYUV() for the given image width, height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
srcPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if compressing a grayscale image) that contain a YUV image to be compressed. These planes can be contiguous or non-contiguous in memory. The size of each plane should match the value returned by tjPlaneSizeYUV() for the given image width, height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
widthwidth (in pixels) of the source image. If the width is not an even multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed within TurboJPEG.
stridesan array of integers, each specifying the number of bytes per line in the corresponding plane of the YUV source image. Setting the stride for any plane to 0 is the same as setting it to the plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective plane widths. You can adjust the strides in order to specify an arbitrary amount of line padding in each plane or to create a JPEG image from a subregion of a larger YUV planar image.
heightheight (in pixels) of the source image. If the height is not an even multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed within TurboJPEG.
subsampthe level of chrominance subsampling used in the source image (see Chrominance subsampling options.)
jpegBufaddress of a pointer to an image buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    -
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. +
  3. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  4. set *jpegBuf to NULL to tell TurboJPEG to allocate the buffer for you, or
  5. -
  6. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.)
  7. +
  8. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize(). This should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.)
If you choose option 1, *jpegSize should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check *jpegBuf upon return from this function, as it may have changed.
jpegSizepointer to an unsigned long variable that holds the size of the JPEG image buffer. If *jpegBuf points to a pre-allocated buffer, then *jpegSize should be set to the size of the buffer. Upon return, *jpegSize will contain the size of the JPEG image (in bytes.) If *jpegBuf points to a JPEG image buffer that is being reused from a previous call to one of the JPEG compression functions, then *jpegSize is ignored.
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1293,7 +1426,7 @@ If you choose option 1, *jpegSize should be set to the size of your
Parameters
DLLEXPORT int DLLCALL tjDecodeYUV DLLEXPORT int tjDecodeYUV ( tjhandle  handle,
- + @@ -1305,16 +1438,16 @@ If you choose option 1, *jpegSize should be set to the size of your
handlea handle to a TurboJPEG decompressor or transformer instance
srcBufpointer to an image buffer containing a YUV planar image to be decoded. The size of this buffer should match the value returned by tjBufSizeYUV2() for the given image width, height, padding, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the source buffer (refer to YUV Image Format Notes.)
srcBufpointer to an image buffer containing a YUV planar image to be decoded. The size of this buffer should match the value returned by tjBufSizeYUV2() for the given image width, height, padding, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the source buffer (refer to YUV Image Format Notes.)
padUse this parameter to specify that the width of each line in each plane of the YUV source image is padded to the nearest multiple of this number of bytes (must be a power of 2.)
subsampthe level of chrominance subsampling used in the YUV source image (see Chrominance subsampling options.)
dstBufpointer to an image buffer that will receive the decoded image. This buffer should normally be pitch * height bytes in size, but the dstBuf pointer can also be used to decode into a specific region of a larger buffer.
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1386,7 +1519,7 @@ If you choose option 1, *jpegSize should be set to the size of your
Parameters
DLLEXPORT int DLLCALL tjDecodeYUVPlanes DLLEXPORT int tjDecodeYUVPlanes ( tjhandle  handle,
- + @@ -1398,16 +1531,16 @@ If you choose option 1, *jpegSize should be set to the size of your
handlea handle to a TurboJPEG decompressor or transformer instance
srcPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if decoding a grayscale image) that contain a YUV image to be decoded. These planes can be contiguous or non-contiguous in memory. The size of each plane should match the value returned by tjPlaneSizeYUV() for the given image width, height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
srcPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if decoding a grayscale image) that contain a YUV image to be decoded. These planes can be contiguous or non-contiguous in memory. The size of each plane should match the value returned by tjPlaneSizeYUV() for the given image width, height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
stridesan array of integers, each specifying the number of bytes per line in the corresponding plane of the YUV source image. Setting the stride for any plane to 0 is the same as setting it to the plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective plane widths. You can adjust the strides in order to specify an arbitrary amount of line padding in each plane or to decode a subregion of a larger YUV planar image.
subsampthe level of chrominance subsampling used in the YUV source image (see Chrominance subsampling options.)
dstBufpointer to an image buffer that will receive the decoded image. This buffer should normally be pitch * height bytes in size, but the dstBuf pointer can also be used to decode into a specific region of a larger buffer.
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1474,25 +1607,25 @@ If you choose option 1, *jpegSize should be set to the size of your - + - +
DLLEXPORT int DLLCALL tjDecompress2 DLLEXPORT int tjDecompress2 ( tjhandle  handle,
handlea handle to a TurboJPEG decompressor or transformer instance
jpegBufpointer to a buffer containing the JPEG image to decompress
jpegSizesize of the JPEG image (in bytes)
dstBufpointer to an image buffer that will receive the decompressed image. This buffer should normally be pitch * scaledHeight bytes in size, where scaledHeight can be determined by calling TJSCALED() with the JPEG image height and one of the scaling factors returned by tjGetScalingFactors(). The dstBuf pointer may also be used to decompress into a specific region of a larger buffer.
dstBufpointer to an image buffer that will receive the decompressed image. This buffer should normally be pitch * scaledHeight bytes in size, where scaledHeight can be determined by calling TJSCALED() with the JPEG image height and one of the scaling factors returned by tjGetScalingFactors(). The dstBuf pointer may also be used to decompress into a specific region of a larger buffer.
widthdesired width (in pixels) of the destination image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If width is set to 0, then only the height will be considered when determining the scaled image size.
pitchbytes per line in the destination image. Normally, this is scaledWidth * tjPixelSize[pixelFormat] if the decompressed image is unpadded, else TJPAD(scaledWidth * tjPixelSize[pixelFormat]) if each line of the decompressed image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. (NOTE: scaledWidth can be determined by calling TJSCALED() with the JPEG image width and one of the scaling factors returned by tjGetScalingFactors().) You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to scaledWidth * tjPixelSize[pixelFormat].
pitchbytes per line in the destination image. Normally, this is scaledWidth * tjPixelSize[pixelFormat] if the decompressed image is unpadded, else TJPAD(scaledWidth * tjPixelSize[pixelFormat]) if each line of the decompressed image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. (NOTE: scaledWidth can be determined by calling TJSCALED() with the JPEG image width and one of the scaling factors returned by tjGetScalingFactors().) You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to scaledWidth * tjPixelSize[pixelFormat].
heightdesired height (in pixels) of the destination image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If height is set to 0, then only the width will be considered when determining the scaled image size.
pixelFormatpixel format of the destination image (see Pixel formats.)
flagsthe bitwise OR of one or more of the flags
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1554,16 +1687,16 @@ If you choose option 1, *jpegSize should be set to the size of your
DLLEXPORT int DLLCALL tjDecompressHeader3 DLLEXPORT int tjDecompressHeader3 ( tjhandle  handle,
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1625,7 +1758,7 @@ If you choose option 1, *jpegSize should be set to the size of your - + @@ -1633,16 +1766,16 @@ If you choose option 1, *jpegSize should be set to the size of your
DLLEXPORT int DLLCALL tjDecompressToYUV2 DLLEXPORT int tjDecompressToYUV2 ( tjhandle  handle,
handlea handle to a TurboJPEG decompressor or transformer instance
jpegBufpointer to a buffer containing the JPEG image to decompress
jpegSizesize of the JPEG image (in bytes)
dstBufpointer to an image buffer that will receive the YUV image. Use tjBufSizeYUV2() to determine the appropriate size for this buffer based on the image width, height, padding, and level of subsampling. The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the buffer (refer to YUV Image Format Notes.)
dstBufpointer to an image buffer that will receive the YUV image. Use tjBufSizeYUV2() to determine the appropriate size for this buffer based on the image width, height, padding, and level of subsampling. The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the buffer (refer to YUV Image Format Notes.)
widthdesired width (in pixels) of the YUV image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If width is set to 0, then only the height will be considered when determining the scaled image size. If the scaled width is not an even multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed within TurboJPEG.
padthe width of each line in each plane of the YUV image will be padded to the nearest multiple of this number of bytes (must be a power of 2.) To generate images suitable for X Video, pad should be set to 4.
heightdesired height (in pixels) of the YUV image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If height is set to 0, then only the width will be considered when determining the scaled image size. If the scaled height is not an even multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed within TurboJPEG.
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1704,7 +1837,7 @@ If you choose option 1, *jpegSize should be set to the size of your - + @@ -1712,16 +1845,16 @@ If you choose option 1, *jpegSize should be set to the size of your
DLLEXPORT int DLLCALL tjDecompressToYUVPlanes DLLEXPORT int tjDecompressToYUVPlanes ( tjhandle  handle,
handlea handle to a TurboJPEG decompressor or transformer instance
jpegBufpointer to a buffer containing the JPEG image to decompress
jpegSizesize of the JPEG image (in bytes)
dstPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if decompressing a grayscale image) that will receive the YUV image. These planes can be contiguous or non-contiguous in memory. Use tjPlaneSizeYUV() to determine the appropriate size for each plane based on the scaled image width, scaled image height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
dstPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if decompressing a grayscale image) that will receive the YUV image. These planes can be contiguous or non-contiguous in memory. Use tjPlaneSizeYUV() to determine the appropriate size for each plane based on the scaled image width, scaled image height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
widthdesired width (in pixels) of the YUV image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If width is set to 0, then only the height will be considered when determining the scaled image size. If the scaled width is not an even multiple of the MCU block width (see tjMCUWidth), then an intermediate buffer copy will be performed within TurboJPEG.
stridesan array of integers, each specifying the number of bytes per line in the corresponding plane of the output image. Setting the stride for any plane to 0 is the same as setting it to the scaled plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective scaled plane widths. You can adjust the strides in order to add an arbitrary amount of line padding to each plane or to decompress the JPEG image into a subregion of a larger YUV planar image.
heightdesired height (in pixels) of the YUV image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If height is set to 0, then only the width will be considered when determining the scaled image size. If the scaled height is not an even multiple of the MCU block height (see tjMCUHeight), then an intermediate buffer copy will be performed within TurboJPEG.
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1737,16 +1870,16 @@ If you choose option 1, *jpegSize should be set to the size of your
DLLEXPORT int DLLCALL tjDestroy DLLEXPORT int tjDestroy ( tjhandle  handle)
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2().)
- +
- + @@ -1823,23 +1956,23 @@ If you choose option 1, *jpegSize should be set to the size of your - +
DLLEXPORT int DLLCALL tjEncodeYUV3 DLLEXPORT int tjEncodeYUV3 ( tjhandle  handle,
pitchbytes per line in the source image. Normally, this should be width * tjPixelSize[pixelFormat] if the image is unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each line of the image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
heightheight (in pixels) of the source image
pixelFormatpixel format of the source image (see Pixel formats.)
dstBufpointer to an image buffer that will receive the YUV image. Use tjBufSizeYUV2() to determine the appropriate size for this buffer based on the image width, height, padding, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the buffer (refer to YUV Image Format Notes.)
dstBufpointer to an image buffer that will receive the YUV image. Use tjBufSizeYUV2() to determine the appropriate size for this buffer based on the image width, height, padding, and level of chrominance subsampling. The Y, U (Cb), and V (Cr) image planes will be stored sequentially in the buffer (refer to YUV Image Format Notes.)
padthe width of each line in each plane of the YUV image will be padded to the nearest multiple of this number of bytes (must be a power of 2.) To generate images suitable for X Video, pad should be set to 4.
subsampthe level of chrominance subsampling to be used when generating the YUV image (see Chrominance subsampling options.) To generate images suitable for X Video, subsamp should be set to TJSAMP_420. This produces an image compatible with the I420 (AKA "YUV420P") format.
flagsthe bitwise OR of one or more of the flags
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1916,23 +2049,23 @@ If you choose option 1, *jpegSize should be set to the size of your - +
DLLEXPORT int DLLCALL tjEncodeYUVPlanes DLLEXPORT int tjEncodeYUVPlanes ( tjhandle  handle,
pitchbytes per line in the source image. Normally, this should be width * tjPixelSize[pixelFormat] if the image is unpadded, or TJPAD(width * tjPixelSize[pixelFormat]) if each line of the image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
heightheight (in pixels) of the source image
pixelFormatpixel format of the source image (see Pixel formats.)
dstPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if generating a grayscale image) that will receive the encoded image. These planes can be contiguous or non-contiguous in memory. Use tjPlaneSizeYUV() to determine the appropriate size for each plane based on the image width, height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
dstPlanesan array of pointers to Y, U (Cb), and V (Cr) image planes (or just a Y plane, if generating a grayscale image) that will receive the encoded image. These planes can be contiguous or non-contiguous in memory. Use tjPlaneSizeYUV() to determine the appropriate size for each plane based on the image width, height, strides, and level of chrominance subsampling. Refer to YUV Image Format Notes for more details.
stridesan array of integers, each specifying the number of bytes per line in the corresponding plane of the output image. Setting the stride for any plane to 0 is the same as setting it to the plane width (see YUV Image Format Notes.) If strides is NULL, then the strides for all planes will be set to their respective plane widths. You can adjust the strides in order to add an arbitrary amount of line padding to each plane or to encode an RGB or grayscale image into a subregion of a larger YUV planar image.
subsampthe level of chrominance subsampling to be used when generating the YUV image (see Chrominance subsampling options.) To generate images suitable for X Video, subsamp should be set to TJSAMP_420. This produces an image compatible with the I420 (AKA "YUV420P") format.
flagsthe bitwise OR of one or more of the flags
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)
- +
- + @@ -1942,42 +2075,74 @@ If you choose option 1, *jpegSize should be set to the size of your

Free an image buffer previously allocated by TurboJPEG.

-

You should always use this function to free JPEG destination buffer(s) that were automatically (re)allocated by the compression and transform functions or that were manually allocated using tjAlloc().

+

You should always use this function to free JPEG destination buffer(s) that were automatically (re)allocated by the compression and transform functions or that were manually allocated using tjAlloc().

Parameters
DLLEXPORT void DLLCALL tjFree DLLEXPORT void tjFree ( unsigned char *  buffer)
bufferaddress of the buffer to free
-
See Also
tjAlloc()
+
See Also
tjAlloc()
- +
- + - - + + + + +
DLLEXPORT char* DLLCALL tjGetErrorStr DLLEXPORT int tjGetErrorCode (void )tjhandle handle)
+
+ +

Returns a code indicating the severity of the last error.

+

See Error codes.

+
Parameters
+ + +
handlea handle to a TurboJPEG compressor, decompressor, or transformer instance
+
+
+
Returns
a code indicating the severity of the last error. See Error codes.
+ +
+
+ +
+
+ + + + + +
DLLEXPORT char* tjGetErrorStr2 (tjhandle handle)

Returns a descriptive error message explaining why the last command failed.

+
Parameters
+ + +
handlea handle to a TurboJPEG compressor, decompressor, or transformer instance, or NULL if the error was generated by a global function (but note that retrieving the error message for a global function is not thread-safe.)
+
+
Returns
a descriptive error message explaining why the last command failed.
- +
- + @@ -1993,16 +2158,16 @@ If you choose option 1, *jpegSize should be set to the size of your
DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors DLLEXPORT tjscalingfactor* tjGetScalingFactors ( int *  numscalingfactors)
-
Returns
a pointer to a list of fractional scaling factors, or NULL if an error is encountered (see tjGetErrorStr().)
+
Returns
a pointer to a list of fractional scaling factors, or NULL if an error is encountered (see tjGetErrorStr2().)
- +
- + @@ -2012,16 +2177,16 @@ If you choose option 1, *jpegSize should be set to the size of your

Create a TurboJPEG compressor instance.

-
Returns
a handle to the newly-created instance, or NULL if an error occurred (see tjGetErrorStr().)
+
Returns
a handle to the newly-created instance, or NULL if an error occurred (see tjGetErrorStr2().)
- +
DLLEXPORT tjhandle DLLCALL tjInitCompress DLLEXPORT tjhandle tjInitCompress ( void  )
- + @@ -2031,16 +2196,16 @@ If you choose option 1, *jpegSize should be set to the size of your

Create a TurboJPEG decompressor instance.

-
Returns
a handle to the newly-created instance, or NULL if an error occurred (see tjGetErrorStr().)
+
Returns
a handle to the newly-created instance, or NULL if an error occurred (see tjGetErrorStr2().)
- +
DLLEXPORT tjhandle DLLCALL tjInitDecompress DLLEXPORT tjhandle tjInitDecompress ( void  )
- + @@ -2050,7 +2215,77 @@ If you choose option 1, *jpegSize should be set to the size of your

Create a new TurboJPEG transformer instance.

-
Returns
a handle to the newly-created instance, or NULL if an error occurred (see tjGetErrorStr().)
+
Returns
a handle to the newly-created instance, or NULL if an error occurred (see tjGetErrorStr2().)
+ +
+ + +
+
+
DLLEXPORT tjhandle DLLCALL tjInitTransform DLLEXPORT tjhandle tjInitTransform ( void  )
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DLLEXPORT unsigned char* tjLoadImage (const char * filename,
int * width,
int align,
int * height,
int * pixelFormat,
int flags 
)
+
+ +

Load an uncompressed image from disk into memory.

+
Parameters
+ + + + + + + +
filenamename of a file containing an uncompressed image in Windows BMP or PBMPLUS (PPM/PGM) format
widthpointer to an integer variable that will receive the width (in pixels) of the uncompressed image
alignrow alignment of the image buffer to be returned (must be a power of 2.) For instance, setting this parameter to 4 will cause all rows in the image buffer to be padded to the nearest 32-bit boundary, and setting this parameter to 1 will cause all rows in the image buffer to be unpadded.
heightpointer to an integer variable that will receive the height (in pixels) of the uncompressed image
pixelFormatpointer to an integer variable that specifies or will receive the pixel format of the uncompressed image buffer. The behavior of tjLoadImage() will vary depending on the value of *pixelFormat passed to the function:
    +
  • TJPF_UNKNOWN : The uncompressed image buffer returned by the function will use the most optimal pixel format for the file type, and *pixelFormat will contain the ID of this pixel format upon successful return from the function.
  • +
  • TJPF_GRAY : Only PGM files and 8-bit BMP files with a grayscale colormap can be loaded.
  • +
  • TJPF_CMYK : The RGB or grayscale pixels stored in the file will be converted using a quick & dirty algorithm that is suitable only for testing purposes (proper conversion between CMYK and other formats requires a color management system.)
  • +
  • Other pixel formats : The uncompressed image buffer will use the specified pixel format, and pixel format conversion will be performed if necessary.
  • +
+
flagsthe bitwise OR of one or more of the flags.
+
+
+
Returns
a pointer to a newly-allocated buffer containing the uncompressed image, converted to the chosen pixel format and with the chosen row alignment, or NULL if an error occurred (see tjGetErrorStr2().) This buffer should be freed using tjFree().
@@ -2098,12 +2333,12 @@ If you choose option 1, *jpegSize should be set to the size of your - +
- + @@ -2199,12 +2434,83 @@ If you choose option 1, *jpegSize should be set to the size of your - + +
+
+
DLLEXPORT unsigned long DLLCALL tjPlaneSizeYUV DLLEXPORT unsigned long tjPlaneSizeYUV ( int  componentID,
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DLLEXPORT int tjSaveImage (const char * filename,
unsigned char * buffer,
int width,
int pitch,
int height,
int pixelFormat,
int flags 
)
+
+ +

Save an uncompressed image from memory to disk.

+
Parameters
+ + + + + + + + +
filenamename of a file to which to save the uncompressed image. The image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format, depending on the file extension.
bufferpointer to an image buffer containing RGB, grayscale, or CMYK pixels to be saved
widthwidth (in pixels) of the uncompressed image
pitchbytes per line in the image buffer. Setting this parameter to 0 is the equivalent of setting it to width * tjPixelSize[pixelFormat].
heightheight (in pixels) of the uncompressed image
pixelFormatpixel format of the image buffer (see Pixel formats.) If this parameter is set to TJPF_GRAY, then the image will be stored in PGM or 8-bit (indexed color) BMP format. Otherwise, the image will be stored in PPM or 24-bit BMP format. If this parameter is set to TJPF_CMYK, then the CMYK pixels will be converted to RGB using a quick & dirty algorithm that is suitable only for testing (proper conversion between CMYK and other formats requires a color management system.)
flagsthe bitwise OR of one or more of the flags.
+
+
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2().)
+ +
+
+
- + @@ -2268,9 +2574,9 @@ If you choose option 1, *jpegSize should be set to the size of your @@ -2279,11 +2585,34 @@ If you choose option 1, dstSizes[i] should be set to the size of yo
DLLEXPORT int DLLCALL tjTransform DLLEXPORT int tjTransform ( tjhandle  handle,
jpegSizesize of the JPEG source image (in bytes)
nthe number of transformed JPEG images to generate
dstBufspointer to an array of n image buffers. dstBufs[i] will receive a JPEG image that has been transformed using the parameters in transforms[i]. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:
    -
  1. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  2. +
  3. pre-allocate the JPEG buffer with an arbitrary size using tjAlloc() and let TurboJPEG grow the buffer as needed,
  4. set dstBufs[i] to NULL to tell TurboJPEG to allocate the buffer for you, or
  5. -
  6. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize() with the transformed or cropped width and height. Under normal circumstances, this should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.) Note, however, that there are some rare cases (such as transforming images with a large amount of embedded EXIF or ICC profile data) in which the output image will be larger than the worst-case size, and TJFLAG_NOREALLOC cannot be used in those cases.
  7. +
  8. pre-allocate the buffer to a "worst case" size determined by calling tjBufSize() with the transformed or cropped width and height. Under normal circumstances, this should ensure that the buffer never has to be re-allocated (setting TJFLAG_NOREALLOC guarantees that it won't be.) Note, however, that there are some rare cases (such as transforming images with a large amount of embedded EXIF or ICC profile data) in which the output image will be larger than the worst-case size, and TJFLAG_NOREALLOC cannot be used in those cases.
If you choose option 1, dstSizes[i] should be set to the size of your pre-allocated buffer. In any case, unless you have set TJFLAG_NOREALLOC, you should always check dstBufs[i] upon return from this function, as it may have changed.
dstSizespointer to an array of n unsigned long variables that will receive the actual sizes (in bytes) of each transformed JPEG image. If dstBufs[i] points to a pre-allocated buffer, then dstSizes[i] should be set to the size of the buffer. Upon return, dstSizes[i] will contain the size of the JPEG image (in bytes.)
-
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr().)
+
Returns
0 if successful, or -1 if an error occurred (see tjGetErrorStr2() and tjGetErrorCode().)

Variable Documentation

+ +
+
+ + + + + +
+ + + + +
const int tjAlphaOffset[TJ_NUMPF]
+
+static
+
+ +

Alpha offset (in bytes) for a given pixel format.

+

This specifies the number of bytes that the alpha component is offset from the start of the pixel. For instance, if a pixel of format TJPF_BGRA is stored in char pixel[], then the alpha component will be pixel[tjAlphaOffset[TJPF_BGRA]]. This will be -1 if the pixel format does not have an alpha component.

+ +
+
@@ -2303,7 +2632,7 @@ If you choose option 1, dstSizes[i] should be set to the size of yo

Blue offset (in bytes) for a given pixel format.

-

This specifies the number of bytes that the Blue component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in char pixel[], then the blue component will be pixel[tjBlueOffset[TJ_BGRX]].

+

This specifies the number of bytes that the blue component is offset from the start of the pixel. For instance, if a pixel of format TJPF_BGRX is stored in char pixel[], then the blue component will be pixel[tjBlueOffset[TJPF_BGRX]]. This will be -1 if the pixel format does not have a blue component.

@@ -2326,7 +2655,7 @@ If you choose option 1, dstSizes[i] should be set to the size of yo

Green offset (in bytes) for a given pixel format.

-

This specifies the number of bytes that the green component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in char pixel[], then the green component will be pixel[tjGreenOffset[TJ_BGRX]].

+

This specifies the number of bytes that the green component is offset from the start of the pixel. For instance, if a pixel of format TJPF_BGRX is stored in char pixel[], then the green component will be pixel[tjGreenOffset[TJPF_BGRX]]. This will be -1 if the pixel format does not have a green component.

@@ -2431,7 +2760,7 @@ If you choose option 1, dstSizes[i] should be set to the size of yo

Red offset (in bytes) for a given pixel format.

-

This specifies the number of bytes that the red component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in char pixel[], then the red component will be pixel[tjRedOffset[TJ_BGRX]].

+

This specifies the number of bytes that the red component is offset from the start of the pixel. For instance, if a pixel of format TJPF_BGRX is stored in char pixel[], then the red component will be pixel[tjRedOffset[TJPF_BGRX]]. This will be -1 if the pixel format does not have a red component.

diff --git a/doc/html/index.html b/doc/html/index.html index 3cc1b3e..a60f4d0 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -24,7 +24,7 @@
TurboJPEG -  1.5 +  2.0
diff --git a/doc/html/modules.html b/doc/html/modules.html index 8e6f815..e79f226 100644 --- a/doc/html/modules.html +++ b/doc/html/modules.html @@ -24,7 +24,7 @@
TurboJPEG -  1.5 +  2.0
diff --git a/doc/html/search/all_74.js b/doc/html/search/all_74.js index 444aaef..5b97c71 100644 --- a/doc/html/search/all_74.js +++ b/doc/html/search/all_74.js @@ -1,44 +1,53 @@ var searchData= [ ['tj_5fnumcs',['TJ_NUMCS',['../group___turbo_j_p_e_g.html#ga39f57a6fb02d9cf32e7b6890099b5a71',1,'turbojpeg.h']]], + ['tj_5fnumerr',['TJ_NUMERR',['../group___turbo_j_p_e_g.html#ga79bde1b4a3e2351e00887e47781b966e',1,'turbojpeg.h']]], ['tj_5fnumpf',['TJ_NUMPF',['../group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e',1,'turbojpeg.h']]], ['tj_5fnumsamp',['TJ_NUMSAMP',['../group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c',1,'turbojpeg.h']]], ['tj_5fnumxop',['TJ_NUMXOP',['../group___turbo_j_p_e_g.html#ga0f6dbd18adf38b7d46ac547f0f4d562c',1,'turbojpeg.h']]], - ['tjalloc',['tjAlloc',['../group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff',1,'turbojpeg.h']]], + ['tjalloc',['tjAlloc',['../group___turbo_j_p_e_g.html#gaec627dd4c5f30b7a775a7aea3bec5d83',1,'turbojpeg.h']]], + ['tjalphaoffset',['tjAlphaOffset',['../group___turbo_j_p_e_g.html#ga5af0ab065feefd526debf1e20c43e837',1,'turbojpeg.h']]], ['tjblueoffset',['tjBlueOffset',['../group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea',1,'turbojpeg.h']]], - ['tjbufsize',['tjBufSize',['../group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b',1,'turbojpeg.h']]], - ['tjbufsizeyuv2',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9',1,'turbojpeg.h']]], - ['tjcompress2',['tjCompress2',['../group___turbo_j_p_e_g.html#gaf38f2ed44bdc88e730e08b632fa6e88e',1,'turbojpeg.h']]], - ['tjcompressfromyuv',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#ga6f6de375d6ec0020faba627e37e5a060',1,'turbojpeg.h']]], - ['tjcompressfromyuvplanes',['tjCompressFromYUVPlanes',['../group___turbo_j_p_e_g.html#ga0b84c682d8accf097d7a743c965d3464',1,'turbojpeg.h']]], + ['tjbufsize',['tjBufSize',['../group___turbo_j_p_e_g.html#ga67ac12fee79073242cb216e07c9f1f90',1,'turbojpeg.h']]], + 
['tjbufsizeyuv2',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#ga2be2b9969d4df9ecce9b05deed273194',1,'turbojpeg.h']]], + ['tjcompress2',['tjCompress2',['../group___turbo_j_p_e_g.html#gafbdce0112fd78fd38efae841443a9bcf',1,'turbojpeg.h']]], + ['tjcompressfromyuv',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#ga7622a459b79aa1007e005b58783f875b',1,'turbojpeg.h']]], + ['tjcompressfromyuvplanes',['tjCompressFromYUVPlanes',['../group___turbo_j_p_e_g.html#ga29ec5dfbd2d84b8724e951d6fa0d5d9e',1,'turbojpeg.h']]], ['tjcs',['TJCS',['../group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720',1,'turbojpeg.h']]], ['tjcs_5fcmyk',['TJCS_CMYK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a6c8b636152ac8195b869587db315ee53',1,'turbojpeg.h']]], ['tjcs_5fgray',['TJCS_GRAY',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720ab3e7d6a87f695e45b81c1b5262b5a50a',1,'turbojpeg.h']]], ['tjcs_5frgb',['TJCS_RGB',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555',1,'turbojpeg.h']]], ['tjcs_5fycbcr',['TJCS_YCbCr',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75',1,'turbojpeg.h']]], ['tjcs_5fycck',['TJCS_YCCK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e',1,'turbojpeg.h']]], - ['tjdecodeyuv',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga077c61027b875afecd5a1613bf18b3c1',1,'turbojpeg.h']]], - ['tjdecodeyuvplanes',['tjDecodeYUVPlanes',['../group___turbo_j_p_e_g.html#gaf42f19b7a496eb18bdc84fe61ee6d3e2',1,'turbojpeg.h']]], - ['tjdecompress2',['tjDecompress2',['../group___turbo_j_p_e_g.html#gad8026a417e16a76313bc0a6c9e8b2ba2',1,'turbojpeg.h']]], - ['tjdecompressheader3',['tjDecompressHeader3',['../group___turbo_j_p_e_g.html#ga3fced455e504e8ff4fbad28ba94a3020',1,'turbojpeg.h']]], - 
['tjdecompresstoyuv2',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga39e08906528db5a764670ea48d344b09',1,'turbojpeg.h']]], - ['tjdecompresstoyuvplanes',['tjDecompressToYUVPlanes',['../group___turbo_j_p_e_g.html#ga38d0ef90692663b3ffb5b16da2541512',1,'turbojpeg.h']]], - ['tjdestroy',['tjDestroy',['../group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540',1,'turbojpeg.h']]], - ['tjencodeyuv3',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#gaabe05acd734990053ad1294b5ef239aa',1,'turbojpeg.h']]], - ['tjencodeyuvplanes',['tjEncodeYUVPlanes',['../group___turbo_j_p_e_g.html#ga8a65ed3bd12df57c219d46afbc9008f1',1,'turbojpeg.h']]], + ['tjdecodeyuv',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga70abbf38f77a26fd6da8813bef96f695',1,'turbojpeg.h']]], + ['tjdecodeyuvplanes',['tjDecodeYUVPlanes',['../group___turbo_j_p_e_g.html#ga10e837c07fa9d25770565b237d3898d9',1,'turbojpeg.h']]], + ['tjdecompress2',['tjDecompress2',['../group___turbo_j_p_e_g.html#gae9eccef8b682a48f43a9117c231ed013',1,'turbojpeg.h']]], + ['tjdecompressheader3',['tjDecompressHeader3',['../group___turbo_j_p_e_g.html#ga0595681096bba7199cc6f3533cb25f77',1,'turbojpeg.h']]], + ['tjdecompresstoyuv2',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga04d1e839ff9a0860dd1475cff78d3364',1,'turbojpeg.h']]], + ['tjdecompresstoyuvplanes',['tjDecompressToYUVPlanes',['../group___turbo_j_p_e_g.html#gaa59f901a5258ada5bd0185ad59368540',1,'turbojpeg.h']]], + ['tjdestroy',['tjDestroy',['../group___turbo_j_p_e_g.html#ga75f355fa27225ba1a4ee392c852394d2',1,'turbojpeg.h']]], + ['tjencodeyuv3',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#gac519b922cdf446e97d0cdcba513636bf',1,'turbojpeg.h']]], + ['tjencodeyuvplanes',['tjEncodeYUVPlanes',['../group___turbo_j_p_e_g.html#gae2d04c72457fe7f4d60cf78ab1b1feb1',1,'turbojpeg.h']]], + ['tjerr',['TJERR',['../group___turbo_j_p_e_g.html#gafbc17cfa57d0d5d11fea35ac025950fe',1,'turbojpeg.h']]], + 
['tjerr_5ffatal',['TJERR_FATAL',['../group___turbo_j_p_e_g.html#ggafbc17cfa57d0d5d11fea35ac025950feafc9cceeada13122b09e4851e3788039a',1,'turbojpeg.h']]], + ['tjerr_5fwarning',['TJERR_WARNING',['../group___turbo_j_p_e_g.html#ggafbc17cfa57d0d5d11fea35ac025950fea342dd6e2aedb47bb257b4e7568329b59',1,'turbojpeg.h']]], ['tjflag_5faccuratedct',['TJFLAG_ACCURATEDCT',['../group___turbo_j_p_e_g.html#gacb233cfd722d66d1ccbf48a7de81f0e0',1,'turbojpeg.h']]], ['tjflag_5fbottomup',['TJFLAG_BOTTOMUP',['../group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec',1,'turbojpeg.h']]], ['tjflag_5ffastdct',['TJFLAG_FASTDCT',['../group___turbo_j_p_e_g.html#gaabce235db80d3f698b27f36cbd453da2',1,'turbojpeg.h']]], ['tjflag_5ffastupsample',['TJFLAG_FASTUPSAMPLE',['../group___turbo_j_p_e_g.html#ga4ee4506c81177a06f77e2504a22efd2d',1,'turbojpeg.h']]], ['tjflag_5fnorealloc',['TJFLAG_NOREALLOC',['../group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963',1,'turbojpeg.h']]], - ['tjfree',['tjFree',['../group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137',1,'turbojpeg.h']]], - ['tjgeterrorstr',['tjGetErrorStr',['../group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf',1,'turbojpeg.h']]], - ['tjgetscalingfactors',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8',1,'turbojpeg.h']]], + ['tjflag_5fprogressive',['TJFLAG_PROGRESSIVE',['../group___turbo_j_p_e_g.html#ga43b426750b46190a25d34a67ef76df1b',1,'turbojpeg.h']]], + ['tjflag_5fstoponwarning',['TJFLAG_STOPONWARNING',['../group___turbo_j_p_e_g.html#ga519cfa4ef6c18d9e5b455fdf59306a3a',1,'turbojpeg.h']]], + ['tjfree',['tjFree',['../group___turbo_j_p_e_g.html#gaea863d2da0cdb609563aabdf9196514b',1,'turbojpeg.h']]], + ['tjgeterrorcode',['tjGetErrorCode',['../group___turbo_j_p_e_g.html#ga414feeffbf860ebd31c745df203de410',1,'turbojpeg.h']]], + ['tjgeterrorstr2',['tjGetErrorStr2',['../group___turbo_j_p_e_g.html#ga1ead8574f9f39fbafc6b497124e7aafa',1,'turbojpeg.h']]], + 
['tjgetscalingfactors',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#gac3854476006b10787bd128f7ede48057',1,'turbojpeg.h']]], ['tjgreenoffset',['tjGreenOffset',['../group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f',1,'turbojpeg.h']]], ['tjhandle',['tjhandle',['../group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763',1,'turbojpeg.h']]], - ['tjinitcompress',['tjInitCompress',['../group___turbo_j_p_e_g.html#ga3d10c47fbe4a2489a2b30c931551d01a',1,'turbojpeg.h']]], - ['tjinitdecompress',['tjInitDecompress',['../group___turbo_j_p_e_g.html#gae5408179d041e2a2f7199c8283cf649e',1,'turbojpeg.h']]], - ['tjinittransform',['tjInitTransform',['../group___turbo_j_p_e_g.html#ga3155b775bfbac9dbba869b95a0367902',1,'turbojpeg.h']]], + ['tjinitcompress',['tjInitCompress',['../group___turbo_j_p_e_g.html#ga9d63a05fc6d813f4aae06107041a37e8',1,'turbojpeg.h']]], + ['tjinitdecompress',['tjInitDecompress',['../group___turbo_j_p_e_g.html#ga52300eac3f3d9ef4bab303bc244f62d3',1,'turbojpeg.h']]], + ['tjinittransform',['tjInitTransform',['../group___turbo_j_p_e_g.html#ga928beff6ac248ceadf01089fc6b41957',1,'turbojpeg.h']]], + ['tjloadimage',['tjLoadImage',['../group___turbo_j_p_e_g.html#gaffbd83c375e79f5db4b5c5d8ad4466e7',1,'turbojpeg.h']]], ['tjmcuheight',['tjMCUHeight',['../group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf',1,'turbojpeg.h']]], ['tjmcuwidth',['tjMCUWidth',['../group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c',1,'turbojpeg.h']]], ['tjpad',['TJPAD',['../group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511',1,'turbojpeg.h']]], @@ -53,11 +62,12 @@ var searchData= ['tjpf_5frgb',['TJPF_RGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c',1,'turbojpeg.h']]], ['tjpf_5frgba',['TJPF_RGBA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12',1,'turbojpeg.h']]], 
['tjpf_5frgbx',['TJPF_RGBX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01',1,'turbojpeg.h']]], + ['tjpf_5funknown',['TJPF_UNKNOWN',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa84c1a6cead7952998e2fb895844a21ed',1,'turbojpeg.h']]], ['tjpf_5fxbgr',['TJPF_XBGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af',1,'turbojpeg.h']]], ['tjpf_5fxrgb',['TJPF_XRGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84',1,'turbojpeg.h']]], ['tjpixelsize',['tjPixelSize',['../group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c',1,'turbojpeg.h']]], ['tjplaneheight',['tjPlaneHeight',['../group___turbo_j_p_e_g.html#ga1a209696c6a80748f20e134b3c64789f',1,'turbojpeg.h']]], - ['tjplanesizeyuv',['tjPlaneSizeYUV',['../group___turbo_j_p_e_g.html#ga6f98d977bfa9d167c97172e876ba61e2',1,'turbojpeg.h']]], + ['tjplanesizeyuv',['tjPlaneSizeYUV',['../group___turbo_j_p_e_g.html#gab4ab7b24f6e797d79abaaa670373961d',1,'turbojpeg.h']]], ['tjplanewidth',['tjPlaneWidth',['../group___turbo_j_p_e_g.html#ga63fb66bb1e36c74008c4634360becbb1',1,'turbojpeg.h']]], ['tjredoffset',['tjRedOffset',['../group___turbo_j_p_e_g.html#gadd9b446742ac8a3923f7992c7988fea8',1,'turbojpeg.h']]], ['tjregion',['tjregion',['../structtjregion.html',1,'']]], @@ -68,9 +78,10 @@ var searchData= ['tjsamp_5f440',['TJSAMP_440',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974',1,'turbojpeg.h']]], ['tjsamp_5f444',['TJSAMP_444',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3',1,'turbojpeg.h']]], ['tjsamp_5fgray',['TJSAMP_GRAY',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248',1,'turbojpeg.h']]], + 
['tjsaveimage',['tjSaveImage',['../group___turbo_j_p_e_g.html#ga6f445b22d8933ae4815b3370a538d879',1,'turbojpeg.h']]], ['tjscaled',['TJSCALED',['../group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df',1,'turbojpeg.h']]], ['tjscalingfactor',['tjscalingfactor',['../structtjscalingfactor.html',1,'']]], - ['tjtransform',['tjtransform',['../structtjtransform.html',1,'tjtransform'],['../group___turbo_j_p_e_g.html#gad02cd42b69f193a0623a9c801788df3a',1,'tjTransform(tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int flags): turbojpeg.h'],['../group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31',1,'tjtransform(): turbojpeg.h']]], + ['tjtransform',['tjtransform',['../structtjtransform.html',1,'tjtransform'],['../group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31',1,'tjtransform(): turbojpeg.h'],['../group___turbo_j_p_e_g.html#ga9cb8abf4cc91881e04a0329b2270be25',1,'tjTransform(tjhandle handle, const unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int flags): turbojpeg.h']]], ['tjxop',['TJXOP',['../group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866',1,'turbojpeg.h']]], ['tjxop_5fhflip',['TJXOP_HFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce',1,'turbojpeg.h']]], ['tjxop_5fnone',['TJXOP_NONE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27',1,'turbojpeg.h']]], @@ -80,10 +91,12 @@ var searchData= ['tjxop_5ftranspose',['TJXOP_TRANSPOSE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a31060aed199f886afdd417f80499c32d',1,'turbojpeg.h']]], ['tjxop_5ftransverse',['TJXOP_TRANSVERSE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866af3b14d488aea6ece9e5b3df73a74d6a4',1,'turbojpeg.h']]], 
['tjxop_5fvflip',['TJXOP_VFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d',1,'turbojpeg.h']]], + ['tjxopt_5fcopynone',['TJXOPT_COPYNONE',['../group___turbo_j_p_e_g.html#ga153b468cfb905d0de61706c838986fe8',1,'turbojpeg.h']]], ['tjxopt_5fcrop',['TJXOPT_CROP',['../group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2',1,'turbojpeg.h']]], ['tjxopt_5fgray',['TJXOPT_GRAY',['../group___turbo_j_p_e_g.html#ga3acee7b48ade1b99e5588736007c2589',1,'turbojpeg.h']]], ['tjxopt_5fnooutput',['TJXOPT_NOOUTPUT',['../group___turbo_j_p_e_g.html#gafbf992bbf6e006705886333703ffab31',1,'turbojpeg.h']]], ['tjxopt_5fperfect',['TJXOPT_PERFECT',['../group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00',1,'turbojpeg.h']]], + ['tjxopt_5fprogressive',['TJXOPT_PROGRESSIVE',['../group___turbo_j_p_e_g.html#gad2371c80674584ecc1a7d75e564cf026',1,'turbojpeg.h']]], ['tjxopt_5ftrim',['TJXOPT_TRIM',['../group___turbo_j_p_e_g.html#ga319826b7eb1583c0595bbe7b95428709',1,'turbojpeg.h']]], ['turbojpeg',['TurboJPEG',['../group___turbo_j_p_e_g.html',1,'']]] ]; diff --git a/doc/html/search/enums_74.js b/doc/html/search/enums_74.js index 276aa24..19c20cf 100644 --- a/doc/html/search/enums_74.js +++ b/doc/html/search/enums_74.js @@ -1,6 +1,7 @@ var searchData= [ ['tjcs',['TJCS',['../group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720',1,'turbojpeg.h']]], + ['tjerr',['TJERR',['../group___turbo_j_p_e_g.html#gafbc17cfa57d0d5d11fea35ac025950fe',1,'turbojpeg.h']]], ['tjpf',['TJPF',['../group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a',1,'turbojpeg.h']]], ['tjsamp',['TJSAMP',['../group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074',1,'turbojpeg.h']]], ['tjxop',['TJXOP',['../group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866',1,'turbojpeg.h']]] diff --git a/doc/html/search/enumvalues_74.js b/doc/html/search/enumvalues_74.js index 7dc2f8d..e683856 100644 --- a/doc/html/search/enumvalues_74.js +++ 
b/doc/html/search/enumvalues_74.js @@ -5,6 +5,8 @@ var searchData= ['tjcs_5frgb',['TJCS_RGB',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555',1,'turbojpeg.h']]], ['tjcs_5fycbcr',['TJCS_YCbCr',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75',1,'turbojpeg.h']]], ['tjcs_5fycck',['TJCS_YCCK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e',1,'turbojpeg.h']]], + ['tjerr_5ffatal',['TJERR_FATAL',['../group___turbo_j_p_e_g.html#ggafbc17cfa57d0d5d11fea35ac025950feafc9cceeada13122b09e4851e3788039a',1,'turbojpeg.h']]], + ['tjerr_5fwarning',['TJERR_WARNING',['../group___turbo_j_p_e_g.html#ggafbc17cfa57d0d5d11fea35ac025950fea342dd6e2aedb47bb257b4e7568329b59',1,'turbojpeg.h']]], ['tjpf_5fabgr',['TJPF_ABGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081',1,'turbojpeg.h']]], ['tjpf_5fargb',['TJPF_ARGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c',1,'turbojpeg.h']]], ['tjpf_5fbgr',['TJPF_BGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839',1,'turbojpeg.h']]], @@ -15,6 +17,7 @@ var searchData= ['tjpf_5frgb',['TJPF_RGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c',1,'turbojpeg.h']]], ['tjpf_5frgba',['TJPF_RGBA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12',1,'turbojpeg.h']]], ['tjpf_5frgbx',['TJPF_RGBX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01',1,'turbojpeg.h']]], + ['tjpf_5funknown',['TJPF_UNKNOWN',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa84c1a6cead7952998e2fb895844a21ed',1,'turbojpeg.h']]], 
['tjpf_5fxbgr',['TJPF_XBGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af',1,'turbojpeg.h']]], ['tjpf_5fxrgb',['TJPF_XRGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84',1,'turbojpeg.h']]], ['tjsamp_5f411',['TJSAMP_411',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a28ec62575e5ea295c3fde3001dc628e2',1,'turbojpeg.h']]], diff --git a/doc/html/search/functions_74.js b/doc/html/search/functions_74.js index 69410b0..bd4f34f 100644 --- a/doc/html/search/functions_74.js +++ b/doc/html/search/functions_74.js @@ -1,28 +1,31 @@ var searchData= [ - ['tjalloc',['tjAlloc',['../group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff',1,'turbojpeg.h']]], - ['tjbufsize',['tjBufSize',['../group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b',1,'turbojpeg.h']]], - ['tjbufsizeyuv2',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9',1,'turbojpeg.h']]], - ['tjcompress2',['tjCompress2',['../group___turbo_j_p_e_g.html#gaf38f2ed44bdc88e730e08b632fa6e88e',1,'turbojpeg.h']]], - ['tjcompressfromyuv',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#ga6f6de375d6ec0020faba627e37e5a060',1,'turbojpeg.h']]], - ['tjcompressfromyuvplanes',['tjCompressFromYUVPlanes',['../group___turbo_j_p_e_g.html#ga0b84c682d8accf097d7a743c965d3464',1,'turbojpeg.h']]], - ['tjdecodeyuv',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga077c61027b875afecd5a1613bf18b3c1',1,'turbojpeg.h']]], - ['tjdecodeyuvplanes',['tjDecodeYUVPlanes',['../group___turbo_j_p_e_g.html#gaf42f19b7a496eb18bdc84fe61ee6d3e2',1,'turbojpeg.h']]], - ['tjdecompress2',['tjDecompress2',['../group___turbo_j_p_e_g.html#gad8026a417e16a76313bc0a6c9e8b2ba2',1,'turbojpeg.h']]], - ['tjdecompressheader3',['tjDecompressHeader3',['../group___turbo_j_p_e_g.html#ga3fced455e504e8ff4fbad28ba94a3020',1,'turbojpeg.h']]], - 
['tjdecompresstoyuv2',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga39e08906528db5a764670ea48d344b09',1,'turbojpeg.h']]], - ['tjdecompresstoyuvplanes',['tjDecompressToYUVPlanes',['../group___turbo_j_p_e_g.html#ga38d0ef90692663b3ffb5b16da2541512',1,'turbojpeg.h']]], - ['tjdestroy',['tjDestroy',['../group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540',1,'turbojpeg.h']]], - ['tjencodeyuv3',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#gaabe05acd734990053ad1294b5ef239aa',1,'turbojpeg.h']]], - ['tjencodeyuvplanes',['tjEncodeYUVPlanes',['../group___turbo_j_p_e_g.html#ga8a65ed3bd12df57c219d46afbc9008f1',1,'turbojpeg.h']]], - ['tjfree',['tjFree',['../group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137',1,'turbojpeg.h']]], - ['tjgeterrorstr',['tjGetErrorStr',['../group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf',1,'turbojpeg.h']]], - ['tjgetscalingfactors',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8',1,'turbojpeg.h']]], - ['tjinitcompress',['tjInitCompress',['../group___turbo_j_p_e_g.html#ga3d10c47fbe4a2489a2b30c931551d01a',1,'turbojpeg.h']]], - ['tjinitdecompress',['tjInitDecompress',['../group___turbo_j_p_e_g.html#gae5408179d041e2a2f7199c8283cf649e',1,'turbojpeg.h']]], - ['tjinittransform',['tjInitTransform',['../group___turbo_j_p_e_g.html#ga3155b775bfbac9dbba869b95a0367902',1,'turbojpeg.h']]], + ['tjalloc',['tjAlloc',['../group___turbo_j_p_e_g.html#gaec627dd4c5f30b7a775a7aea3bec5d83',1,'turbojpeg.h']]], + ['tjbufsize',['tjBufSize',['../group___turbo_j_p_e_g.html#ga67ac12fee79073242cb216e07c9f1f90',1,'turbojpeg.h']]], + ['tjbufsizeyuv2',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#ga2be2b9969d4df9ecce9b05deed273194',1,'turbojpeg.h']]], + ['tjcompress2',['tjCompress2',['../group___turbo_j_p_e_g.html#gafbdce0112fd78fd38efae841443a9bcf',1,'turbojpeg.h']]], + 
['tjcompressfromyuv',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#ga7622a459b79aa1007e005b58783f875b',1,'turbojpeg.h']]], + ['tjcompressfromyuvplanes',['tjCompressFromYUVPlanes',['../group___turbo_j_p_e_g.html#ga29ec5dfbd2d84b8724e951d6fa0d5d9e',1,'turbojpeg.h']]], + ['tjdecodeyuv',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga70abbf38f77a26fd6da8813bef96f695',1,'turbojpeg.h']]], + ['tjdecodeyuvplanes',['tjDecodeYUVPlanes',['../group___turbo_j_p_e_g.html#ga10e837c07fa9d25770565b237d3898d9',1,'turbojpeg.h']]], + ['tjdecompress2',['tjDecompress2',['../group___turbo_j_p_e_g.html#gae9eccef8b682a48f43a9117c231ed013',1,'turbojpeg.h']]], + ['tjdecompressheader3',['tjDecompressHeader3',['../group___turbo_j_p_e_g.html#ga0595681096bba7199cc6f3533cb25f77',1,'turbojpeg.h']]], + ['tjdecompresstoyuv2',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga04d1e839ff9a0860dd1475cff78d3364',1,'turbojpeg.h']]], + ['tjdecompresstoyuvplanes',['tjDecompressToYUVPlanes',['../group___turbo_j_p_e_g.html#gaa59f901a5258ada5bd0185ad59368540',1,'turbojpeg.h']]], + ['tjdestroy',['tjDestroy',['../group___turbo_j_p_e_g.html#ga75f355fa27225ba1a4ee392c852394d2',1,'turbojpeg.h']]], + ['tjencodeyuv3',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#gac519b922cdf446e97d0cdcba513636bf',1,'turbojpeg.h']]], + ['tjencodeyuvplanes',['tjEncodeYUVPlanes',['../group___turbo_j_p_e_g.html#gae2d04c72457fe7f4d60cf78ab1b1feb1',1,'turbojpeg.h']]], + ['tjfree',['tjFree',['../group___turbo_j_p_e_g.html#gaea863d2da0cdb609563aabdf9196514b',1,'turbojpeg.h']]], + ['tjgeterrorcode',['tjGetErrorCode',['../group___turbo_j_p_e_g.html#ga414feeffbf860ebd31c745df203de410',1,'turbojpeg.h']]], + ['tjgeterrorstr2',['tjGetErrorStr2',['../group___turbo_j_p_e_g.html#ga1ead8574f9f39fbafc6b497124e7aafa',1,'turbojpeg.h']]], + ['tjgetscalingfactors',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#gac3854476006b10787bd128f7ede48057',1,'turbojpeg.h']]], + 
['tjinitcompress',['tjInitCompress',['../group___turbo_j_p_e_g.html#ga9d63a05fc6d813f4aae06107041a37e8',1,'turbojpeg.h']]], + ['tjinitdecompress',['tjInitDecompress',['../group___turbo_j_p_e_g.html#ga52300eac3f3d9ef4bab303bc244f62d3',1,'turbojpeg.h']]], + ['tjinittransform',['tjInitTransform',['../group___turbo_j_p_e_g.html#ga928beff6ac248ceadf01089fc6b41957',1,'turbojpeg.h']]], + ['tjloadimage',['tjLoadImage',['../group___turbo_j_p_e_g.html#gaffbd83c375e79f5db4b5c5d8ad4466e7',1,'turbojpeg.h']]], ['tjplaneheight',['tjPlaneHeight',['../group___turbo_j_p_e_g.html#ga1a209696c6a80748f20e134b3c64789f',1,'turbojpeg.h']]], - ['tjplanesizeyuv',['tjPlaneSizeYUV',['../group___turbo_j_p_e_g.html#ga6f98d977bfa9d167c97172e876ba61e2',1,'turbojpeg.h']]], + ['tjplanesizeyuv',['tjPlaneSizeYUV',['../group___turbo_j_p_e_g.html#gab4ab7b24f6e797d79abaaa670373961d',1,'turbojpeg.h']]], ['tjplanewidth',['tjPlaneWidth',['../group___turbo_j_p_e_g.html#ga63fb66bb1e36c74008c4634360becbb1',1,'turbojpeg.h']]], - ['tjtransform',['tjTransform',['../group___turbo_j_p_e_g.html#gad02cd42b69f193a0623a9c801788df3a',1,'turbojpeg.h']]] + ['tjsaveimage',['tjSaveImage',['../group___turbo_j_p_e_g.html#ga6f445b22d8933ae4815b3370a538d879',1,'turbojpeg.h']]], + ['tjtransform',['tjTransform',['../group___turbo_j_p_e_g.html#ga9cb8abf4cc91881e04a0329b2270be25',1,'turbojpeg.h']]] ]; diff --git a/doc/html/search/variables_74.js b/doc/html/search/variables_74.js index 13a056e..2d20942 100644 --- a/doc/html/search/variables_74.js +++ b/doc/html/search/variables_74.js @@ -1,5 +1,6 @@ var searchData= [ + ['tjalphaoffset',['tjAlphaOffset',['../group___turbo_j_p_e_g.html#ga5af0ab065feefd526debf1e20c43e837',1,'turbojpeg.h']]], ['tjblueoffset',['tjBlueOffset',['../group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea',1,'turbojpeg.h']]], ['tjgreenoffset',['tjGreenOffset',['../group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f',1,'turbojpeg.h']]], 
['tjmcuheight',['tjMCUHeight',['../group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf',1,'turbojpeg.h']]], diff --git a/doc/html/structtjregion.html b/doc/html/structtjregion.html index af2a473..50a9adb 100644 --- a/doc/html/structtjregion.html +++ b/doc/html/structtjregion.html @@ -24,7 +24,7 @@
TurboJPEG -  1.5 +  2.0
diff --git a/doc/html/structtjscalingfactor.html b/doc/html/structtjscalingfactor.html index 3bb50f5..d7fa67b 100644 --- a/doc/html/structtjscalingfactor.html +++ b/doc/html/structtjscalingfactor.html @@ -24,7 +24,7 @@
TurboJPEG -  1.5 +  2.0
diff --git a/doc/html/structtjtransform.html b/doc/html/structtjtransform.html index 9fd97f7..fcf72ee 100644 --- a/doc/html/structtjtransform.html +++ b/doc/html/structtjtransform.html @@ -24,7 +24,7 @@
TurboJPEG -  1.5 +  2.0
@@ -133,7 +133,7 @@ Data Fields arrayRegiontjregion structure containing the width and height of the array pointed to by coeffs as well as its offset relative to the component plane. TurboJPEG implementations may choose to split each component plane into multiple DCT coefficient arrays and call the callback function once for each array. planeRegiontjregion structure containing the width and height of the component plane to which coeffs belongs componentIDID number of the component plane to which coeffs belongs (Y, Cb, and Cr have, respectively, ID's of 0, 1, and 2 in typical JPEG images.) - transformIDID number of the transformed image to which coeffs belongs. This is the same as the index of the transform in the transforms array that was passed to tjTransform(). + transformIDID number of the transformed image to which coeffs belongs. This is the same as the index of the transform in the transforms array that was passed to tjTransform(). transforma pointer to a tjtransform structure that specifies the parameters and/or cropping region for this transform diff --git a/doxygen.config b/doxygen.config index 1723123..cb884f9 100644 --- a/doxygen.config +++ b/doxygen.config @@ -1,5 +1,5 @@ PROJECT_NAME = TurboJPEG -PROJECT_NUMBER = 1.5 +PROJECT_NUMBER = 2.0 OUTPUT_DIRECTORY = doc/ USE_WINDOWS_ENCODING = NO OPTIMIZE_OUTPUT_FOR_C = YES diff --git a/example.c b/example.txt similarity index 92% rename from example.c rename to example.txt index ac27f49..04c11fe 100644 --- a/example.c +++ b/example.txt @@ -1,5 +1,5 @@ /* - * example.c + * example.txt * * This file illustrates how to use the IJG code as a subroutine library * to read or write JPEG image files. You should look at this code in @@ -13,6 +13,20 @@ * routines in a different style if you prefer. */ +/* This example was part of the original libjpeg documentation and has been + * unchanged since 1994. It is, as described in libjpeg.txt, "heavily + * commented skeleton code for calling the JPEG library." 
It is not meant to + * be compiled as a standalone program, since it has no main() function and + * does not compress from/decompress to a real image buffer (corollary: + * put_scanline_someplace() is not a real function.) First-time users of + * libjpeg-turbo would be better served by looking at tjexample.c, which uses + * the more straightforward TurboJPEG API, or at cjpeg.c and djpeg.c, which are + * examples of libjpeg API usage that can be (and are) compiled into standalone + * programs. Note that this example, as well as the examples in cjpeg.c and + * djpeg.c, interleave disk I/O with JPEG compression/decompression, so none of + * these examples is suitable for benchmarking purposes. + */ + #include /* @@ -69,7 +83,7 @@ extern int image_width; /* Number of columns in image */ */ GLOBAL(void) -write_JPEG_file (char *filename, int quality) +write_JPEG_file(char *filename, int quality) { /* This struct contains the JPEG compression parameters and pointers to * working space (which is allocated as needed by the JPEG library). @@ -158,8 +172,8 @@ write_JPEG_file (char *filename, int quality) * Here the array is only one element long, but you could pass * more than one scanline at a time if that's more convenient. */ - row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride]; - (void) jpeg_write_scanlines(&cinfo, row_pointer, 1); + row_pointer[0] = &image_buffer[cinfo.next_scanline * row_stride]; + (void)jpeg_write_scanlines(&cinfo, row_pointer, 1); } /* Step 6: Finish compression */ @@ -260,10 +274,10 @@ typedef struct my_error_mgr *my_error_ptr; */ METHODDEF(void) -my_error_exit (j_common_ptr cinfo) +my_error_exit(j_common_ptr cinfo) { /* cinfo->err really points to a my_error_mgr struct, so coerce pointer */ - my_error_ptr myerr = (my_error_ptr) cinfo->err; + my_error_ptr myerr = (my_error_ptr)cinfo->err; /* Always display the message. */ /* We could postpone this until after returning, if we chose. 
*/ @@ -281,7 +295,7 @@ my_error_exit (j_common_ptr cinfo) GLOBAL(int) -read_JPEG_file (char *filename) +read_JPEG_file(char *filename) { /* This struct contains the JPEG decompression parameters and pointers to * working space (which is allocated as needed by the JPEG library). @@ -331,7 +345,7 @@ read_JPEG_file (char *filename) /* Step 3: read file parameters with jpeg_read_header() */ - (void) jpeg_read_header(&cinfo, TRUE); + (void)jpeg_read_header(&cinfo, TRUE); /* We can ignore the return value from jpeg_read_header since * (a) suspension is not possible with the stdio data source, and * (b) we passed TRUE to reject a tables-only JPEG file as an error. @@ -346,7 +360,7 @@ read_JPEG_file (char *filename) /* Step 5: Start decompressor */ - (void) jpeg_start_decompress(&cinfo); + (void)jpeg_start_decompress(&cinfo); /* We can ignore the return value since suspension is not possible * with the stdio data source. */ @@ -361,7 +375,7 @@ read_JPEG_file (char *filename) row_stride = cinfo.output_width * cinfo.output_components; /* Make a one-row-high sample array that will go away when done with image */ buffer = (*cinfo.mem->alloc_sarray) - ((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1); + ((j_common_ptr)&cinfo, JPOOL_IMAGE, row_stride, 1); /* Step 6: while (scan lines remain to be read) */ /* jpeg_read_scanlines(...); */ @@ -374,14 +388,14 @@ read_JPEG_file (char *filename) * Here the array is only one element long, but you could ask for * more than one scanline at a time if that's more convenient. */ - (void) jpeg_read_scanlines(&cinfo, buffer, 1); + (void)jpeg_read_scanlines(&cinfo, buffer, 1); /* Assume put_scanline_someplace wants a pointer and sample count. */ put_scanline_someplace(buffer[0], row_stride); } /* Step 7: Finish decompression */ - (void) jpeg_finish_decompress(&cinfo); + (void)jpeg_finish_decompress(&cinfo); /* We can ignore the return value since suspension is not possible * with the stdio data source. 
*/ diff --git a/jaricom.c b/jaricom.c index 3bb557f..215640c 100644 --- a/jaricom.c +++ b/jaricom.c @@ -4,16 +4,16 @@ * This file was part of the Independent JPEG Group's software: * Developed 1997-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2015, D. R. Commander. + * Copyright (C) 2015, 2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * * This file contains probability estimation tables for common use in * arithmetic entropy encoding and decoding routines. * - * This data represents Table D.2 in the JPEG spec (ISO/IEC IS 10918-1 - * and CCITT Recommendation ITU-T T.81) and Table 24 in the JBIG spec - * (ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82). + * This data represents Table D.2 in + * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994 and Table 24 in + * Recommendation ITU-T T.82 (1993) | ISO/IEC 11544:1993. */ #define JPEG_INTERNALS @@ -29,9 +29,10 @@ * implementation (jbig_tab.c). 
*/ -#define V(i,a,b,c,d) (((JLONG)a << 16) | ((JLONG)c << 8) | ((JLONG)d << 7) | b) +#define V(i, a, b, c, d) \ + (((JLONG)a << 16) | ((JLONG)c << 8) | ((JLONG)d << 7) | b) -const JLONG jpeg_aritab[113+1] = { +const JLONG jpeg_aritab[113 + 1] = { /* * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS */ diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 0af8ae1..3d863bd 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -1,57 +1,88 @@ -set(JAR_FILE turbojpeg.jar) -set(MANIFEST_FILE ${CMAKE_CURRENT_SOURCE_DIR}/MANIFEST.MF) - -set(JAVA_CLASSNAMES org/libjpegturbo/turbojpeg/TJ - org/libjpegturbo/turbojpeg/TJCompressor - org/libjpegturbo/turbojpeg/TJCustomFilter - org/libjpegturbo/turbojpeg/TJDecompressor - org/libjpegturbo/turbojpeg/TJException - org/libjpegturbo/turbojpeg/TJScalingFactor - org/libjpegturbo/turbojpeg/TJTransform - org/libjpegturbo/turbojpeg/TJTransformer - org/libjpegturbo/turbojpeg/YUVImage - TJUnitTest - TJExample - TJBench) - -if(MSVC_IDE) - set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}") -else() - set(OBJDIR ${CMAKE_CURRENT_BINARY_DIR}) +find_package(Java REQUIRED) +find_package(JNI REQUIRED) + +# Allow the Java compiler flags to be set using an environment variable +if(NOT DEFINED CMAKE_JAVA_COMPILE_FLAGS AND DEFINED ENV{JAVAFLAGS}) + set(CMAKE_JAVA_COMPILE_FLAGS $ENV{JAVAFLAGS}) endif() +include(UseJava) + +set(CMAKE_JAVA_COMPILE_FLAGS "${CMAKE_JAVA_COMPILE_FLAGS} -J-Dfile.encoding=UTF8") +message(STATUS "CMAKE_JAVA_COMPILE_FLAGS = ${CMAKE_JAVA_COMPILE_FLAGS}") +string(REGEX REPLACE " " ";" CMAKE_JAVA_COMPILE_FLAGS "${CMAKE_JAVA_COMPILE_FLAGS}") + +set(JAVAARGS "" CACHE STRING "Additional arguments to pass to java when running unit tests (example: -d32)") +message(STATUS "JAVAARGS = ${JAVAARGS}") + +set(JAVA_SOURCES org/libjpegturbo/turbojpeg/TJ.java + org/libjpegturbo/turbojpeg/TJCompressor.java + org/libjpegturbo/turbojpeg/TJCustomFilter.java + org/libjpegturbo/turbojpeg/TJDecompressor.java + 
org/libjpegturbo/turbojpeg/TJException.java + org/libjpegturbo/turbojpeg/TJScalingFactor.java + org/libjpegturbo/turbojpeg/TJTransform.java + org/libjpegturbo/turbojpeg/TJTransformer.java + org/libjpegturbo/turbojpeg/YUVImage.java + TJUnitTest.java + TJExample.java + TJBench.java) + set(TURBOJPEG_DLL_NAME "turbojpeg") if(MINGW) set(TURBOJPEG_DLL_NAME "libturbojpeg") endif() -configure_file(org/libjpegturbo/turbojpeg/TJLoader.java.in - ${CMAKE_CURRENT_BINARY_DIR}/org/libjpegturbo/turbojpeg/TJLoader.java) - -set(JAVA_SOURCES "") -set(JAVA_CLASSES "") -set(JAVA_CLASSES_FULL "") -foreach(class ${JAVA_CLASSNAMES}) - set(JAVA_SOURCES ${JAVA_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/${class}.java) - set(JAVA_CLASSES ${JAVA_CLASSES} ${class}.class) - set(JAVA_CLASSES_FULL ${JAVA_CLASSES_FULL} ${OBJDIR}/${class}.class) -endforeach() - +if(WIN32) + configure_file(org/libjpegturbo/turbojpeg/TJLoader-win.java.in + ${CMAKE_CURRENT_BINARY_DIR}/org/libjpegturbo/turbojpeg/TJLoader.java) +else() + configure_file(org/libjpegturbo/turbojpeg/TJLoader-unix.java.in + ${CMAKE_CURRENT_BINARY_DIR}/org/libjpegturbo/turbojpeg/TJLoader.java) +endif() set(JAVA_SOURCES ${JAVA_SOURCES} ${CMAKE_CURRENT_BINARY_DIR}/org/libjpegturbo/turbojpeg/TJLoader.java) -set(JAVA_CLASSES ${JAVA_CLASSES} - org/libjpegturbo/turbojpeg/TJLoader.class) -set(JAVA_CLASSES_FULL ${JAVA_CLASSES_FULL} - ${OBJDIR}/org/libjpegturbo/turbojpeg/TJLoader.class) -string(REGEX REPLACE " " ";" JAVACFLAGS "${JAVACFLAGS}") -add_custom_command(OUTPUT ${JAVA_CLASSES_FULL} DEPENDS ${JAVA_SOURCES} - COMMAND ${JAVA_COMPILE} ARGS ${JAVACFLAGS} -d ${OBJDIR} ${JAVA_SOURCES}) - -add_custom_command(OUTPUT ${JAR_FILE} DEPENDS ${JAVA_CLASSES_FULL} - ${MANIFEST_FILE} - COMMAND ${JAVA_ARCHIVE} cfm ${JAR_FILE} ${MANIFEST_FILE} ${JAVA_CLASSES} - WORKING_DIRECTORY ${OBJDIR}) +if(MSYS) + # UGLY HACK ALERT: If we don't do this, then UseJava.cmake will separate + # class path members with a semicolon, which is interpreted as a command + # separator by the 
MSYS shell. + set(CMAKE_HOST_SYSTEM_NAME_BAK ${CMAKE_HOST_SYSTEM_NAME}) + set(CMAKE_HOST_SYSTEM_NAME "MSYS") +endif() +add_jar(turbojpeg-java ${JAVA_SOURCES} OUTPUT_NAME turbojpeg + ENTRY_POINT TJExample) +if(MSYS) + set(CMAKE_HOST_SYSTEM_NAME ${CMAKE_HOST_SYSTEM_NAME_BAK}) +endif() -add_custom_target(java ALL DEPENDS ${JAR_FILE}) +add_custom_target(javadoc COMMAND + javadoc -notimestamp -d ${CMAKE_CURRENT_SOURCE_DIR}/doc -sourcepath ${CMAKE_CURRENT_SOURCE_DIR} org.libjpegturbo.turbojpeg) +set(JAVACLASSPATH ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/turbojpeg-java.dir) +if(Java_VERSION_MAJOR GREATER 9) + add_custom_target(javah + COMMAND javac -h ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} + -d ${CMAKE_CURRENT_BINARY_DIR}/__unused + ${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJ.java + ${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJCompressor.java + ${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJDecompressor.java + ${CMAKE_CURRENT_SOURCE_DIR}/org/libjpegturbo/turbojpeg/TJTransformer.java) +else() + add_custom_target(javah + COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJ + COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJCompressor + COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJDecompressor + COMMAND javah -d ${CMAKE_CURRENT_SOURCE_DIR} -classpath ${JAVACLASSPATH} org.libjpegturbo.turbojpeg.TJTransformer) +endif() -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${JAR_FILE} DESTINATION classes) +if(NOT DEFINED CMAKE_INSTALL_DEFAULT_JAVADIR) + set(CMAKE_INSTALL_DEFAULT_JAVADIR "/java") +endif() +GNUInstallDirs_set_install_dir(JAVADIR + "The directory into which Java classes should be installed") +GNUInstallDirs_get_absolute_install_dir(CMAKE_INSTALL_FULL_JAVADIR + CMAKE_INSTALL_JAVADIR) +set(CMAKE_INSTALL_JAVADIR ${CMAKE_INSTALL_JAVADIR} PARENT_SCOPE) 
+set(CMAKE_INSTALL_FULL_JAVADIR ${CMAKE_INSTALL_FULL_JAVADIR} PARENT_SCOPE) +report_directory(JAVADIR) +install_jar(turbojpeg-java ${CMAKE_INSTALL_JAVADIR}) +mark_as_advanced(CLEAR CMAKE_INSTALL_JAVADIR) diff --git a/java/Makefile.am b/java/Makefile.am deleted file mode 100644 index d3fc59c..0000000 --- a/java/Makefile.am +++ /dev/null @@ -1,75 +0,0 @@ -JAVAROOT = . - -org/libjpegturbo/turbojpeg/TJLoader.java: $(srcdir)/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl - mkdir -p org/libjpegturbo/turbojpeg; \ - cat $(srcdir)/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl | \ - sed s@%{__libdir}@$(libdir)@g > org/libjpegturbo/turbojpeg/TJLoader.java - - -JAVASOURCES = org/libjpegturbo/turbojpeg/TJ.java \ - org/libjpegturbo/turbojpeg/TJCompressor.java \ - org/libjpegturbo/turbojpeg/TJCustomFilter.java \ - org/libjpegturbo/turbojpeg/TJDecompressor.java \ - org/libjpegturbo/turbojpeg/TJException.java \ - org/libjpegturbo/turbojpeg/TJScalingFactor.java \ - org/libjpegturbo/turbojpeg/TJTransform.java \ - org/libjpegturbo/turbojpeg/TJTransformer.java \ - org/libjpegturbo/turbojpeg/YUVImage.java \ - TJExample.java \ - TJUnitTest.java \ - TJBench.java - -JNIHEADERS = org_libjpegturbo_turbojpeg_TJ.h \ - org_libjpegturbo_turbojpeg_TJCompressor.h \ - org_libjpegturbo_turbojpeg_TJDecompressor.h \ - org_libjpegturbo_turbojpeg_TJTransformer.h - -if WITH_JAVA - -nodist_noinst_JAVA = ${JAVASOURCES} org/libjpegturbo/turbojpeg/TJLoader.java - -JAVA_CLASSES = org/libjpegturbo/turbojpeg/TJ.class \ - org/libjpegturbo/turbojpeg/TJCompressor.class \ - org/libjpegturbo/turbojpeg/TJCustomFilter.class \ - org/libjpegturbo/turbojpeg/TJDecompressor.class \ - org/libjpegturbo/turbojpeg/TJException.class \ - org/libjpegturbo/turbojpeg/TJLoader.class \ - org/libjpegturbo/turbojpeg/TJScalingFactor.class \ - org/libjpegturbo/turbojpeg/TJTransform.class \ - org/libjpegturbo/turbojpeg/TJTransformer.class \ - org/libjpegturbo/turbojpeg/YUVImage.class \ - TJExample.class \ - TJUnitTest.class \ - 
TJBench.class - -all: all-am turbojpeg.jar - -turbojpeg.jar: classnoinst.stamp ${srcdir}/MANIFEST.MF - $(JAR) cfm turbojpeg.jar ${srcdir}/MANIFEST.MF $(JAVA_CLASSES) - -clean-local: - rm -f turbojpeg.jar - -install-exec-local: turbojpeg.jar - mkdir -p $(DESTDIR)/$(datadir)/classes - $(INSTALL) -m 644 turbojpeg.jar $(DESTDIR)/$(datadir)/classes/ - -uninstall-local: - rm -f $(DESTDIR)/$(datadir)/classes/turbojpeg.jar - if [ -d $(DESTDIR)/$(datadir)/classes ]; then rmdir $(DESTDIR)/$(datadir)/classes; fi - -headers: all - javah -d ${srcdir} org.libjpegturbo.turbojpeg.TJ; \ - javah -d ${srcdir} org.libjpegturbo.turbojpeg.TJCompressor; \ - javah -d ${srcdir} org.libjpegturbo.turbojpeg.TJDecompressor; \ - javah -d ${srcdir} org.libjpegturbo.turbojpeg.TJTransformer - -docs: all - mkdir -p ${srcdir}/doc; \ - javadoc -notimestamp -d ${srcdir}/doc -sourcepath ${srcdir} org.libjpegturbo.turbojpeg - -endif - -EXTRA_DIST = MANIFEST.MF ${JAVASOURCES} ${JNIHEADERS} doc CMakeLists.txt \ - org/libjpegturbo/turbojpeg/TJLoader.java.tmpl \ - org/libjpegturbo/turbojpeg/TJLoader.java.in diff --git a/java/TJBench.java b/java/TJBench.java index ddc414c..bd55562 100644 --- a/java/TJBench.java +++ b/java/TJBench.java @@ -1,5 +1,5 @@ /* - * Copyright (C)2009-2014, 2016-2017 D. R. Commander. All Rights Reserved. + * Copyright (C)2009-2014, 2016-2018 D. R. Commander. All Rights Reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,50 +32,73 @@ import javax.imageio.*; import java.util.*; import org.libjpegturbo.turbojpeg.*; -class TJBench { +final class TJBench { - static int flags = 0, quiet = 0, pf = TJ.PF_BGR, yuvpad = 1; - static boolean compOnly, decompOnly, doTile, doYUV, write = true; + private TJBench() {} - static final String[] pixFormatStr = { + private static int flags = 0, quiet = 0, pf = TJ.PF_BGR, yuvPad = 1; + private static boolean compOnly, decompOnly, doTile, doYUV, write = true; + + static final String[] PIXFORMATSTR = { "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY" }; - static final String[] subNameLong = { + static final String[] SUBNAME_LONG = { "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1" }; - static final String[] subName = { + static final String[] SUBNAME = { "444", "422", "420", "GRAY", "440", "411" }; - static final String[] csName = { + static final String[] CSNAME = { "RGB", "YCbCr", "GRAY", "CMYK", "YCCK" }; - static TJScalingFactor sf; - static int xformOp = TJTransform.OP_NONE, xformOpt = 0; - static double benchTime = 5.0, warmup = 1.0; + private static TJScalingFactor sf; + private static int xformOp = TJTransform.OP_NONE, xformOpt = 0; + private static double benchTime = 5.0, warmup = 1.0; - static final double getTime() { + static double getTime() { return (double)System.nanoTime() / 1.0e9; } + private static String tjErrorMsg; + private static int tjErrorCode = -1; + + static void handleTJException(TJException e) throws TJException { + String errorMsg = e.getMessage(); + int errorCode = e.getErrorCode(); + + if ((flags & TJ.FLAG_STOPONWARNING) == 0 && + errorCode == TJ.ERR_WARNING) { + if (tjErrorMsg == null || !tjErrorMsg.equals(errorMsg) || + tjErrorCode != errorCode) { + tjErrorMsg = errorMsg; + tjErrorCode = errorCode; + System.out.println("WARNING: " + errorMsg); + } + } else + 
throw e; + } + + static String formatName(int subsamp, int cs) { if (cs == TJ.CS_YCbCr) - return subNameLong[subsamp]; + return SUBNAME_LONG[subsamp]; else if (cs == TJ.CS_YCCK) - return csName[cs] + " " + subNameLong[subsamp]; + return CSNAME[cs] + " " + SUBNAME_LONG[subsamp]; else - return csName[cs]; + return CSNAME[cs]; } static String sigFig(double val, int figs) { String format; int digitsAfterDecimal = figs - (int)Math.ceil(Math.log10(Math.abs(val))); + if (digitsAfterDecimal < 1) format = new String("%.0f"); else @@ -87,10 +110,12 @@ class TJBench { static byte[] loadImage(String fileName, int[] w, int[] h, int pixelFormat) throws Exception { BufferedImage img = ImageIO.read(new File(fileName)); + if (img == null) throw new Exception("Could not read " + fileName); w[0] = img.getWidth(); h[0] = img.getHeight(); + int[] rgb = img.getRGB(0, 0, w[0], h[0], null, 0, w[0]); int ps = TJ.getPixelSize(pixelFormat); int rindex = TJ.getRedOffset(pixelFormat); @@ -98,6 +123,7 @@ class TJBench { int bindex = TJ.getBlueOffset(pixelFormat); byte[] dstBuf = new byte[w[0] * h[0] * ps]; int pixels = w[0] * h[0], dstPtr = 0, rgbPtr = 0; + while (pixels-- > 0) { dstBuf[dstPtr + rindex] = (byte)((rgb[rgbPtr] >> 16) & 0xff); dstBuf[dstPtr + gindex] = (byte)((rgb[rgbPtr] >> 8) & 0xff); @@ -117,11 +143,13 @@ class TJBench { int rindex = TJ.getRedOffset(pixelFormat); int gindex = TJ.getGreenOffset(pixelFormat); int bindex = TJ.getBlueOffset(pixelFormat); + for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++, srcPtr += ps) { int pixel = (srcBuf[srcPtr + rindex] & 0xff) << 16 | (srcBuf[srcPtr + gindex] & 0xff) << 8 | (srcBuf[srcPtr + bindex] & 0xff); + img.setRGB(x, y, pixel); } } @@ -157,7 +185,8 @@ class TJBench { if (doYUV) { int width = doTile ? tilew : scaledw; int height = doTile ? 
tileh : scaledh; - yuvImage = new YUVImage(width, yuvpad, height, subsamp); + + yuvImage = new YUVImage(width, yuvPad, height, subsamp); Arrays.fill(yuvImage.getBuf(), (byte)127); } @@ -167,21 +196,30 @@ class TJBench { while (true) { int tile = 0; double start = getTime(); + for (int y = 0; y < h; y += tileh) { for (int x = 0; x < w; x += tilew, tile++) { int width = doTile ? Math.min(tilew, w - x) : scaledw; int height = doTile ? Math.min(tileh, h - y) : scaledh; + tjd.setSourceImage(jpegBuf[tile], jpegSize[tile]); if (doYUV) { - yuvImage.setBuf(yuvImage.getBuf(), width, yuvpad, height, subsamp); - tjd.decompressToYUV(yuvImage, flags); + yuvImage.setBuf(yuvImage.getBuf(), width, yuvPad, height, subsamp); + try { + tjd.decompressToYUV(yuvImage, flags); + } catch (TJException e) { handleTJException(e); } double startDecode = getTime(); tjd.setSourceImage(yuvImage); - tjd.decompress(dstBuf, x, y, width, pitch, height, pf, flags); + try { + tjd.decompress(dstBuf, x, y, width, pitch, height, pf, flags); + } catch (TJException e) { handleTJException(e); } if (iter >= 0) elapsedDecode += getTime() - startDecode; - } else - tjd.decompress(dstBuf, x, y, width, pitch, height, pf, flags); + } else { + try { + tjd.decompress(dstBuf, x, y, width, pitch, height, pf, flags); + } catch (TJException e) { handleTJException(e); } + } } } elapsed += getTime() - start; @@ -194,7 +232,7 @@ class TJBench { elapsed = elapsedDecode = 0.0; } } - if(doYUV) + if (doYUV) elapsed -= elapsedDecode; tjd = null; @@ -205,16 +243,18 @@ class TJBench { if (quiet != 0) { System.out.format("%-6s%s", - sigFig((double)(w * h) / 1000000. * (double)iter / elapsed, 4), - quiet == 2 ? "\n" : " "); + sigFig((double)(w * h) / 1000000. * + (double)iter / elapsed, 4), + quiet == 2 ? "\n" : " "); if (doYUV) System.out.format("%s\n", - sigFig((double)(w * h) / 1000000. * (double)iter / elapsedDecode, 4)); + sigFig((double)(w * h) / 1000000. 
* + (double)iter / elapsedDecode, 4)); else if (quiet != 2) System.out.print("\n"); } else { System.out.format("%s --> Frame rate: %f fps\n", - (doYUV ? "Decomp to YUV":"Decompress "), + (doYUV ? "Decomp to YUV" : "Decompress "), (double)iter / elapsed); System.out.format(" Throughput: %f Megapixels/sec\n", (double)(w * h) / 1000000. * (double)iter / elapsed); @@ -222,7 +262,8 @@ class TJBench { System.out.format("YUV Decode --> Frame rate: %f fps\n", (double)iter / elapsedDecode); System.out.format(" Throughput: %f Megapixels/sec\n", - (double)(w * h) / 1000000. * (double)iter / elapsedDecode); + (double)(w * h) / 1000000. * + (double)iter / elapsedDecode); } } @@ -237,7 +278,7 @@ class TJBench { if (decompOnly) tempStr = new String(fileName + "_" + sizeStr + ".bmp"); else - tempStr = new String(fileName + "_" + subName[subsamp] + qualStr + + tempStr = new String(fileName + "_" + SUBNAME[subsamp] + qualStr + "_" + sizeStr + ".bmp"); saveImage(tempStr, dstBuf, scaledw, scaledh, pf); @@ -255,6 +296,7 @@ class TJBench { int lum = (int)((double)(srcBuf[rindex] & 0xff) * 0.299 + (double)(srcBuf[gindex] & 0xff) * 0.587 + (double)(srcBuf[bindex] & 0xff) * 0.114 + 0.5); + if (lum > 255) lum = 255; if (lum < 0) lum = 0; dstBuf[rindex] = (byte)Math.abs((dstBuf[rindex] & 0xff) - lum); @@ -284,15 +326,16 @@ class TJBench { int totalJpegSize = 0, tilew, tileh, i, iter; int ps = TJ.getPixelSize(pf); int ntilesw = 1, ntilesh = 1, pitch = w * ps; - String pfStr = pixFormatStr[pf]; + String pfStr = PIXFORMATSTR[pf]; YUVImage yuvImage = null; tmpBuf = new byte[pitch * h]; if (quiet == 0) System.out.format(">>>>> %s (%s) <--> JPEG %s Q%d <<<<<\n", pfStr, - (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down", - subNameLong[subsamp], jpegQual); + (flags & TJ.FLAG_BOTTOMUP) != 0 ? 
+ "Bottom-up" : "Top-down", + SUBNAME_LONG[subsamp], jpegQual); tjc = new TJCompressor(); @@ -312,14 +355,14 @@ class TJBench { if (quiet == 1) System.out.format("%-4s (%s) %-5s %-3d ", pfStr, (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD", - subNameLong[subsamp], jpegQual); + SUBNAME_LONG[subsamp], jpegQual); for (i = 0; i < h; i++) System.arraycopy(srcBuf, w * ps * i, tmpBuf, pitch * i, w * ps); tjc.setJPEGQuality(jpegQual); tjc.setSubsamp(subsamp); if (doYUV) { - yuvImage = new YUVImage(tilew, yuvpad, tileh, subsamp); + yuvImage = new YUVImage(tilew, yuvPad, tileh, subsamp); Arrays.fill(yuvImage.getBuf(), (byte)127); } @@ -328,16 +371,19 @@ class TJBench { elapsed = elapsedEncode = 0.0; while (true) { int tile = 0; + totalJpegSize = 0; start = getTime(); for (int y = 0; y < h; y += tileh) { for (int x = 0; x < w; x += tilew, tile++) { int width = Math.min(tilew, w - x); int height = Math.min(tileh, h - y); + tjc.setSourceImage(srcBuf, x, y, width, pitch, height, pf); if (doYUV) { double startEncode = getTime(); - yuvImage.setBuf(yuvImage.getBuf(), width, yuvpad, height, + + yuvImage.setBuf(yuvImage.getBuf(), width, yuvPad, height, subsamp); tjc.encodeYUV(yuvImage, flags); if (iter >= 0) @@ -367,14 +413,17 @@ class TJBench { if (quiet != 0) { if (doYUV) System.out.format("%-6s%s", - sigFig((double)(w * h) / 1000000. * (double)iter / elapsedEncode, 4), - quiet == 2 ? "\n" : " "); + sigFig((double)(w * h) / 1000000. * + (double)iter / elapsedEncode, 4), + quiet == 2 ? "\n" : " "); System.out.format("%-6s%s", - sigFig((double)(w * h) / 1000000. * (double)iter / elapsed, 4), - quiet == 2 ? "\n" : " "); + sigFig((double)(w * h) / 1000000. * + (double)iter / elapsed, 4), + quiet == 2 ? "\n" : " "); System.out.format("%-6s%s", - sigFig((double)(w * h * ps) / (double)totalJpegSize, 4), - quiet == 2 ? "\n" : " "); + sigFig((double)(w * h * ps) / (double)totalJpegSize, + 4), + quiet == 2 ? "\n" : " "); } else { System.out.format("\n%s size: %d x %d\n", doTile ? 
"Tile" : "Image", tilew, tileh); @@ -386,9 +435,11 @@ class TJBench { System.out.format(" Compression ratio: %f:1\n", (double)(w * h * ps) / (double)yuvImage.getSize()); System.out.format(" Throughput: %f Megapixels/sec\n", - (double)(w * h) / 1000000. * (double)iter / elapsedEncode); + (double)(w * h) / 1000000. * + (double)iter / elapsedEncode); System.out.format(" Output bit stream: %f Megabits/sec\n", - (double)yuvImage.getSize() * 8. / 1000000. * (double)iter / elapsedEncode); + (double)yuvImage.getSize() * 8. / 1000000. * + (double)iter / elapsedEncode); } System.out.format("%s --> Frame rate: %f fps\n", doYUV ? "Comp from YUV" : "Compress ", @@ -400,12 +451,14 @@ class TJBench { System.out.format(" Throughput: %f Megapixels/sec\n", (double)(w * h) / 1000000. * (double)iter / elapsed); System.out.format(" Output bit stream: %f Megabits/sec\n", - (double)totalJpegSize * 8. / 1000000. * (double)iter / elapsed); + (double)totalJpegSize * 8. / 1000000. * + (double)iter / elapsed); } if (tilew == w && tileh == h && write) { - String tempStr = fileName + "_" + subName[subsamp] + "_" + "Q" + + String tempStr = fileName + "_" + SUBNAME[subsamp] + "_" + "Q" + jpegQual + ".jpg"; FileOutputStream fos = new FileOutputStream(tempStr); + fos.write(jpegBuf[0], 0, jpegSize[0]); fos.close(); if (quiet == 0) @@ -428,11 +481,12 @@ class TJBench { byte[] srcBuf; int[] jpegSize = null; int totalJpegSize; - int w = 0, h = 0, subsamp = -1, cs = -1, _w, _h, _tilew, _tileh, - _ntilesw, _ntilesh, _subsamp, x, y, iter; - int ntilesw = 1, ntilesh = 1; double start, elapsed; - int ps = TJ.getPixelSize(pf), tile; + int ps = TJ.getPixelSize(pf), tile, x, y, iter; + // Original image + int w = 0, h = 0, ntilesw = 1, ntilesh = 1, subsamp = -1, cs = -1; + // Transformed image + int tw, th, ttilew, ttileh, tntilesw, tntilesh, tsubsamp; FileInputStream fis = new FileInputStream(fileName); int srcSize = (int)fis.getChannel().size(); @@ -466,8 +520,9 @@ class TJBench { System.out.println("\n"); } 
else if (quiet == 0) System.out.format(">>>>> JPEG %s --> %s (%s) <<<<<\n", - formatName(subsamp, cs), pixFormatStr[pf], - (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down"); + formatName(subsamp, cs), PIXFORMATSTR[pf], + (flags & TJ.FLAG_BOTTOMUP) != 0 ? + "Bottom-up" : "Top-down"); for (int tilew = doTile ? 16 : w, tileh = doTile ? 16 : h; ; tilew *= 2, tileh *= 2) { @@ -478,65 +533,66 @@ class TJBench { ntilesw = (w + tilew - 1) / tilew; ntilesh = (h + tileh - 1) / tileh; - _w = w; _h = h; _tilew = tilew; _tileh = tileh; + tw = w; th = h; ttilew = tilew; ttileh = tileh; if (quiet == 0) { System.out.format("\n%s size: %d x %d", (doTile ? "Tile" : "Image"), - _tilew, _tileh); + ttilew, ttileh); if (sf.getNum() != 1 || sf.getDenom() != 1) - System.out.format(" --> %d x %d", sf.getScaled(_w), - sf.getScaled(_h)); + System.out.format(" --> %d x %d", sf.getScaled(tw), + sf.getScaled(th)); System.out.println(""); } else if (quiet == 1) { - System.out.format("%-4s (%s) %-5s %-5s ", pixFormatStr[pf], + System.out.format("%-4s (%s) %-5s %-5s ", PIXFORMATSTR[pf], (flags & TJ.FLAG_BOTTOMUP) != 0 ? 
"BU" : "TD", - csName[cs], subNameLong[subsamp]); + CSNAME[cs], SUBNAME_LONG[subsamp]); System.out.format("%-5d %-5d ", tilew, tileh); } - _subsamp = subsamp; + tsubsamp = subsamp; if (doTile || xformOp != TJTransform.OP_NONE || xformOpt != 0) { if (xformOp == TJTransform.OP_TRANSPOSE || xformOp == TJTransform.OP_TRANSVERSE || xformOp == TJTransform.OP_ROT90 || xformOp == TJTransform.OP_ROT270) { - _w = h; _h = w; _tilew = tileh; _tileh = tilew; + tw = h; th = w; ttilew = tileh; ttileh = tilew; } if ((xformOpt & TJTransform.OPT_GRAY) != 0) - _subsamp = TJ.SAMP_GRAY; + tsubsamp = TJ.SAMP_GRAY; if (xformOp == TJTransform.OP_HFLIP || xformOp == TJTransform.OP_ROT180) - _w = _w - (_w % TJ.getMCUWidth(_subsamp)); + tw = tw - (tw % TJ.getMCUWidth(tsubsamp)); if (xformOp == TJTransform.OP_VFLIP || xformOp == TJTransform.OP_ROT180) - _h = _h - (_h % TJ.getMCUHeight(_subsamp)); + th = th - (th % TJ.getMCUHeight(tsubsamp)); if (xformOp == TJTransform.OP_TRANSVERSE || xformOp == TJTransform.OP_ROT90) - _w = _w - (_w % TJ.getMCUHeight(_subsamp)); + tw = tw - (tw % TJ.getMCUHeight(tsubsamp)); if (xformOp == TJTransform.OP_TRANSVERSE || xformOp == TJTransform.OP_ROT270) - _h = _h - (_h % TJ.getMCUWidth(_subsamp)); - _ntilesw = (_w + _tilew - 1) / _tilew; - _ntilesh = (_h + _tileh - 1) / _tileh; + th = th - (th % TJ.getMCUWidth(tsubsamp)); + tntilesw = (tw + ttilew - 1) / ttilew; + tntilesh = (th + ttileh - 1) / ttileh; if (xformOp == TJTransform.OP_TRANSPOSE || xformOp == TJTransform.OP_TRANSVERSE || xformOp == TJTransform.OP_ROT90 || xformOp == TJTransform.OP_ROT270) { - if (_subsamp == TJ.SAMP_422) - _subsamp = TJ.SAMP_440; - else if (_subsamp == TJ.SAMP_440) - _subsamp = TJ.SAMP_422; + if (tsubsamp == TJ.SAMP_422) + tsubsamp = TJ.SAMP_440; + else if (tsubsamp == TJ.SAMP_440) + tsubsamp = TJ.SAMP_422; } - TJTransform[] t = new TJTransform[_ntilesw * _ntilesh]; - jpegBuf = new byte[_ntilesw * _ntilesh][TJ.bufSize(_tilew, _tileh, subsamp)]; + TJTransform[] t = new 
TJTransform[tntilesw * tntilesh]; + jpegBuf = + new byte[tntilesw * tntilesh][TJ.bufSize(ttilew, ttileh, subsamp)]; - for (y = 0, tile = 0; y < _h; y += _tileh) { - for (x = 0; x < _w; x += _tilew, tile++) { + for (y = 0, tile = 0; y < th; y += ttileh) { + for (x = 0; x < tw; x += ttilew, tile++) { t[tile] = new TJTransform(); - t[tile].width = Math.min(_tilew, _w - x); - t[tile].height = Math.min(_tileh, _h - y); + t[tile].width = Math.min(ttilew, tw - x); + t[tile].height = Math.min(ttileh, th - y); t[tile].x = x; t[tile].y = y; t[tile].op = xformOp; @@ -565,15 +621,16 @@ class TJBench { } t = null; - for (tile = 0, totalJpegSize = 0; tile < _ntilesw * _ntilesh; tile++) + for (tile = 0, totalJpegSize = 0; tile < tntilesw * tntilesh; tile++) totalJpegSize += jpegSize[tile]; if (quiet != 0) { System.out.format("%-6s%s%-6s%s", - sigFig((double)(w * h) / 1000000. / elapsed, 4), - quiet == 2 ? "\n" : " ", - sigFig((double)(w * h * ps) / (double)totalJpegSize, 4), - quiet == 2 ? "\n" : " "); + sigFig((double)(w * h) / 1000000. / elapsed, 4), + quiet == 2 ? "\n" : " ", + sigFig((double)(w * h * ps) / + (double)totalJpegSize, 4), + quiet == 2 ? 
"\n" : " "); } else if (quiet == 0) { System.out.format("Transform --> Frame rate: %f fps\n", 1.0 / elapsed); @@ -589,19 +646,19 @@ class TJBench { } else { if (quiet == 1) System.out.print("N/A N/A "); - jpegBuf = new byte[1][TJ.bufSize(_tilew, _tileh, subsamp)]; + jpegBuf = new byte[1][TJ.bufSize(ttilew, ttileh, subsamp)]; jpegSize = new int[1]; jpegBuf[0] = srcBuf; jpegSize[0] = srcSize; } if (w == tilew) - _tilew = _w; + ttilew = tw; if (h == tileh) - _tileh = _h; + ttileh = th; if ((xformOpt & TJTransform.OPT_NOOUTPUT) == 0) - decomp(null, jpegBuf, jpegSize, null, _w, _h, _subsamp, 0, - fileName, _tilew, _tileh); + decomp(null, jpegBuf, jpegSize, null, tw, th, tsubsamp, 0, + fileName, ttilew, ttileh); else if (quiet == 1) System.out.println("N/A"); @@ -636,6 +693,8 @@ class TJBench { System.out.println(" codec"); System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the"); System.out.println(" underlying codec"); + System.out.println("-progressive = Use progressive entropy coding in JPEG images generated by"); + System.out.println(" compression and transform operations."); System.out.println("-subsamp = When testing JPEG compression, this option specifies the level"); System.out.println(" of chrominance subsampling to use ( = 444, 422, 440, 420, 411, or"); System.out.println(" GRAY). 
The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in"); @@ -667,13 +726,18 @@ class TJBench { System.out.println(" decompression (these options are mutually exclusive)"); System.out.println("-grayscale = Perform lossless grayscale conversion prior to decompression"); System.out.println(" test (can be combined with the other transforms above)"); + System.out.println("-copynone = Do not copy any extra markers (including EXIF and ICC profile data)"); + System.out.println(" when transforming the image."); System.out.println("-benchtime = Run each benchmark for at least seconds (default = 5.0)"); System.out.println("-warmup = Run each benchmark for seconds (default = 1.0) prior to"); System.out.println(" starting the timer, in order to prime the caches and thus improve the"); System.out.println(" consistency of the results."); System.out.println("-componly = Stop after running compression tests. Do not test decompression."); System.out.println("-nowrite = Do not write reference or output images (improves consistency"); - System.out.println(" of performance measurements.)\n"); + System.out.println(" of performance measurements.)"); + System.out.println("-stoponwarning = Immediately discontinue the current"); + System.out.println(" compression/decompression/transform operation if the underlying codec"); + System.out.println(" throws a warning (non-fatal error)\n"); System.out.println("NOTE: If the quality is specified as a range (e.g. 
90-100), a separate"); System.out.println("test will be performed for all quality values in the range.\n"); System.exit(1); @@ -681,9 +745,9 @@ class TJBench { public static void main(String[] argv) { - byte[] srcBuf = null; int w = 0, h = 0; - int minQual = -1, maxQual = -1; - int minArg = 1; int retval = 0; + byte[] srcBuf = null; + int w = 0, h = 0, minQual = -1, maxQual = -1; + int minArg = 1, retval = 0; int subsamp = -1; try { @@ -720,20 +784,19 @@ class TJBench { for (int i = minArg; i < argv.length; i++) { if (argv[i].equalsIgnoreCase("-tile")) { doTile = true; xformOpt |= TJTransform.OPT_CROP; - } - else if (argv[i].equalsIgnoreCase("-fastupsample")) { + } else if (argv[i].equalsIgnoreCase("-fastupsample")) { System.out.println("Using fast upsampling code\n"); flags |= TJ.FLAG_FASTUPSAMPLE; - } - else if (argv[i].equalsIgnoreCase("-fastdct")) { + } else if (argv[i].equalsIgnoreCase("-fastdct")) { System.out.println("Using fastest DCT/IDCT algorithm\n"); flags |= TJ.FLAG_FASTDCT; - } - else if (argv[i].equalsIgnoreCase("-accuratedct")) { + } else if (argv[i].equalsIgnoreCase("-accuratedct")) { System.out.println("Using most accurate DCT/IDCT algorithm\n"); flags |= TJ.FLAG_ACCURATEDCT; - } - else if (argv[i].equalsIgnoreCase("-rgb")) + } else if (argv[i].equalsIgnoreCase("-progressive")) { + System.out.println("Using progressive entropy coding\n"); + flags |= TJ.FLAG_PROGRESSIVE; + } else if (argv[i].equalsIgnoreCase("-rgb")) pf = TJ.PF_RGB; else if (argv[i].equalsIgnoreCase("-rgbx")) pf = TJ.PF_RGBX; @@ -755,26 +818,27 @@ class TJBench { int temp1 = 0, temp2 = 0; boolean match = false, scanned = true; Scanner scanner = new Scanner(argv[++i]).useDelimiter("/"); + try { temp1 = scanner.nextInt(); temp2 = scanner.nextInt(); - } catch(Exception e) {} + } catch (Exception e) {} if (temp2 <= 0) temp2 = 1; if (temp1 > 0) { TJScalingFactor[] scalingFactors = TJ.getScalingFactors(); + for (int j = 0; j < scalingFactors.length; j++) { if ((double)temp1 / 
(double)temp2 == (double)scalingFactors[j].getNum() / (double)scalingFactors[j].getDenom()) { sf = scalingFactors[j]; - match = true; break; + match = true; break; } } if (!match) usage(); } else usage(); - } - else if (argv[i].equalsIgnoreCase("-hflip")) + } else if (argv[i].equalsIgnoreCase("-hflip")) xformOp = TJTransform.OP_HFLIP; else if (argv[i].equalsIgnoreCase("-vflip")) xformOp = TJTransform.OP_VFLIP; @@ -792,8 +856,12 @@ class TJBench { xformOpt |= TJTransform.OPT_GRAY; else if (argv[i].equalsIgnoreCase("-nooutput")) xformOpt |= TJTransform.OPT_NOOUTPUT; - else if (argv[i].equalsIgnoreCase("-benchtime") && i < argv.length - 1) { + else if (argv[i].equalsIgnoreCase("-copynone")) + xformOpt |= TJTransform.OPT_COPYNONE; + else if (argv[i].equalsIgnoreCase("-benchtime") && + i < argv.length - 1) { double temp = -1; + try { temp = Double.parseDouble(argv[++i]); } catch (NumberFormatException e) {} @@ -801,20 +869,32 @@ class TJBench { benchTime = temp; else usage(); - } - else if (argv[i].equalsIgnoreCase("-yuv")) { + } else if (argv[i].equalsIgnoreCase("-warmup") && + i < argv.length - 1) { + double temp = -1; + + try { + temp = Double.parseDouble(argv[++i]); + } catch (NumberFormatException e) {} + if (temp >= 0.0) { + warmup = temp; + System.out.format("Warmup time = %.1f seconds\n\n", warmup); + } else + usage(); + } else if (argv[i].equalsIgnoreCase("-yuv")) { System.out.println("Testing YUV planar encoding/decoding\n"); doYUV = true; - } - else if (argv[i].equalsIgnoreCase("-yuvpad") && i < argv.length - 1) { + } else if (argv[i].equalsIgnoreCase("-yuvpad") && + i < argv.length - 1) { int temp = 0; + try { - temp = Integer.parseInt(argv[++i]); + temp = Integer.parseInt(argv[++i]); } catch (NumberFormatException e) {} if (temp >= 1) - yuvpad = temp; - } - else if (argv[i].equalsIgnoreCase("-subsamp") && i < argv.length - 1) { + yuvPad = temp; + } else if (argv[i].equalsIgnoreCase("-subsamp") && + i < argv.length - 1) { i++; if 
(argv[i].toUpperCase().startsWith("G")) subsamp = TJ.SAMP_GRAY; @@ -828,22 +908,12 @@ class TJBench { subsamp = TJ.SAMP_420; else if (argv[i].equals("411")) subsamp = TJ.SAMP_411; - } - else if (argv[i].equalsIgnoreCase("-componly")) + } else if (argv[i].equalsIgnoreCase("-componly")) compOnly = true; else if (argv[i].equalsIgnoreCase("-nowrite")) write = false; - else if (argv[i].equalsIgnoreCase("-warmup") && i < argv.length - 1) { - double temp = -1; - try { - temp = Double.parseDouble(argv[++i]); - } catch (NumberFormatException e) {} - if (temp >= 0.0) { - warmup = temp; - System.out.format("Warmup time = %.1f seconds\n\n", warmup); - } else - usage(); - } + else if (argv[i].equalsIgnoreCase("-stoponwarning")) + flags |= TJ.FLAG_STOPONWARNING; else usage(); } } @@ -859,6 +929,7 @@ class TJBench { if (!decompOnly) { int[] width = new int[1], height = new int[1]; + srcBuf = loadImage(argv[0], width, height, pf); w = width[0]; h = height[0]; int index = -1; @@ -869,7 +940,8 @@ class TJBench { if (quiet == 1 && !decompOnly) { System.out.println("All performance values in Mpixels/sec\n"); System.out.format("Bitmap JPEG JPEG %s %s ", - (doTile ? "Tile " : "Image"), (doTile ? "Tile " : "Image")); + (doTile ? "Tile " : "Image"), + (doTile ? "Tile " : "Image")); if (doYUV) System.out.print("Encode "); System.out.print("Comp Comp Decomp "); @@ -915,7 +987,13 @@ class TJBench { } } catch (Exception e) { - System.out.println("ERROR: " + e.getMessage()); + if (e instanceof TJException) { + TJException tje = (TJException)e; + + System.out.println((tje.getErrorCode() == TJ.ERR_WARNING ? + "WARNING: " : "ERROR: ") + tje.getMessage()); + } else + System.out.println("ERROR: " + e.getMessage()); e.printStackTrace(); retval = -1; } diff --git a/java/TJExample.java b/java/TJExample.java index 835a5b9..7859886 100644 --- a/java/TJExample.java +++ b/java/TJExample.java @@ -1,6 +1,6 @@ /* - * Copyright (C)2011-2012, 2014-2015, 2017 D. R. Commander. - * All Rights Reserved. 
+ * Copyright (C)2011-2012, 2014-2015, 2017-2018 D. R. Commander. + * All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,8 +28,8 @@ */ /* - * This program demonstrates how to compress and decompress JPEG files using - * the TurboJPEG JNI wrapper + * This program demonstrates how to compress, decompress, and transform JPEG + * images using the TurboJPEG Java API */ import java.io.*; @@ -40,138 +40,178 @@ import javax.imageio.*; import javax.swing.*; import org.libjpegturbo.turbojpeg.*; -public class TJExample implements TJCustomFilter { - public static final String classname = new TJExample().getClass().getName(); +@SuppressWarnings("checkstyle:JavadocType") +class TJExample implements TJCustomFilter { - private static void usage() throws Exception { - System.out.println("\nUSAGE: java " + classname + " [options]\n"); - System.out.println("Input and output files can be any image format that the Java Image I/O"); + static final String CLASS_NAME = + new TJExample().getClass().getName(); + + static final int DEFAULT_SUBSAMP = TJ.SAMP_444; + static final int DEFAULT_QUALITY = 95; + + + static final String[] SUBSAMP_NAME = { + "4:4:4", "4:2:2", "4:2:0", "Grayscale", "4:4:0", "4:1:1" + }; + + static final String[] COLORSPACE_NAME = { + "RGB", "YCbCr", "GRAY", "CMYK", "YCCK" + }; + + + /* DCT filter example. This produces a negative of the image. 
*/ + + @SuppressWarnings("checkstyle:JavadocMethod") + public void customFilter(ShortBuffer coeffBuffer, Rectangle bufferRegion, + Rectangle planeRegion, int componentIndex, + int transformIndex, TJTransform transform) + throws TJException { + for (int i = 0; i < bufferRegion.width * bufferRegion.height; i++) { + coeffBuffer.put(i, (short)(-coeffBuffer.get(i))); + } + } + + + static void usage() throws Exception { + System.out.println("\nUSAGE: java [Java options] " + CLASS_NAME + + " [options]\n"); + + System.out.println("Input and output images can be in any image format that the Java Image I/O"); System.out.println("extensions understand. If either filename ends in a .jpg extension, then"); - System.out.println("TurboJPEG will be used to compress or decompress the file.\n"); - System.out.println("Options:\n"); - System.out.println("-scale M/N = if the input image is a JPEG file, scale the width/height of the"); - System.out.print(" output image by a factor of M/N (M/N = "); - for (int i = 0; i < sf.length; i++) { - System.out.print(sf[i].getNum() + "/" + sf[i].getDenom()); - if (sf.length == 2 && i != sf.length - 1) + System.out.println("the TurboJPEG API will be used to compress or decompress the image.\n"); + + System.out.println("Compression Options (used if the output image is a JPEG image)"); + System.out.println("--------------------------------------------------------------\n"); + + System.out.println("-subsamp <444|422|420|gray> = Apply this level of chrominance subsampling when"); + System.out.println(" compressing the output image. 
The default is to use the same level of"); + System.out.println(" subsampling as in the input image, if the input image is also a JPEG"); + System.out.println(" image, or to use grayscale if the input image is a grayscale non-JPEG"); + System.out.println(" image, or to use " + + SUBSAMP_NAME[DEFAULT_SUBSAMP] + + " subsampling otherwise.\n"); + + System.out.println("-q <1-100> = Compress the output image with this JPEG quality level"); + System.out.println(" (default = " + DEFAULT_QUALITY + ").\n"); + + System.out.println("Decompression Options (used if the input image is a JPEG image)"); + System.out.println("---------------------------------------------------------------\n"); + + System.out.println("-scale M/N = Scale the input image by a factor of M/N when decompressing it."); + System.out.print("(M/N = "); + for (int i = 0; i < SCALING_FACTORS.length; i++) { + System.out.print(SCALING_FACTORS[i].getNum() + "/" + + SCALING_FACTORS[i].getDenom()); + if (SCALING_FACTORS.length == 2 && i != SCALING_FACTORS.length - 1) System.out.print(" or "); - else if (sf.length > 2) { - if (i != sf.length - 1) + else if (SCALING_FACTORS.length > 2) { + if (i != SCALING_FACTORS.length - 1) System.out.print(", "); - if (i == sf.length - 2) + if (i == SCALING_FACTORS.length - 2) System.out.print("or "); } } System.out.println(")\n"); - System.out.println("-samp <444|422|420|gray> = If the output image is a JPEG file, this specifies"); - System.out.println(" the level of chrominance subsampling to use when"); - System.out.println(" recompressing it. 
Default is to use the same level"); - System.out.println(" of subsampling as the input, if the input is a JPEG"); - System.out.println(" file, or 4:4:4 otherwise.\n"); - System.out.println("-q <1-100> = If the output image is a JPEG file, this specifies the JPEG"); - System.out.println(" quality to use when recompressing it (default = 95).\n"); + System.out.println("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 ="); - System.out.println(" If the input image is a JPEG file, perform the corresponding lossless"); - System.out.println(" transform prior to decompression (these options are mutually exclusive)\n"); - System.out.println("-grayscale = If the input image is a JPEG file, perform lossless grayscale"); - System.out.println(" conversion prior to decompression (can be combined with the other"); - System.out.println(" transforms above)\n"); - System.out.println("-crop X,Y,WxH = If the input image is a JPEG file, perform lossless cropping"); - System.out.println(" prior to decompression. X,Y specifies the upper left corner of the"); - System.out.println(" cropping region, and WxH specifies its width and height. 
X,Y must be"); - System.out.println(" evenly divible by the MCU block size (8x8 if the source image was"); - System.out.println(" compressed using no subsampling or grayscale, or 16x8 for 4:2:2 or 16x16"); - System.out.println(" for 4:2:0.)\n"); - System.out.println("-display = Display output image (Output file need not be specified in this"); + System.out.println(" Perform one of these lossless transform operations on the input image"); + System.out.println(" prior to decompressing it (these options are mutually exclusive.)\n"); + + System.out.println("-grayscale = Perform lossless grayscale conversion on the input image prior"); + System.out.println(" to decompressing it (can be combined with the other transform operations"); + System.out.println(" above.)\n"); + + System.out.println("-crop WxH+X+Y = Perform lossless cropping on the input image prior to"); + System.out.println(" decompressing it. X and Y specify the upper left corner of the cropping"); + System.out.println(" region, and W and H specify the width and height of the cropping region."); + System.out.println(" X and Y must be evenly divible by the MCU block size (8x8 if the input"); + System.out.println(" image was compressed using no subsampling or grayscale, 16x8 if it was"); + System.out.println(" compressed using 4:2:2 subsampling, or 16x16 if it was compressed using"); + System.out.println(" 4:2:0 subsampling.)\n"); + + System.out.println("General Options"); + System.out.println("---------------\n"); + + System.out.println("-display = Display output image (Output filename need not be specified in this"); System.out.println(" case.)\n"); + System.out.println("-fastupsample = Use the fastest chrominance upsampling algorithm available in"); - System.out.println(" the underlying codec\n"); + System.out.println(" the underlying codec.\n"); + System.out.println("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying"); - System.out.println(" codec\n"); + System.out.println(" 
codec.\n"); + System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the"); - System.out.println(" underlying codec\n"); + System.out.println(" underlying codec.\n"); + System.exit(1); } - private static final String[] sampName = { - "4:4:4", "4:2:2", "4:2:0", "Grayscale", "4:4:0", "4:1:1" - }; public static void main(String[] argv) { - BufferedImage img = null; - byte[] bmpBuf = null; - TJTransform xform = new TJTransform(); - int flags = 0; - try { - sf = TJ.getScalingFactors(); + TJScalingFactor scalingFactor = new TJScalingFactor(1, 1); + int outSubsamp = -1, outQual = -1; + TJTransform xform = new TJTransform(); + boolean display = false; + int flags = 0; + int width, height; + String inFormat = "jpg", outFormat = "jpg"; + BufferedImage img = null; + byte[] imgBuf = null; - if (argv.length < 2) { + if (argv.length < 2) usage(); - } - - TJScalingFactor scaleFactor = new TJScalingFactor(1, 1); - String inFormat = "jpg", outFormat = "jpg"; - int outSubsamp = -1, outQual = 95; - boolean display = false; if (argv[1].substring(0, 2).equalsIgnoreCase("-d")) display = true; + /* Parse arguments. 
*/ for (int i = 2; i < argv.length; i++) { if (argv[i].length() < 2) continue; else if (argv[i].length() > 2 && - argv[i].substring(0, 3).equalsIgnoreCase("-sc")) { + argv[i].substring(0, 3).equalsIgnoreCase("-sc") && + i < argv.length - 1) { int match = 0; - if (i < argv.length - 1) { - String[] scaleArg = argv[++i].split("/"); - if (scaleArg.length == 2) { - TJScalingFactor tempsf = - new TJScalingFactor(Integer.parseInt(scaleArg[0]), - Integer.parseInt(scaleArg[1])); - for (int j = 0; j < sf.length; j++) { - if (tempsf.equals(sf[j])) { - scaleFactor = sf[j]; - match = 1; - break; - } + String[] scaleArg = argv[++i].split("/"); + if (scaleArg.length == 2) { + TJScalingFactor tempsf = + new TJScalingFactor(Integer.parseInt(scaleArg[0]), + Integer.parseInt(scaleArg[1])); + for (int j = 0; j < SCALING_FACTORS.length; j++) { + if (tempsf.equals(SCALING_FACTORS[j])) { + scalingFactor = SCALING_FACTORS[j]; + match = 1; + break; } } } - if (match != 1) usage(); - } - else if (argv[i].length() > 2 && - argv[i].substring(0, 3).equalsIgnoreCase("-sa")) { - if (i < argv.length - 1) { - i++; - if (argv[i].substring(0, 1).equalsIgnoreCase("g")) - outSubsamp = TJ.SAMP_GRAY; - else if (argv[i].equals("444")) - outSubsamp = TJ.SAMP_444; - else if (argv[i].equals("422")) - outSubsamp = TJ.SAMP_422; - else if (argv[i].equals("420")) - outSubsamp = TJ.SAMP_420; - else - usage(); - } else + if (match != 1) usage(); - } - else if (argv[i].substring(0, 2).equalsIgnoreCase("-q")) { - if (i < argv.length - 1) { - int qual = Integer.parseInt(argv[++i]); - if (qual >= 1 && qual <= 100) - outQual = qual; - else - usage(); - } else + } else if (argv[i].length() > 2 && + argv[i].substring(0, 3).equalsIgnoreCase("-su") && + i < argv.length - 1) { + i++; + if (argv[i].substring(0, 1).equalsIgnoreCase("g")) + outSubsamp = TJ.SAMP_GRAY; + else if (argv[i].equals("444")) + outSubsamp = TJ.SAMP_444; + else if (argv[i].equals("422")) + outSubsamp = TJ.SAMP_422; + else if (argv[i].equals("420")) + 
outSubsamp = TJ.SAMP_420; + else usage(); - } - else if (argv[i].substring(0, 2).equalsIgnoreCase("-g")) + } else if (argv[i].substring(0, 2).equalsIgnoreCase("-q") && + i < argv.length - 1) { + outQual = Integer.parseInt(argv[++i]); + if (outQual < 1 || outQual > 100) + usage(); + } else if (argv[i].substring(0, 2).equalsIgnoreCase("-g")) xform.options |= TJTransform.OPT_GRAY; else if (argv[i].equalsIgnoreCase("-hflip")) xform.op = TJTransform.OP_HFLIP; @@ -190,43 +230,34 @@ public class TJExample implements TJCustomFilter { else if (argv[i].equalsIgnoreCase("-custom")) xform.cf = new TJExample(); else if (argv[i].length() > 2 && - argv[i].substring(0, 2).equalsIgnoreCase("-c")) { - if (i >= argv.length - 1) - usage(); - String[] cropArg = argv[++i].split(","); - if (cropArg.length != 3) - usage(); - String[] dimArg = cropArg[2].split("[xX]"); - if (dimArg.length != 2) + argv[i].substring(0, 2).equalsIgnoreCase("-c") && + i < argv.length - 1) { + String[] cropArg = argv[++i].split("[x\\+]"); + if (cropArg.length != 4) usage(); - int tempx = Integer.parseInt(cropArg[0]); - int tempy = Integer.parseInt(cropArg[1]); - int tempw = Integer.parseInt(dimArg[0]); - int temph = Integer.parseInt(dimArg[1]); - if (tempx < 0 || tempy < 0 || tempw < 0 || temph < 0) + xform.width = Integer.parseInt(cropArg[0]); + xform.height = Integer.parseInt(cropArg[1]); + xform.x = Integer.parseInt(cropArg[2]); + xform.y = Integer.parseInt(cropArg[3]); + if (xform.x < 0 || xform.y < 0 || xform.width < 1 || + xform.height < 1) usage(); - xform.x = tempx; - xform.y = tempy; - xform.width = tempw; - xform.height = temph; xform.options |= TJTransform.OPT_CROP; - } - else if (argv[i].substring(0, 2).equalsIgnoreCase("-d")) + } else if (argv[i].substring(0, 2).equalsIgnoreCase("-d")) display = true; else if (argv[i].equalsIgnoreCase("-fastupsample")) { System.out.println("Using fast upsampling code"); flags |= TJ.FLAG_FASTUPSAMPLE; - } - else if (argv[i].equalsIgnoreCase("-fastdct")) { + } else 
if (argv[i].equalsIgnoreCase("-fastdct")) { System.out.println("Using fastest DCT/IDCT algorithm"); flags |= TJ.FLAG_FASTDCT; - } - else if (argv[i].equalsIgnoreCase("-accuratedct")) { + } else if (argv[i].equalsIgnoreCase("-accuratedct")) { System.out.println("Using most accurate DCT/IDCT algorithm"); flags |= TJ.FLAG_ACCURATEDCT; - } - else usage(); + } else usage(); } + + /* Determine input and output image formats based on file extensions. */ String[] inFileTokens = argv[0].split("\\."); if (inFileTokens.length > 1) inFormat = inFileTokens[inFileTokens.length - 1]; @@ -239,61 +270,75 @@ public class TJExample implements TJCustomFilter { outFormat = outFileTokens[outFileTokens.length - 1]; } - File file = new File(argv[0]); - int width, height; - if (inFormat.equalsIgnoreCase("jpg")) { - FileInputStream fis = new FileInputStream(file); - int inputSize = fis.available(); - if (inputSize < 1) { + /* Input image is a JPEG image. Decompress and/or transform it. */ + boolean doTransform = (xform.op != TJTransform.OP_NONE || + xform.options != 0 || xform.cf != null); + + /* Read the JPEG file into memory. */ + File jpegFile = new File(argv[0]); + FileInputStream fis = new FileInputStream(jpegFile); + int jpegSize = fis.available(); + if (jpegSize < 1) { System.out.println("Input file contains no data"); System.exit(1); } - byte[] inputBuf = new byte[inputSize]; - fis.read(inputBuf); + byte[] jpegBuf = new byte[jpegSize]; + fis.read(jpegBuf); fis.close(); TJDecompressor tjd; - if (xform.op != TJTransform.OP_NONE || xform.options != 0 || - xform.cf != null) { - TJTransformer tjt = new TJTransformer(inputBuf); - TJTransform[] t = new TJTransform[1]; - t[0] = xform; - t[0].options |= TJTransform.OPT_TRIM; - TJDecompressor[] tjdx = tjt.transform(t, 0); - tjd = tjdx[0]; + if (doTransform) { + /* Transform it. 
*/ + TJTransformer tjt = new TJTransformer(jpegBuf); + TJTransform[] xforms = new TJTransform[1]; + xforms[0] = xform; + xforms[0].options |= TJTransform.OPT_TRIM; + TJDecompressor[] tjds = tjt.transform(xforms, 0); + tjd = tjds[0]; + tjt.close(); } else - tjd = new TJDecompressor(inputBuf); + tjd = new TJDecompressor(jpegBuf); width = tjd.getWidth(); height = tjd.getHeight(); int inSubsamp = tjd.getSubsamp(); - System.out.println("Source Image: " + width + " x " + height + - " pixels, " + sampName[inSubsamp] + " subsampling"); - if (outSubsamp < 0) - outSubsamp = inSubsamp; + int inColorspace = tjd.getColorspace(); - if (outFormat.equalsIgnoreCase("jpg") && - (xform.op != TJTransform.OP_NONE || xform.options != 0) && - scaleFactor.isOne()) { - file = new File(argv[1]); - FileOutputStream fos = new FileOutputStream(file); + System.out.println((doTransform ? "Transformed" : "Input") + + " Image (jpg): " + width + " x " + height + + " pixels, " + SUBSAMP_NAME[inSubsamp] + + " subsampling, " + COLORSPACE_NAME[inColorspace]); + + if (outFormat.equalsIgnoreCase("jpg") && doTransform && + scalingFactor.isOne() && outSubsamp < 0 && outQual < 0) { + /* Input image has been transformed, and no re-compression options + have been selected. Write the transformed image to disk and + exit. */ + File outFile = new File(argv[1]); + FileOutputStream fos = new FileOutputStream(outFile); fos.write(tjd.getJPEGBuf(), 0, tjd.getJPEGSize()); fos.close(); System.exit(0); } - width = scaleFactor.getScaled(width); - height = scaleFactor.getScaled(height); + /* Scaling and/or a non-JPEG output image format and/or compression + options have been selected, so we need to decompress the + input/transformed image. 
*/ + width = scalingFactor.getScaled(width); + height = scalingFactor.getScaled(height); + if (outSubsamp < 0) + outSubsamp = inSubsamp; if (!outFormat.equalsIgnoreCase("jpg")) img = tjd.decompress(width, height, BufferedImage.TYPE_INT_RGB, flags); else - bmpBuf = tjd.decompress(width, 0, height, TJ.PF_BGRX, flags); + imgBuf = tjd.decompress(width, 0, height, TJ.PF_BGRX, flags); tjd.close(); } else { - img = ImageIO.read(file); + /* Input image is not a JPEG image. Load it into memory. */ + img = ImageIO.read(new File(argv[0])); if (img == null) throw new Exception("Input image type not supported."); width = img.getWidth(); @@ -302,61 +347,59 @@ public class TJExample implements TJCustomFilter { if (img.getType() == BufferedImage.TYPE_BYTE_GRAY) outSubsamp = TJ.SAMP_GRAY; else - outSubsamp = TJ.SAMP_444; + outSubsamp = DEFAULT_SUBSAMP; } + System.out.println("Input Image: " + width + " x " + height + + " pixels"); } System.gc(); if (!display) - System.out.print("Dest. Image (" + outFormat + "): " + width + " x " + - height + " pixels"); + System.out.print("Output Image (" + outFormat + "): " + width + + " x " + height + " pixels"); if (display) { + /* Display the uncompressed image */ ImageIcon icon = new ImageIcon(img); JLabel label = new JLabel(icon, JLabel.CENTER); JOptionPane.showMessageDialog(null, label, "Output Image", JOptionPane.PLAIN_MESSAGE); } else if (outFormat.equalsIgnoreCase("jpg")) { - System.out.println(", " + sampName[outSubsamp] + + /* Output image format is JPEG. Compress the uncompressed image. 
*/ + if (outQual < 0) + outQual = DEFAULT_QUALITY; + System.out.println(", " + SUBSAMP_NAME[outSubsamp] + " subsampling, quality = " + outQual); - TJCompressor tjc = new TJCompressor(); - int jpegSize; - byte[] jpegBuf; + TJCompressor tjc = new TJCompressor(); tjc.setSubsamp(outSubsamp); tjc.setJPEGQuality(outQual); if (img != null) tjc.setSourceImage(img, 0, 0, 0, 0); - else { - tjc.setSourceImage(bmpBuf, 0, 0, width, 0, height, TJ.PF_BGRX); - } - jpegBuf = tjc.compress(flags); - jpegSize = tjc.getCompressedSize(); + else + tjc.setSourceImage(imgBuf, 0, 0, width, 0, height, TJ.PF_BGRX); + byte[] jpegBuf = tjc.compress(flags); + int jpegSize = tjc.getCompressedSize(); tjc.close(); - file = new File(argv[1]); - FileOutputStream fos = new FileOutputStream(file); + /* Write the JPEG image to disk. */ + File outFile = new File(argv[1]); + FileOutputStream fos = new FileOutputStream(outFile); fos.write(jpegBuf, 0, jpegSize); fos.close(); } else { + /* Output image format is not JPEG. Save the uncompressed image + directly to disk. */ System.out.print("\n"); - file = new File(argv[1]); - ImageIO.write(img, outFormat, file); + File outFile = new File(argv[1]); + ImageIO.write(img, outFormat, outFile); } - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); System.exit(-1); } } - public void customFilter(ShortBuffer coeffBuffer, Rectangle bufferRegion, - Rectangle planeRegion, int componentIndex, - int transformIndex, TJTransform transform) - throws TJException { - for (int i = 0; i < bufferRegion.width * bufferRegion.height; i++) { - coeffBuffer.put(i, (short)(-coeffBuffer.get(i))); - } - } - - static TJScalingFactor[] sf = null; + static final TJScalingFactor[] SCALING_FACTORS = + TJ.getScalingFactors(); }; diff --git a/java/TJUnitTest.java b/java/TJUnitTest.java index 47ff7bb..91ad5fd 100644 --- a/java/TJUnitTest.java +++ b/java/TJUnitTest.java @@ -1,5 +1,5 @@ /* - * Copyright (C)2011-2017 D. R. Commander. All Rights Reserved. 
+ * Copyright (C)2011-2018 D. R. Commander. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,13 +37,16 @@ import javax.imageio.*; import java.nio.*; import org.libjpegturbo.turbojpeg.*; -public class TJUnitTest { +@SuppressWarnings("checkstyle:JavadocType") +final class TJUnitTest { - private static final String classname = + private TJUnitTest() {} + + static final String CLASS_NAME = new TJUnitTest().getClass().getName(); - private static void usage() { - System.out.println("\nUSAGE: java " + classname + " [options]\n"); + static void usage() { + System.out.println("\nUSAGE: java " + CLASS_NAME + " [options]\n"); System.out.println("Options:"); System.out.println("-yuv = test YUV encoding/decoding support"); System.out.println("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest"); @@ -52,43 +55,39 @@ public class TJUnitTest { System.exit(1); } - private static final String[] subNameLong = { + static final String[] SUBNAME_LONG = { "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1" }; - private static final String[] subName = { + static final String[] SUBNAME = { "444", "422", "420", "GRAY", "440", "411" }; - private static final String[] pixFormatStr = { + static final String[] PIXFORMATSTR = { "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "Grayscale", "RGBA", "BGRA", "ABGR", "ARGB", "CMYK" }; - private static final int[] alphaOffset = { - -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1 - }; - - private static final int[] _3byteFormats = { + static final int[] FORMATS_3BYTE = { TJ.PF_RGB, TJ.PF_BGR }; - private static final int[] _3byteFormatsBI = { + static final int[] FORMATS_3BYTEBI = { BufferedImage.TYPE_3BYTE_BGR }; - private static final int[] _4byteFormats = { + static final int[] FORMATS_4BYTE = { TJ.PF_RGBX, TJ.PF_BGRX, TJ.PF_XBGR, TJ.PF_XRGB, TJ.PF_CMYK }; - private static final int[] _4byteFormatsBI = { + 
static final int[] FORMATS_4BYTEBI = { BufferedImage.TYPE_INT_BGR, BufferedImage.TYPE_INT_RGB, BufferedImage.TYPE_4BYTE_ABGR, BufferedImage.TYPE_4BYTE_ABGR_PRE, BufferedImage.TYPE_INT_ARGB, BufferedImage.TYPE_INT_ARGB_PRE }; - private static final int[] onlyGray = { + static final int[] FORMATS_GRAY = { TJ.PF_GRAY }; - private static final int[] onlyGrayBI = { + static final int[] FORMATS_GRAYBI = { BufferedImage.TYPE_BYTE_GRAY }; - private static final int[] onlyRGB = { + static final int[] FORMATS_RGB = { TJ.PF_RGB }; @@ -98,55 +97,57 @@ public class TJUnitTest { private static int exitStatus = 0; - private static int biTypePF(int biType) { + static int biTypePF(int biType) { ByteOrder byteOrder = ByteOrder.nativeOrder(); - switch(biType) { - case BufferedImage.TYPE_3BYTE_BGR: - return TJ.PF_BGR; - case BufferedImage.TYPE_4BYTE_ABGR: - case BufferedImage.TYPE_4BYTE_ABGR_PRE: - return TJ.PF_ABGR; - case BufferedImage.TYPE_BYTE_GRAY: - return TJ.PF_GRAY; - case BufferedImage.TYPE_INT_BGR: - return TJ.PF_RGBX; - case BufferedImage.TYPE_INT_RGB: - return TJ.PF_BGRX; - case BufferedImage.TYPE_INT_ARGB: - case BufferedImage.TYPE_INT_ARGB_PRE: - return TJ.PF_BGRA; + switch (biType) { + case BufferedImage.TYPE_3BYTE_BGR: + return TJ.PF_BGR; + case BufferedImage.TYPE_4BYTE_ABGR: + case BufferedImage.TYPE_4BYTE_ABGR_PRE: + return TJ.PF_ABGR; + case BufferedImage.TYPE_BYTE_GRAY: + return TJ.PF_GRAY; + case BufferedImage.TYPE_INT_BGR: + return TJ.PF_RGBX; + case BufferedImage.TYPE_INT_RGB: + return TJ.PF_BGRX; + case BufferedImage.TYPE_INT_ARGB: + case BufferedImage.TYPE_INT_ARGB_PRE: + return TJ.PF_BGRA; + default: + return 0; } - return 0; } - private static String biTypeStr(int biType) { - switch(biType) { - case BufferedImage.TYPE_3BYTE_BGR: - return "3BYTE_BGR"; - case BufferedImage.TYPE_4BYTE_ABGR: - return "4BYTE_ABGR"; - case BufferedImage.TYPE_4BYTE_ABGR_PRE: - return "4BYTE_ABGR_PRE"; - case BufferedImage.TYPE_BYTE_GRAY: - return "BYTE_GRAY"; - case 
BufferedImage.TYPE_INT_BGR: - return "INT_BGR"; - case BufferedImage.TYPE_INT_RGB: - return "INT_RGB"; - case BufferedImage.TYPE_INT_ARGB: - return "INT_ARGB"; - case BufferedImage.TYPE_INT_ARGB_PRE: - return "INT_ARGB_PRE"; + static String biTypeStr(int biType) { + switch (biType) { + case BufferedImage.TYPE_3BYTE_BGR: + return "3BYTE_BGR"; + case BufferedImage.TYPE_4BYTE_ABGR: + return "4BYTE_ABGR"; + case BufferedImage.TYPE_4BYTE_ABGR_PRE: + return "4BYTE_ABGR_PRE"; + case BufferedImage.TYPE_BYTE_GRAY: + return "BYTE_GRAY"; + case BufferedImage.TYPE_INT_BGR: + return "INT_BGR"; + case BufferedImage.TYPE_INT_RGB: + return "INT_RGB"; + case BufferedImage.TYPE_INT_ARGB: + return "INT_ARGB"; + case BufferedImage.TYPE_INT_ARGB_PRE: + return "INT_ARGB_PRE"; + default: + return "Unknown"; } - return "Unknown"; } - private static void initBuf(byte[] buf, int w, int pitch, int h, int pf, - int flags) throws Exception { + static void initBuf(byte[] buf, int w, int pitch, int h, int pf, int flags) + throws Exception { int roffset = TJ.getRedOffset(pf); int goffset = TJ.getGreenOffset(pf); int boffset = TJ.getBlueOffset(pf); - int aoffset = alphaOffset[pf]; + int aoffset = TJ.getAlphaOffset(pf); int ps = TJ.getPixelSize(pf); int index, row, col, halfway = 16; @@ -210,12 +211,12 @@ public class TJUnitTest { } } - private static void initIntBuf(int[] buf, int w, int pitch, int h, int pf, - int flags) throws Exception { + static void initIntBuf(int[] buf, int w, int pitch, int h, int pf, int flags) + throws Exception { int rshift = TJ.getRedOffset(pf) * 8; int gshift = TJ.getGreenOffset(pf) * 8; int bshift = TJ.getBlueOffset(pf) * 8; - int ashift = alphaOffset[pf] * 8; + int ashift = TJ.getAlphaOffset(pf) * 8; int index, row, col, halfway = 16; Arrays.fill(buf, 0); @@ -242,10 +243,10 @@ public class TJUnitTest { } } - private static void initImg(BufferedImage img, int pf, int flags) - throws Exception { + static void initImg(BufferedImage img, int pf, int flags) throws 
Exception { WritableRaster wr = img.getRaster(); int imgType = img.getType(); + if (imgType == BufferedImage.TYPE_INT_RGB || imgType == BufferedImage.TYPE_INT_BGR || imgType == BufferedImage.TYPE_INT_ARGB || @@ -265,8 +266,8 @@ public class TJUnitTest { } } - private static void checkVal(int row, int col, int v, String vname, int cv) - throws Exception { + static void checkVal(int row, int col, int v, String vname, int cv) + throws Exception { v = (v < 0) ? v + 256 : v; if (v < cv - 1 || v > cv + 1) { throw new Exception("Comp. " + vname + " at " + row + "," + col + @@ -274,8 +275,8 @@ public class TJUnitTest { } } - private static void checkVal0(int row, int col, int v, String vname) - throws Exception { + static void checkVal0(int row, int col, int v, String vname) + throws Exception { v = (v < 0) ? v + 256 : v; if (v > 1) { throw new Exception("Comp. " + vname + " at " + row + "," + col + @@ -283,8 +284,8 @@ public class TJUnitTest { } } - private static void checkVal255(int row, int col, int v, String vname) - throws Exception { + static void checkVal255(int row, int col, int v, String vname) + throws Exception { v = (v < 0) ? v + 256 : v; if (v < 254) { throw new Exception("Comp. 
" + vname + " at " + row + "," + col + @@ -292,13 +293,12 @@ public class TJUnitTest { } } - private static int checkBuf(byte[] buf, int w, int pitch, int h, int pf, - int subsamp, TJScalingFactor sf, int flags) - throws Exception { + static int checkBuf(byte[] buf, int w, int pitch, int h, int pf, int subsamp, + TJScalingFactor sf, int flags) throws Exception { int roffset = TJ.getRedOffset(pf); int goffset = TJ.getGreenOffset(pf); int boffset = TJ.getBlueOffset(pf); - int aoffset = alphaOffset[pf]; + int aoffset = TJ.getAlphaOffset(pf); int ps = TJ.getPixelSize(pf); int index, row, col, retval = 1; int halfway = 16 * sf.getNum() / sf.getDenom(); @@ -306,6 +306,9 @@ public class TJUnitTest { try { + if (pf == TJ.PF_GRAY) + roffset = goffset = boffset = 0; + if (pf == TJ.PF_CMYK) { for (row = 0; row < h; row++) { for (col = 0; col < w; col++) { @@ -382,7 +385,7 @@ public class TJUnitTest { checkVal255(row, col, a, "A"); } } - } catch(Exception e) { + } catch (Exception e) { System.out.println("\n" + e.getMessage()); retval = 0; } @@ -416,13 +419,13 @@ public class TJUnitTest { return retval; } - private static int checkIntBuf(int[] buf, int w, int pitch, int h, int pf, - int subsamp, TJScalingFactor sf, int flags) - throws Exception { + static int checkIntBuf(int[] buf, int w, int pitch, int h, int pf, + int subsamp, TJScalingFactor sf, int flags) + throws Exception { int rshift = TJ.getRedOffset(pf) * 8; int gshift = TJ.getGreenOffset(pf) * 8; int bshift = TJ.getBlueOffset(pf) * 8; - int ashift = alphaOffset[pf] * 8; + int ashift = TJ.getAlphaOffset(pf) * 8; int index, row, col, retval = 1; int halfway = 16 * sf.getNum() / sf.getDenom(); int blockSize = 8 * sf.getNum() / sf.getDenom(); @@ -472,7 +475,7 @@ public class TJUnitTest { checkVal255(row, col, a, "A"); } } - } catch(Exception e) { + } catch (Exception e) { System.out.println("\n" + e.getMessage()); retval = 0; } @@ -494,8 +497,8 @@ public class TJUnitTest { return retval; } - private static int 
checkImg(BufferedImage img, int pf, int subsamp, - TJScalingFactor sf, int flags) throws Exception { + static int checkImg(BufferedImage img, int pf, int subsamp, + TJScalingFactor sf, int flags) throws Exception { WritableRaster wr = img.getRaster(); int imgType = img.getType(); if (imgType == BufferedImage.TYPE_INT_RGB || @@ -519,18 +522,17 @@ public class TJUnitTest { } } - private static int PAD(int v, int p) { + static int pad(int v, int p) { return ((v + (p) - 1) & (~((p) - 1))); } - private static int checkBufYUV(byte[] buf, int size, int w, int h, - int subsamp, TJScalingFactor sf) - throws Exception { + static int checkBufYUV(byte[] buf, int size, int w, int h, int subsamp, + TJScalingFactor sf) throws Exception { int row, col; int hsf = TJ.getMCUWidth(subsamp) / 8, vsf = TJ.getMCUHeight(subsamp) / 8; - int pw = PAD(w, hsf), ph = PAD(h, vsf); + int pw = pad(w, hsf), ph = pad(h, vsf); int cw = pw / hsf, ch = ph / vsf; - int ypitch = PAD(pw, pad), uvpitch = PAD(cw, pad); + int ypitch = pad(pw, pad), uvpitch = pad(cw, pad); int retval = 1; int correctsize = ypitch * ph + (subsamp == TJ.SAMP_GRAY ? 
0 : uvpitch * ch * 2); @@ -579,7 +581,7 @@ public class TJUnitTest { } } } - } catch(Exception e) { + } catch (Exception e) { System.out.println("\n" + e.getMessage()); retval = 0; } @@ -616,17 +618,17 @@ public class TJUnitTest { return retval; } - private static void writeJPEG(byte[] jpegBuf, int jpegBufSize, - String filename) throws Exception { + static void writeJPEG(byte[] jpegBuf, int jpegBufSize, String filename) + throws Exception { File file = new File(filename); FileOutputStream fos = new FileOutputStream(file); fos.write(jpegBuf, 0, jpegBufSize); fos.close(); } - private static int compTest(TJCompressor tjc, byte[] dstBuf, int w, - int h, int pf, String baseName, int subsamp, - int jpegQual, int flags) throws Exception { + static int compTest(TJCompressor tjc, byte[] dstBuf, int w, int h, int pf, + String baseName, int subsamp, int jpegQual, int flags) + throws Exception { String tempStr; byte[] srcBuf = null; BufferedImage img = null; @@ -639,9 +641,9 @@ public class TJUnitTest { if (bi) { pf = biTypePF(imgType); pfStr = biTypeStr(imgType); - pfStrLong = pfStr + " (" + pixFormatStr[pf] + ")"; + pfStrLong = pfStr + " (" + PIXFORMATSTR[pf] + ")"; } else { - pfStr = pixFormatStr[pf]; + pfStr = PIXFORMATSTR[pf]; pfStrLong = pfStr; } ps = TJ.getPixelSize(pf); @@ -650,7 +652,7 @@ public class TJUnitTest { img = new BufferedImage(w, h, imgType); initImg(img, pf, flags); tempStr = baseName + "_enc_" + pfStr + "_" + buStr + "_" + - subName[subsamp] + "_Q" + jpegQual + ".png"; + SUBNAME[subsamp] + "_Q" + jpegQual + ".png"; File file = new File(tempStr); ImageIO.write(img, "png", file); tjc.setSourceImage(img, 0, 0, 0, 0); @@ -665,38 +667,37 @@ public class TJUnitTest { tjc.setJPEGQuality(jpegQual); if (doYUV) { System.out.format("%s %s -> YUV %s ... 
", pfStrLong, buStrLong, - subNameLong[subsamp]); + SUBNAME_LONG[subsamp]); YUVImage yuvImage = tjc.encodeYUV(pad, flags); if (checkBufYUV(yuvImage.getBuf(), yuvImage.getSize(), w, h, subsamp, - new TJScalingFactor(1, 1)) == 1) + new TJScalingFactor(1, 1)) == 1) System.out.print("Passed.\n"); else { System.out.print("FAILED!\n"); exitStatus = -1; } - System.out.format("YUV %s %s -> JPEG Q%d ... ", subNameLong[subsamp], + System.out.format("YUV %s %s -> JPEG Q%d ... ", SUBNAME_LONG[subsamp], buStrLong, jpegQual); tjc.setSourceImage(yuvImage); } else { System.out.format("%s %s -> %s Q%d ... ", pfStrLong, buStrLong, - subNameLong[subsamp], jpegQual); + SUBNAME_LONG[subsamp], jpegQual); } tjc.compress(dstBuf, flags); size = tjc.getCompressedSize(); tempStr = baseName + "_enc_" + pfStr + "_" + buStr + "_" + - subName[subsamp] + "_Q" + jpegQual + ".jpg"; + SUBNAME[subsamp] + "_Q" + jpegQual + ".jpg"; writeJPEG(dstBuf, size, tempStr); System.out.println("Done.\n Result in " + tempStr); return size; } - private static void decompTest(TJDecompressor tjd, byte[] jpegBuf, - int jpegSize, int w, int h, int pf, - String baseName, int subsamp, int flags, - TJScalingFactor sf) throws Exception { + static void decompTest(TJDecompressor tjd, byte[] jpegBuf, int jpegSize, + int w, int h, int pf, String baseName, int subsamp, + int flags, TJScalingFactor sf) throws Exception { String pfStr, pfStrLong, tempStr; String buStrLong = (flags & TJ.FLAG_BOTTOMUP) != 0 ? 
"Bottom-Up" : "Top-Down "; @@ -709,9 +710,9 @@ public class TJUnitTest { if (bi) { pf = biTypePF(imgType); pfStr = biTypeStr(imgType); - pfStrLong = pfStr + " (" + pixFormatStr[pf] + ")"; + pfStrLong = pfStr + " (" + PIXFORMATSTR[pf] + ")"; } else { - pfStr = pixFormatStr[pf]; + pfStr = PIXFORMATSTR[pf]; pfStrLong = pfStr; } @@ -728,8 +729,8 @@ public class TJUnitTest { throw new Exception("Scaled size mismatch"); if (doYUV) { - System.out.format("JPEG -> YUV %s ", subNameLong[subsamp]); - if(!sf.isOne()) + System.out.format("JPEG -> YUV %s ", SUBNAME_LONG[subsamp]); + if (!sf.isOne()) System.out.format("%d/%d ... ", sf.getNum(), sf.getDenom()); else System.out.print("... "); YUVImage yuvImage = tjd.decompressToYUV(scaledWidth, pad, scaledHeight, @@ -741,12 +742,12 @@ public class TJUnitTest { System.out.print("FAILED!\n"); exitStatus = -1; } - System.out.format("YUV %s -> %s %s ... ", subNameLong[subsamp], + System.out.format("YUV %s -> %s %s ... ", SUBNAME_LONG[subsamp], pfStrLong, buStrLong); tjd.setSourceImage(yuvImage); } else { System.out.format("JPEG -> %s %s ", pfStrLong, buStrLong); - if(!sf.isOne()) + if (!sf.isOne()) System.out.format("%d/%d ... ", sf.getNum(), sf.getDenom()); else System.out.print("... "); } @@ -758,7 +759,7 @@ public class TJUnitTest { if (bi) { tempStr = baseName + "_dec_" + pfStr + "_" + (((flags & TJ.FLAG_BOTTOMUP) != 0) ? 
"BU" : "TD") + "_" + - subName[subsamp] + "_" + + SUBNAME[subsamp] + "_" + (double)sf.getNum() / (double)sf.getDenom() + "x" + ".png"; File file = new File(tempStr); ImageIO.write(img, "png", file); @@ -775,10 +776,9 @@ public class TJUnitTest { } } - private static void decompTest(TJDecompressor tjd, byte[] jpegBuf, - int jpegSize, int w, int h, int pf, - String baseName, int subsamp, - int flags) throws Exception { + static void decompTest(TJDecompressor tjd, byte[] jpegBuf, int jpegSize, + int w, int h, int pf, String baseName, int subsamp, + int flags) throws Exception { int i; TJScalingFactor[] sf = TJ.getScalingFactors(); for (i = 0; i < sf.length; i++) { @@ -794,8 +794,8 @@ public class TJUnitTest { } } - private static void doTest(int w, int h, int[] formats, int subsamp, - String baseName) throws Exception { + static void doTest(int w, int h, int[] formats, int subsamp, String baseName) + throws Exception { TJCompressor tjc = null; TJDecompressor tjd = null; int size; @@ -828,7 +828,7 @@ public class TJUnitTest { } } System.out.print("--------------------\n\n"); - } catch(Exception e) { + } catch (Exception e) { if (tjc != null) tjc.close(); if (tjd != null) tjd.close(); throw e; @@ -837,7 +837,7 @@ public class TJUnitTest { if (tjd != null) tjd.close(); } - private static void bufSizeTest() throws Exception { + static void bufSizeTest() throws Exception { int w, h, i, subsamp; byte[] srcBuf, dstBuf = null; YUVImage dstImage = null; @@ -889,7 +889,7 @@ public class TJUnitTest { } } System.out.println("Done. "); - } catch(Exception e) { + } catch (Exception e) { if (tjc != null) tjc.close(); throw e; } @@ -911,46 +911,47 @@ public class TJUnitTest { usage(); } if (doYUV) - _4byteFormats[4] = -1; - doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_444, + FORMATS_4BYTE[4] = -1; + doTest(35, 39, bi ? FORMATS_3BYTEBI : FORMATS_3BYTE, TJ.SAMP_444, + testName); + doTest(39, 41, bi ? 
FORMATS_4BYTEBI : FORMATS_4BYTE, TJ.SAMP_444, testName); - doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_444, + doTest(41, 35, bi ? FORMATS_3BYTEBI : FORMATS_3BYTE, TJ.SAMP_422, testName); - doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_422, + doTest(35, 39, bi ? FORMATS_4BYTEBI : FORMATS_4BYTE, TJ.SAMP_422, testName); - doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_422, + doTest(39, 41, bi ? FORMATS_3BYTEBI : FORMATS_3BYTE, TJ.SAMP_420, testName); - doTest(39, 41, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_420, + doTest(41, 35, bi ? FORMATS_4BYTEBI : FORMATS_4BYTE, TJ.SAMP_420, testName); - doTest(41, 35, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_420, + doTest(35, 39, bi ? FORMATS_3BYTEBI : FORMATS_3BYTE, TJ.SAMP_440, testName); - doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_440, + doTest(39, 41, bi ? FORMATS_4BYTEBI : FORMATS_4BYTE, TJ.SAMP_440, testName); - doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_440, + doTest(41, 35, bi ? FORMATS_3BYTEBI : FORMATS_3BYTE, TJ.SAMP_411, testName); - doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_411, + doTest(35, 39, bi ? FORMATS_4BYTEBI : FORMATS_4BYTE, TJ.SAMP_411, testName); - doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_411, + doTest(39, 41, bi ? FORMATS_GRAYBI : FORMATS_GRAY, TJ.SAMP_GRAY, testName); - doTest(39, 41, bi ? onlyGrayBI : onlyGray, TJ.SAMP_GRAY, testName); - doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_GRAY, + doTest(41, 35, bi ? FORMATS_3BYTEBI : FORMATS_3BYTE, TJ.SAMP_GRAY, testName); - _4byteFormats[4] = -1; - doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_GRAY, + FORMATS_4BYTE[4] = -1; + doTest(35, 39, bi ? 
FORMATS_4BYTEBI : FORMATS_4BYTE, TJ.SAMP_GRAY, testName); if (!bi) bufSizeTest(); if (doYUV && !bi) { System.out.print("\n--------------------\n\n"); - doTest(48, 48, onlyRGB, TJ.SAMP_444, "javatest_yuv0"); - doTest(48, 48, onlyRGB, TJ.SAMP_422, "javatest_yuv0"); - doTest(48, 48, onlyRGB, TJ.SAMP_420, "javatest_yuv0"); - doTest(48, 48, onlyRGB, TJ.SAMP_440, "javatest_yuv0"); - doTest(48, 48, onlyRGB, TJ.SAMP_411, "javatest_yuv0"); - doTest(48, 48, onlyRGB, TJ.SAMP_GRAY, "javatest_yuv0"); - doTest(48, 48, onlyGray, TJ.SAMP_GRAY, "javatest_yuv0"); + doTest(48, 48, FORMATS_RGB, TJ.SAMP_444, "javatest_yuv0"); + doTest(48, 48, FORMATS_RGB, TJ.SAMP_422, "javatest_yuv0"); + doTest(48, 48, FORMATS_RGB, TJ.SAMP_420, "javatest_yuv0"); + doTest(48, 48, FORMATS_RGB, TJ.SAMP_440, "javatest_yuv0"); + doTest(48, 48, FORMATS_RGB, TJ.SAMP_411, "javatest_yuv0"); + doTest(48, 48, FORMATS_RGB, TJ.SAMP_GRAY, "javatest_yuv0"); + doTest(48, 48, FORMATS_GRAY, TJ.SAMP_GRAY, "javatest_yuv0"); } - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); exitStatus = -1; } diff --git a/java/doc/constant-values.html b/java/doc/constant-values.html index ec1b21d..fb33327 100644 --- a/java/doc/constant-values.html +++ b/java/doc/constant-values.html @@ -7,8 +7,12 @@ @@ -115,6 +119,20 @@ 4 + + +public static final int +ERR_FATAL +1 + + + + +public static final int +ERR_WARNING +0 + + public static final int @@ -171,6 +189,20 @@ 128 + + +public static final int +FLAG_PROGRESSIVE +16384 + + + + +public static final int +FLAG_STOPONWARNING +8192 + + public static final int @@ -178,139 +210,146 @@ 5 + + +public static final int +NUMERR +2 + + public static final int NUMPF 12 - + public static final int NUMSAMP 6 - + public static final int PF_ABGR 9 - + public static final int PF_ARGB 10 - + public static final int PF_BGR 1 - + public static final int PF_BGRA 8 - + public static final int PF_BGRX 3 - + public static final int PF_CMYK 11 - + public static final int PF_GRAY 6 - + public 
static final int PF_RGB 0 - + public static final int PF_RGBA 7 - + public static final int PF_RGBX 2 - + public static final int PF_XBGR 4 - + public static final int PF_XRGB 5 - + public static final int SAMP_411 5 - + public static final int SAMP_420 2 - + public static final int SAMP_422 1 - + public static final int SAMP_440 4 - + public static final int SAMP_444 0 - + public static final int @@ -393,33 +432,47 @@ 2 + + +public static final int +OPT_COPYNONE +64 + + public static final int OPT_CROP 4 - + public static final int OPT_GRAY 8 - + public static final int OPT_NOOUTPUT 16 - + public static final int OPT_PERFECT 1 + + + +public static final int +OPT_PROGRESSIVE +32 + diff --git a/java/doc/deprecated-list.html b/java/doc/deprecated-list.html index e47ffb1..31d4e64 100644 --- a/java/doc/deprecated-list.html +++ b/java/doc/deprecated-list.html @@ -7,8 +7,12 @@ diff --git a/java/doc/help-doc.html b/java/doc/help-doc.html index ce749a9..6645d95 100644 --- a/java/doc/help-doc.html +++ b/java/doc/help-doc.html @@ -7,8 +7,12 @@ diff --git a/java/doc/index-all.html b/java/doc/index-all.html index a02d9c4..366c7ea 100644 --- a/java/doc/index-all.html +++ b/java/doc/index-all.html @@ -7,8 +7,12 @@ @@ -278,6 +282,15 @@
Returns true or false, depending on whether this instance and other have the same numerator and denominator.
+
ERR_FATAL - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+
The error was fatal and non-recoverable.
+
+
ERR_WARNING - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+
The error was non-fatal and recoverable, but the image may still be + corrupt.
+
@@ -324,12 +337,27 @@
Deprecated.
+
FLAG_PROGRESSIVE - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+
Use progressive entropy coding in JPEG images generated by compression and + transform operations.
+
+
FLAG_STOPONWARNING - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+
Immediately discontinue the current compression/decompression/transform + operation if the underlying codec throws a warning (non-fatal error).
+

G

+
getAlphaOffset(int) - Static method in class org.libjpegturbo.turbojpeg.TJ
+
+
For the given pixel format, returns the number of bytes that the alpha + component is offset from the start of the pixel.
+
getBlueOffset(int) - Static method in class org.libjpegturbo.turbojpeg.TJ
For the given pixel format, returns the number of bytes that the blue @@ -354,6 +382,11 @@
Returns denominator
+
getErrorCode() - Method in exception org.libjpegturbo.turbojpeg.TJException
+
+
Returns a code (one of TJ.ERR_*) indicating the severity of the + last error.
+
getGreenOffset(int) - Static method in class org.libjpegturbo.turbojpeg.TJ
For the given pixel format, returns the number of bytes that the green @@ -516,6 +549,10 @@
The number of JPEG colorspaces
+
NUMERR - Static variable in class org.libjpegturbo.turbojpeg.TJ
+
+
The number of error codes
+
NUMOP - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
The number of lossless transform operations
@@ -571,6 +608,11 @@
Flip (mirror) image vertically.
+
OPT_COPYNONE - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
+
+
This option will prevent TJTransformer.transform() from copying any extra markers (including EXIF + and ICC profile data) from the source image to the output image.
+
OPT_CROP - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
This option will enable lossless cropping.
@@ -590,6 +632,11 @@
This option will cause TJTransformer.transform() to throw an exception if the transform is not perfect.
+
OPT_PROGRESSIVE - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
+
+
This option will enable progressive entropy coding in the output image + generated by this particular transform.
+
OPT_TRIM - Static variable in class org.libjpegturbo.turbojpeg.TJTransform
This option will discard any partial MCU blocks that cannot be @@ -763,8 +810,6 @@
TurboJPEG utility class (cannot be instantiated)
-
TJ() - Constructor for class org.libjpegturbo.turbojpeg.TJ
-
 
TJCompressor - Class in org.libjpegturbo.turbojpeg
TurboJPEG compressor
@@ -829,6 +874,8 @@
 
TJException(String) - Constructor for exception org.libjpegturbo.turbojpeg.TJException
 
+
TJException(String, int) - Constructor for exception org.libjpegturbo.turbojpeg.TJException
+
 
TJException(Throwable) - Constructor for exception org.libjpegturbo.turbojpeg.TJException
 
TJScalingFactor - Class in org.libjpegturbo.turbojpeg
@@ -836,7 +883,9 @@
Fractional scaling factor
TJScalingFactor(int, int) - Constructor for class org.libjpegturbo.turbojpeg.TJScalingFactor
-
 
+
+
Create a TurboJPEG scaling factor instance.
+
TJTransform - Class in org.libjpegturbo.turbojpeg
Lossless transform parameters
diff --git a/java/doc/index.html b/java/doc/index.html index b983957..4e21075 100644 --- a/java/doc/index.html +++ b/java/doc/index.html @@ -4,11 +4,12 @@ Generated Documentation (Untitled) @@ -59,13 +63,13 @@
  • Summary: 
  • Nested | 
  • Field | 
  • -
  • Constr | 
  • +
  • Constr | 
  • Method
  • @@ -145,6 +149,19 @@ extends java.lang.Object static int +ERR_FATAL +
    The error was fatal and non-recoverable.
    + + + +static int +ERR_WARNING +
    The error was non-fatal and recoverable, but the image may still be + corrupt.
    + + + +static int FLAG_ACCURATEDCT
    Use the most accurate DCT/IDCT algorithm available in the underlying codec.
    @@ -197,125 +214,145 @@ extends java.lang.Object static int +FLAG_PROGRESSIVE +
    Use progressive entropy coding in JPEG images generated by compression and + transform operations.
    + + + +static int +FLAG_STOPONWARNING +
    Immediately discontinue the current compression/decompression/transform + operation if the underlying codec throws a warning (non-fatal error).
    + + + +static int NUMCS
    The number of JPEG colorspaces
    static int +NUMERR +
    The number of error codes
    + + + +static int NUMPF
    The number of pixel formats
    - + static int NUMSAMP
    The number of chrominance subsampling options
    - + static int PF_ABGR
    ABGR pixel format.
    - + static int PF_ARGB
    ARGB pixel format.
    - + static int PF_BGR
    BGR pixel format.
    - + static int PF_BGRA
    BGRA pixel format.
    - + static int PF_BGRX
    BGRX pixel format.
    - + static int PF_CMYK
    CMYK pixel format.
    - + static int PF_GRAY
    Grayscale pixel format.
    - + static int PF_RGB
    RGB pixel format.
    - + static int PF_RGBA
    RGBA pixel format.
    - + static int PF_RGBX
    RGBX pixel format.
    - + static int PF_XBGR
    XBGR pixel format.
    - + static int PF_XRGB
    XRGB pixel format.
    - + static int SAMP_411
    4:1:1 chrominance subsampling.
    - + static int SAMP_420
    4:2:0 chrominance subsampling.
    - + static int SAMP_422
    4:2:2 chrominance subsampling.
    - + static int SAMP_440
    4:4:0 chrominance subsampling.
    - + static int SAMP_444
    4:4:4 chrominance subsampling (no chrominance subsampling).
    - + static int SAMP_GRAY
    Grayscale.
    @@ -324,23 +361,6 @@ extends java.lang.Object - -
      -
    • - - -

      Constructor Summary

      - - - - - - - - -
      Constructors 
      Constructor and Description
      TJ() 
      -
    • -
    • @@ -384,53 +404,60 @@ extends java.lang.Object static int +getAlphaOffset(int pixelFormat) +
      For the given pixel format, returns the number of bytes that the alpha + component is offset from the start of the pixel.
      + + + +static int getBlueOffset(int pixelFormat)
      For the given pixel format, returns the number of bytes that the blue component is offset from the start of the pixel.
      - + static int getGreenOffset(int pixelFormat)
      For the given pixel format, returns the number of bytes that the green component is offset from the start of the pixel.
      - + static int getMCUHeight(int subsamp)
      Returns the MCU block height for the given level of chrominance subsampling.
      - + static int getMCUWidth(int subsamp)
      Returns the MCU block width for the given level of chrominance subsampling.
      - + static int getPixelSize(int pixelFormat)
      Returns the pixel size (in bytes) for the given pixel format.
      - + static int getRedOffset(int pixelFormat)
      For the given pixel format, returns the number of bytes that the red component is offset from the start of the pixel.
      - + static TJScalingFactor[] getScalingFactors()
      Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports.
      - + static int planeHeight(int componentID, int height, @@ -438,7 +465,7 @@ extends java.lang.Object
      Returns the plane height of a YUV image plane with the given parameters.
      - + static int planeSizeYUV(int componentID, int width, @@ -449,7 +476,7 @@ extends java.lang.Object plane with the given parameters. - + static int planeWidth(int componentID, int width, @@ -938,7 +965,7 @@ public static final int FLAG_FORCESSE3 -
    @@ -1048,7 +1131,8 @@ public static final int FLAG_FORCESSE3 then the green component will be pixel[TJ.getGreenOffset(TJ.PF_BGRX)].
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    -
    Returns:
    the green offset for the given pixel format.
    +
    Returns:
    the green offset for the given pixel format, or -1 if the pixel + format does not have a green component.
    @@ -1064,7 +1148,25 @@ public static final int FLAG_FORCESSE3 then the blue component will be pixel[TJ.getBlueOffset(TJ.PF_BGRX)].
    Parameters:
    pixelFormat - the pixel format (one of PF_*)
    -
    Returns:
    the blue offset for the given pixel format.
    +
    Returns:
    the blue offset for the given pixel format, or -1 if the pixel + format does not have a blue component.
    + + +
    + + +
      +
    • +

      getAlphaOffset

      +
      public static int getAlphaOffset(int pixelFormat)
      +
      For the given pixel format, returns the number of bytes that the alpha + component is offset from the start of the pixel. For instance, if a pixel + of format TJ.PF_BGRA is stored in char pixel[], + then the alpha component will be + pixel[TJ.getAlphaOffset(TJ.PF_BGRA)].
      +
      Parameters:
      pixelFormat - the pixel format (one of PF_*)
      +
      Returns:
      the alpha offset for the given pixel format, or -1 if the pixel + format does not have an alpha component.
    @@ -1236,13 +1338,13 @@ public static int bufSizeYUV(int width,
  • Summary: 
  • Nested | 
  • Field | 
  • -
  • Constr | 
  • +
  • Constr | 
  • Method
  • diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html b/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html index 29f12b7..a53f879 100644 --- a/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html +++ b/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html @@ -7,8 +7,12 @@ @@ -575,8 +579,8 @@ public void setSourceImage(byte[] srcImage,

    NOTE: This method has no effect when compressing a JPEG image from a YUV planar source. In that case, the level of chrominance subsampling in - the JPEG image is determined by the source. Further, this method has no - effect when encoding to a pre-allocated YUVImage instance. In + the JPEG image is determined by the source. Furthermore, this method has + no effect when encoding to a pre-allocated YUVImage instance. In that case, the level of chrominance subsampling is determined by the destination.

    Parameters:
    newSubsamp - the level of chrominance subsampling to use in diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html b/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html index 6bd6fd2..412dcd4 100644 --- a/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html +++ b/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html @@ -7,8 +7,12 @@ diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html b/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html index a914de9..b281e32 100644 --- a/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html +++ b/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html @@ -7,8 +7,12 @@ @@ -785,7 +789,11 @@ public void setJPEGImage(byte[] jpegImage, throws TJException
    Decompress the JPEG source image or decode the YUV source image associated with this decompressor instance and output a grayscale, RGB, or CMYK image - to the given destination buffer.
    + to the given destination buffer. +

    + NOTE: The output image is fully recoverable if this method throws a + non-fatal TJException (unless + TJ.FLAG_STOPONWARNING is specified.)

    Parameters:
    dstBuf - buffer that will receive the decompressed/decoded image. If the source image is a JPEG image, then this buffer should normally be pitch * scaledHeight bytes in size, where @@ -895,7 +903,11 @@ public void decompress(byte[] dstBuf, YUVImage instance. This method performs JPEG decompression but leaves out the color conversion step, so a planar YUV image is generated instead of an RGB or grayscale image. This method cannot be - used to decompress JPEG source images with the CMYK or YCCK colorspace. + used to decompress JPEG source images with the CMYK or YCCK colorspace. +

    + NOTE: The YUV planar output image is fully recoverable if this method + throws a non-fatal TJException (unless + TJ.FLAG_STOPONWARNING is specified.)

    Parameters:
    dstImage - YUVImage instance that will receive the YUV planar image. The level of subsampling specified in this YUVImage instance must match that of the JPEG image, and the width and height @@ -1035,7 +1047,11 @@ public byte[] decompressToYUV(int flags) throws TJException
    Decompress the JPEG source image or decode the YUV source image associated with this decompressor instance and output a grayscale, RGB, or CMYK image - to the given destination buffer.
    + to the given destination buffer. +

    + NOTE: The output image is fully recoverable if this method throws a + non-fatal TJException (unless + TJ.FLAG_STOPONWARNING is specified.)

    Parameters:
    dstBuf - buffer that will receive the decompressed/decoded image. If the source image is a JPEG image, then this buffer should normally be stride * scaledHeight pixels in size, where @@ -1092,7 +1108,11 @@ public byte[] decompressToYUV(int flags) throws TJException
    Decompress the JPEG source image or decode the YUV source image associated with this decompressor instance and output a decompressed/decoded image to - the given BufferedImage instance.
    + the given BufferedImage instance. +

    + NOTE: The output image is fully recoverable if this method throws a + non-fatal TJException (unless + TJ.FLAG_STOPONWARNING is specified.)

    Parameters:
    dstImage - a BufferedImage instance that will receive the decompressed/decoded image. If the source image is a JPEG image, then the width and height of the BufferedImage instance must match diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJException.html b/java/doc/org/libjpegturbo/turbojpeg/TJException.html index 6088066..66d73e7 100644 --- a/java/doc/org/libjpegturbo/turbojpeg/TJException.html +++ b/java/doc/org/libjpegturbo/turbojpeg/TJException.html @@ -7,8 +7,12 @@ @@ -60,13 +64,13 @@
  • Nested | 
  • Field | 
  • Constr | 
  • -
  • Method
  • +
  • Method
  • @@ -138,10 +142,14 @@ extends java.io.IOException TJException(java.lang.String message)  +TJException(java.lang.String message, + int code)  + + TJException(java.lang.String message, java.lang.Throwable cause)  - + TJException(java.lang.Throwable cause)  @@ -153,6 +161,20 @@ extends java.io.IOException

    Method Summary

    + + + + + + + + + + +
    Methods 
    Modifier and TypeMethod and Description
    intgetErrorCode() +
    Returns a code (one of TJ.ERR_*) indicating the severity of the + last error.
    +
    + + + +
      +
    • +

      TJException

      +
      public TJException(java.lang.String message,
      +           int code)
      +
    • +
    @@ -220,6 +252,27 @@ extends java.io.IOException + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getErrorCode

        +
        public int getErrorCode()
        +
        Returns a code (one of TJ.ERR_*) indicating the severity of the + last error.
        +
        Returns:
        a code (one of TJ.ERR_*) indicating the severity of the + last error.
        +
      • +
      +
    • +
    @@ -270,13 +323,13 @@ extends java.io.IOException
  • Nested | 
  • Field | 
  • Constr | 
  • -
  • Method
  • +
  • Method
  • diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html b/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html index 35d6882..4006bac 100644 --- a/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html +++ b/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html @@ -7,8 +7,12 @@ @@ -114,7 +118,9 @@ extends java.lang.Object TJScalingFactor(int num, - int denom)  + int denom) +
    Create a TurboJPEG scaling factor instance.
    + @@ -193,6 +199,8 @@ extends java.lang.Object

    TJScalingFactor

    public TJScalingFactor(int num,
                    int denom)
    +
    Create a TurboJPEG scaling factor instance.
    +
    Parameters:
    num - numerator
    denom - denominator
    @@ -235,7 +243,8 @@ extends java.lang.Object
    Returns the scaled value of dimension. This function performs the integer equivalent of ceil(dimension * scalingFactor).
    -
    Returns:
    the scaled value of dimension.
    +
    Parameters:
    dimension - width or height to multiply by this scaling factor
    +
    Returns:
    the scaled value of dimension.
    @@ -247,7 +256,8 @@ extends java.lang.Object
    public boolean equals(TJScalingFactor other)
    Returns true or false, depending on whether this instance and other have the same numerator and denominator.
    -
    Returns:
    true or false, depending on whether this instance and +
    Parameters:
    other - the scaling factor against which to compare this one
    +
    Returns:
    true or false, depending on whether this instance and other have the same numerator and denominator.
    diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html b/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html index cf65bd2..5f22691 100644 --- a/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html +++ b/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html @@ -7,8 +7,12 @@ @@ -217,31 +221,45 @@ extends java.awt.Rectangle static int +OPT_COPYNONE +
    This option will prevent TJTransformer.transform() from copying any extra markers (including EXIF + and ICC profile data) from the source image to the output image.
    + + + +static int OPT_CROP
    This option will enable lossless cropping.
    - + static int OPT_GRAY
    This option will discard the color data in the input image and produce a grayscale output image.
    - + static int OPT_NOOUTPUT
    This option will prevent TJTransformer.transform() from outputting a JPEG image for this particular transform.
    - + static int OPT_PERFECT
    This option will cause TJTransformer.transform() to throw an exception if the transform is not perfect.
    + +static int +OPT_PROGRESSIVE +
    This option will enable progressive entropy coding in the output image + generated by this particular transform.
    + + static int OPT_TRIM @@ -548,6 +566,33 @@ extends java.awt.Rectangle
    See Also:
    Constant Field Values
    + + + +
      +
    • +

      OPT_PROGRESSIVE

      +
      public static final int OPT_PROGRESSIVE
      +
      This option will enable progressive entropy coding in the output image + generated by this particular transform. Progressive entropy coding will + generally improve compression relative to baseline entropy coding (the + default), but it will reduce compression and decompression performance + considerably.
      +
      See Also:
      Constant Field Values
      +
    • +
    + + + +
      +
    • +

      OPT_COPYNONE

      +
      public static final int OPT_COPYNONE
      +
      This option will prevent TJTransformer.transform() from copying any extra markers (including EXIF + and ICC profile data) from the source image to the output image.
      +
      See Also:
      Constant Field Values
      +
    • +
    diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html b/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html index 36cbdb1..a30fe30 100644 --- a/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html +++ b/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html @@ -7,8 +7,12 @@ diff --git a/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html b/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html index b2be0a0..d4485ed 100644 --- a/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html +++ b/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html @@ -7,8 +7,12 @@ diff --git a/java/doc/org/libjpegturbo/turbojpeg/package-summary.html b/java/doc/org/libjpegturbo/turbojpeg/package-summary.html index f94656e..dedcce5 100644 --- a/java/doc/org/libjpegturbo/turbojpeg/package-summary.html +++ b/java/doc/org/libjpegturbo/turbojpeg/package-summary.html @@ -7,8 +7,12 @@ diff --git a/java/doc/org/libjpegturbo/turbojpeg/package-tree.html b/java/doc/org/libjpegturbo/turbojpeg/package-tree.html index 02a5cde..5f0f8c3 100644 --- a/java/doc/org/libjpegturbo/turbojpeg/package-tree.html +++ b/java/doc/org/libjpegturbo/turbojpeg/package-tree.html @@ -7,8 +7,12 @@ diff --git a/java/doc/overview-tree.html b/java/doc/overview-tree.html index 2ae76c6..b659995 100644 --- a/java/doc/overview-tree.html +++ b/java/doc/overview-tree.html @@ -7,8 +7,12 @@ diff --git a/java/doc/script.js b/java/doc/script.js new file mode 100644 index 0000000..b346356 --- /dev/null +++ b/java/doc/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? 
rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/java/doc/serialized-form.html b/java/doc/serialized-form.html index 846cabc..45bbc86 100644 --- a/java/doc/serialized-form.html +++ b/java/doc/serialized-form.html @@ -7,8 +7,12 @@ @@ -74,6 +78,19 @@
    serialVersionUID:
    1L
    +
      +
    • + + +

      Serialized Fields

      +
        +
      • +

        errorCode

        +
        int errorCode
        +
      • +
      +
    • +
  • diff --git a/java/org/libjpegturbo/turbojpeg/TJ.java b/java/org/libjpegturbo/turbojpeg/TJ.java index 02d14c0..fbb49df 100644 --- a/java/org/libjpegturbo/turbojpeg/TJ.java +++ b/java/org/libjpegturbo/turbojpeg/TJ.java @@ -1,5 +1,5 @@ /* - * Copyright (C)2011-2013 D. R. Commander. All Rights Reserved. + * Copyright (C)2011-2013, 2017-2018 D. R. Commander. All Rights Reserved. * Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,6 +34,7 @@ package org.libjpegturbo.turbojpeg; */ public final class TJ { + private TJ() {} /** * The number of chrominance subsampling options @@ -90,10 +91,10 @@ public final class TJ { */ public static int getMCUWidth(int subsamp) { checkSubsampling(subsamp); - return mcuWidth[subsamp]; + return MCU_WIDTH[subsamp]; } - private static final int[] mcuWidth = { + private static final int[] MCU_WIDTH = { 8, 16, 16, 8, 8, 32 }; @@ -110,10 +111,10 @@ public final class TJ { */ public static int getMCUHeight(int subsamp) { checkSubsampling(subsamp); - return mcuHeight[subsamp]; + return MCU_HEIGHT[subsamp]; } - private static final int[] mcuHeight = { + private static final int[] MCU_HEIGHT = { 8, 8, 16, 8, 16, 8 }; @@ -218,10 +219,10 @@ public final class TJ { */ public static int getPixelSize(int pixelFormat) { checkPixelFormat(pixelFormat); - return pixelSize[pixelFormat]; + return PIXEL_SIZE[pixelFormat]; } - private static final int[] pixelSize = { + private static final int[] PIXEL_SIZE = { 3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4 }; @@ -235,15 +236,16 @@ public final class TJ { * * @param pixelFormat the pixel format (one of PF_*) * - * @return the red offset for the given pixel format. + * @return the red offset for the given pixel format, or -1 if the pixel + * format does not have a red component. 
*/ public static int getRedOffset(int pixelFormat) { checkPixelFormat(pixelFormat); - return redOffset[pixelFormat]; + return RED_OFFSET[pixelFormat]; } - private static final int[] redOffset = { - 0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1, -1 + private static final int[] RED_OFFSET = { + 0, 2, 0, 2, 3, 1, -1, 0, 2, 3, 1, -1 }; @@ -256,15 +258,16 @@ public final class TJ { * * @param pixelFormat the pixel format (one of PF_*) * - * @return the green offset for the given pixel format. + * @return the green offset for the given pixel format, or -1 if the pixel + * format does not have a green component. */ public static int getGreenOffset(int pixelFormat) { checkPixelFormat(pixelFormat); - return greenOffset[pixelFormat]; + return GREEN_OFFSET[pixelFormat]; } - private static final int[] greenOffset = { - 1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2, -1 + private static final int[] GREEN_OFFSET = { + 1, 1, 1, 1, 2, 2, -1, 1, 1, 2, 2, -1 }; @@ -277,15 +280,38 @@ public final class TJ { * * @param pixelFormat the pixel format (one of PF_*) * - * @return the blue offset for the given pixel format. + * @return the blue offset for the given pixel format, or -1 if the pixel + * format does not have a blue component. */ public static int getBlueOffset(int pixelFormat) { checkPixelFormat(pixelFormat); - return blueOffset[pixelFormat]; + return BLUE_OFFSET[pixelFormat]; } - private static final int[] blueOffset = { - 2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3, -1 + private static final int[] BLUE_OFFSET = { + 2, 0, 2, 0, 1, 3, -1, 2, 0, 1, 3, -1 + }; + + + /** + * For the given pixel format, returns the number of bytes that the alpha + * component is offset from the start of the pixel. For instance, if a pixel + * of format TJ.PF_BGRA is stored in char pixel[], + * then the alpha component will be + * pixel[TJ.getAlphaOffset(TJ.PF_BGRA)]. 
+ * + * @param pixelFormat the pixel format (one of PF_*) + * + * @return the alpha offset for the given pixel format, or -1 if the pixel + * format does not have a alpha component. + */ + public static int getAlphaOffset(int pixelFormat) { + checkPixelFormat(pixelFormat); + return ALPHA_OFFSET[pixelFormat]; + } + + private static final int[] ALPHA_OFFSET = { + -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1 }; @@ -316,6 +342,7 @@ public final class TJ { * can be compressed from and decompressed to any of the extended RGB pixel * formats or grayscale, or they can be decompressed to YUV planar images. */ + @SuppressWarnings("checkstyle:ConstantName") public static final int CS_YCbCr = 1; /** * Grayscale colorspace. The JPEG image retains only the luminance data (Y @@ -348,16 +375,20 @@ public final class TJ { * The uncompressed source/destination image is stored in bottom-up (Windows, * OpenGL) order, not top-down (X11) order. */ - public static final int FLAG_BOTTOMUP = 2; + public static final int FLAG_BOTTOMUP = 2; + @SuppressWarnings("checkstyle:JavadocVariable") @Deprecated - public static final int FLAG_FORCEMMX = 8; + public static final int FLAG_FORCEMMX = 8; + @SuppressWarnings("checkstyle:JavadocVariable") @Deprecated - public static final int FLAG_FORCESSE = 16; + public static final int FLAG_FORCESSE = 16; + @SuppressWarnings("checkstyle:JavadocVariable") @Deprecated - public static final int FLAG_FORCESSE2 = 32; + public static final int FLAG_FORCESSE2 = 32; + @SuppressWarnings("checkstyle:JavadocVariable") @Deprecated - public static final int FLAG_FORCESSE3 = 128; + public static final int FLAG_FORCESSE3 = 128; /** * When decompressing an image that was compressed using chrominance @@ -366,7 +397,7 @@ public final class TJ { * creates a smooth transition between neighboring chrominance components in * order to reduce upsampling artifacts in the decompressed image. 
*/ - public static final int FLAG_FASTUPSAMPLE = 256; + public static final int FLAG_FASTUPSAMPLE = 256; /** * Use the fastest DCT/IDCT algorithm available in the underlying codec. The * default if this flag is not specified is implementation-specific. For @@ -375,7 +406,7 @@ public final class TJ { * only a very slight effect on accuracy, but it uses the accurate algorithm * when decompressing, because this has been shown to have a larger effect. */ - public static final int FLAG_FASTDCT = 2048; + public static final int FLAG_FASTDCT = 2048; /** * Use the most accurate DCT/IDCT algorithm available in the underlying * codec. The default if this flag is not specified is @@ -385,7 +416,46 @@ public final class TJ { * but it uses the accurate algorithm when decompressing, because this has * been shown to have a larger effect. */ - public static final int FLAG_ACCURATEDCT = 4096; + public static final int FLAG_ACCURATEDCT = 4096; + /** + * Immediately discontinue the current compression/decompression/transform + * operation if the underlying codec throws a warning (non-fatal error). The + * default behavior is to allow the operation to complete unless a fatal + * error is encountered. + *

    + * NOTE: due to the design of the TurboJPEG Java API, only certain methods + * (specifically, {@link TJDecompressor TJDecompressor.decompress*()} methods + * with a void return type) will complete and leave the output image in a + * fully recoverable state after a non-fatal error occurs. + */ + public static final int FLAG_STOPONWARNING = 8192; + /** + * Use progressive entropy coding in JPEG images generated by compression and + * transform operations. Progressive entropy coding will generally improve + * compression relative to baseline entropy coding (the default), but it will + * reduce compression and decompression performance considerably. + */ + public static final int FLAG_PROGRESSIVE = 16384; + + + /** + * The number of error codes + */ + public static final int NUMERR = 2; + /** + * The error was non-fatal and recoverable, but the image may still be + * corrupt. + *

    + * NOTE: due to the design of the TurboJPEG Java API, only certain methods + * (specifically, {@link TJDecompressor TJDecompressor.decompress*()} methods + * with a void return type) will complete and leave the output image in a + * fully recoverable state after a non-fatal error occurs. + */ + public static final int ERR_WARNING = 0; + /** + * The error was fatal and non-recoverable. + */ + public static final int ERR_FATAL = 1; /** @@ -427,6 +497,7 @@ public final class TJ { /** * @deprecated Use {@link #bufSizeYUV(int, int, int, int)} instead. */ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public static native int bufSizeYUV(int width, int height, int subsamp); diff --git a/java/org/libjpegturbo/turbojpeg/TJCompressor.java b/java/org/libjpegturbo/turbojpeg/TJCompressor.java index 2ff8e4d..74e5db9 100644 --- a/java/org/libjpegturbo/turbojpeg/TJCompressor.java +++ b/java/org/libjpegturbo/turbojpeg/TJCompressor.java @@ -1,5 +1,5 @@ /* - * Copyright (C)2011-2015 D. R. Commander. All Rights Reserved. + * Copyright (C)2011-2015, 2018 D. R. Commander. All Rights Reserved. * Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without @@ -77,6 +77,7 @@ public class TJCompressor implements Closeable { * @deprecated Use * {@link #TJCompressor(byte[], int, int, int, int, int, int)} instead. */ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public TJCompressor(byte[] srcImage, int width, int pitch, int height, int pixelFormat) throws TJException { @@ -164,6 +165,7 @@ public class TJCompressor implements Closeable { * @deprecated Use * {@link #setSourceImage(byte[], int, int, int, int, int, int)} instead. 
*/ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public void setSourceImage(byte[] srcImage, int width, int pitch, int height, int pixelFormat) throws TJException { @@ -199,7 +201,7 @@ public class TJCompressor implements Closeable { throw new IllegalArgumentException("Invalid argument in setSourceImage()"); srcX = x; srcY = y; - srcWidth = (width == 0) ? srcImage.getWidth(): width; + srcWidth = (width == 0) ? srcImage.getWidth() : width; srcHeight = (height == 0) ? srcImage.getHeight() : height; if (x + width > srcImage.getWidth() || y + height > srcImage.getHeight()) throw new IllegalArgumentException("Compression region exceeds the bounds of the source image"); @@ -208,30 +210,30 @@ public class TJCompressor implements Closeable { boolean intPixels = false; if (byteOrder == null) byteOrder = ByteOrder.nativeOrder(); - switch(srcImage.getType()) { - case BufferedImage.TYPE_3BYTE_BGR: - pixelFormat = TJ.PF_BGR; break; - case BufferedImage.TYPE_4BYTE_ABGR: - case BufferedImage.TYPE_4BYTE_ABGR_PRE: - pixelFormat = TJ.PF_XBGR; break; - case BufferedImage.TYPE_BYTE_GRAY: - pixelFormat = TJ.PF_GRAY; break; - case BufferedImage.TYPE_INT_BGR: - if (byteOrder == ByteOrder.BIG_ENDIAN) - pixelFormat = TJ.PF_XBGR; - else - pixelFormat = TJ.PF_RGBX; - intPixels = true; break; - case BufferedImage.TYPE_INT_RGB: - case BufferedImage.TYPE_INT_ARGB: - case BufferedImage.TYPE_INT_ARGB_PRE: - if (byteOrder == ByteOrder.BIG_ENDIAN) - pixelFormat = TJ.PF_XRGB; - else - pixelFormat = TJ.PF_BGRX; - intPixels = true; break; - default: - throw new IllegalArgumentException("Unsupported BufferedImage format"); + switch (srcImage.getType()) { + case BufferedImage.TYPE_3BYTE_BGR: + pixelFormat = TJ.PF_BGR; break; + case BufferedImage.TYPE_4BYTE_ABGR: + case BufferedImage.TYPE_4BYTE_ABGR_PRE: + pixelFormat = TJ.PF_XBGR; break; + case BufferedImage.TYPE_BYTE_GRAY: + pixelFormat = TJ.PF_GRAY; break; + case BufferedImage.TYPE_INT_BGR: + if (byteOrder == ByteOrder.BIG_ENDIAN) + 
pixelFormat = TJ.PF_XBGR; + else + pixelFormat = TJ.PF_RGBX; + intPixels = true; break; + case BufferedImage.TYPE_INT_RGB: + case BufferedImage.TYPE_INT_ARGB: + case BufferedImage.TYPE_INT_ARGB_PRE: + if (byteOrder == ByteOrder.BIG_ENDIAN) + pixelFormat = TJ.PF_XRGB; + else + pixelFormat = TJ.PF_BGRX; + intPixels = true; break; + default: + throw new IllegalArgumentException("Unsupported BufferedImage format"); } srcPixelFormat = pixelFormat; @@ -285,8 +287,8 @@ public class TJCompressor implements Closeable { *

    * NOTE: This method has no effect when compressing a JPEG image from a YUV * planar source. In that case, the level of chrominance subsampling in - * the JPEG image is determined by the source. Further, this method has no - * effect when encoding to a pre-allocated {@link YUVImage} instance. In + * the JPEG image is determined by the source. Furthermore, this method has + * no effect when encoding to a pre-allocated {@link YUVImage} instance. In * that case, the level of chrominance subsampling is determined by the * destination. * @@ -386,6 +388,7 @@ public class TJCompressor implements Closeable { * {@link #setSourceImage(BufferedImage, int, int, int, int)} and * {@link #compress(byte[], int)} instead. */ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public void compress(BufferedImage srcImage, byte[] dstBuf, int flags) throws TJException { @@ -398,6 +401,7 @@ public class TJCompressor implements Closeable { * {@link #setSourceImage(BufferedImage, int, int, int, int)} and * {@link #compress(int)} instead. */ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public byte[] compress(BufferedImage srcImage, int flags) throws TJException { @@ -445,14 +449,16 @@ public class TJCompressor implements Closeable { /** * @deprecated Use {@link #encodeYUV(YUVImage, int)} instead. 
*/ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public void encodeYUV(byte[] dstBuf, int flags) throws TJException { - if(dstBuf == null) + if (dstBuf == null) throw new IllegalArgumentException("Invalid argument in encodeYUV()"); checkSourceImage(); checkSubsampling(); - YUVImage yuvImage = new YUVImage(dstBuf, srcWidth, 4, srcHeight, subsamp); - encodeYUV(yuvImage, flags); + YUVImage dstYUVImage = new YUVImage(dstBuf, srcWidth, 4, srcHeight, + subsamp); + encodeYUV(dstYUVImage, flags); } /** @@ -475,11 +481,11 @@ public class TJCompressor implements Closeable { public YUVImage encodeYUV(int pad, int flags) throws TJException { checkSourceImage(); checkSubsampling(); - if(pad < 1 || ((pad & (pad - 1)) != 0)) + if (pad < 1 || ((pad & (pad - 1)) != 0)) throw new IllegalStateException("Invalid argument in encodeYUV()"); - YUVImage yuvImage = new YUVImage(srcWidth, pad, srcHeight, subsamp); - encodeYUV(yuvImage, flags); - return yuvImage; + YUVImage dstYUVImage = new YUVImage(srcWidth, pad, srcHeight, subsamp); + encodeYUV(dstYUVImage, flags); + return dstYUVImage; } /** @@ -506,21 +512,22 @@ public class TJCompressor implements Closeable { public YUVImage encodeYUV(int[] strides, int flags) throws TJException { checkSourceImage(); checkSubsampling(); - YUVImage yuvImage = new YUVImage(srcWidth, strides, srcHeight, subsamp); - encodeYUV(yuvImage, flags); - return yuvImage; + YUVImage dstYUVImage = new YUVImage(srcWidth, strides, srcHeight, subsamp); + encodeYUV(dstYUVImage, flags); + return dstYUVImage; } /** * @deprecated Use {@link #encodeYUV(int, int)} instead. 
*/ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public byte[] encodeYUV(int flags) throws TJException { checkSourceImage(); checkSubsampling(); - YUVImage yuvImage = new YUVImage(srcWidth, 4, srcHeight, subsamp); - encodeYUV(yuvImage, flags); - return yuvImage.getBuf(); + YUVImage dstYUVImage = new YUVImage(srcWidth, 4, srcHeight, subsamp); + encodeYUV(dstYUVImage, flags); + return dstYUVImage.getBuf(); } /** @@ -528,6 +535,7 @@ public class TJCompressor implements Closeable { * {@link #setSourceImage(BufferedImage, int, int, int, int)} and * {@link #encodeYUV(byte[], int)} instead. */ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public void encodeYUV(BufferedImage srcImage, byte[] dstBuf, int flags) throws TJException { @@ -540,6 +548,7 @@ public class TJCompressor implements Closeable { * {@link #setSourceImage(BufferedImage, int, int, int, int)} and * {@link #encodeYUV(int, int)} instead. */ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public byte[] encodeYUV(BufferedImage srcImage, int flags) throws TJException { @@ -567,11 +576,12 @@ public class TJCompressor implements Closeable { destroy(); } + @SuppressWarnings("checkstyle:DesignForExtension") @Override protected void finalize() throws Throwable { try { close(); - } catch(TJException e) { + } catch (TJException e) { } finally { super.finalize(); } @@ -582,44 +592,53 @@ public class TJCompressor implements Closeable { private native void destroy() throws TJException; // JPEG size in bytes is returned + @SuppressWarnings("checkstyle:HiddenField") @Deprecated private native int compress(byte[] srcBuf, int width, int pitch, - int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp, int jpegQual, + int height, int pixelFormat, byte[] jpegBuf, int jpegSubsamp, int jpegQual, int flags) throws TJException; + @SuppressWarnings("checkstyle:HiddenField") private native int compress(byte[] srcBuf, int x, int y, int width, - int pitch, int height, int pixelFormat, byte[] 
dstBuf, int jpegSubsamp, + int pitch, int height, int pixelFormat, byte[] jpegBuf, int jpegSubsamp, int jpegQual, int flags) throws TJException; + @SuppressWarnings("checkstyle:HiddenField") @Deprecated private native int compress(int[] srcBuf, int width, int stride, - int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp, int jpegQual, + int height, int pixelFormat, byte[] jpegBuf, int jpegSubsamp, int jpegQual, int flags) throws TJException; + @SuppressWarnings("checkstyle:HiddenField") private native int compress(int[] srcBuf, int x, int y, int width, - int stride, int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp, + int stride, int height, int pixelFormat, byte[] jpegBuf, int jpegSubsamp, int jpegQual, int flags) throws TJException; + @SuppressWarnings("checkstyle:HiddenField") private native int compressFromYUV(byte[][] srcPlanes, int[] srcOffsets, - int width, int[] srcStrides, int height, int subsamp, byte[] dstBuf, + int width, int[] srcStrides, int height, int subsamp, byte[] jpegBuf, int jpegQual, int flags) throws TJException; + @SuppressWarnings("checkstyle:HiddenField") @Deprecated private native void encodeYUV(byte[] srcBuf, int width, int pitch, int height, int pixelFormat, byte[] dstBuf, int subsamp, int flags) throws TJException; + @SuppressWarnings("checkstyle:HiddenField") private native void encodeYUV(byte[] srcBuf, int x, int y, int width, int pitch, int height, int pixelFormat, byte[][] dstPlanes, int[] dstOffsets, int[] dstStrides, int subsamp, int flags) throws TJException; + @SuppressWarnings("checkstyle:HiddenField") @Deprecated private native void encodeYUV(int[] srcBuf, int width, int stride, int height, int pixelFormat, byte[] dstBuf, int subsamp, int flags) throws TJException; + @SuppressWarnings("checkstyle:HiddenField") private native void encodeYUV(int[] srcBuf, int x, int y, int width, int srcStride, int height, int pixelFormat, byte[][] dstPlanes, int[] dstOffsets, int[] dstStrides, int subsamp, int flags) diff 
--git a/java/org/libjpegturbo/turbojpeg/TJDecompressor.java b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java index bd0e694..cba9ff0 100644 --- a/java/org/libjpegturbo/turbojpeg/TJDecompressor.java +++ b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java @@ -1,5 +1,5 @@ /* - * Copyright (C)2011-2015 D. R. Commander. All Rights Reserved. + * Copyright (C)2011-2015, 2018 D. R. Commander. All Rights Reserved. * Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without @@ -82,6 +82,7 @@ public class TJDecompressor implements Closeable { * @param yuvImage {@link YUVImage} instance containing a YUV planar * image to be decoded. This image is not modified. */ + @SuppressWarnings("checkstyle:HiddenField") public TJDecompressor(YUVImage yuvImage) throws TJException { init(); setSourceImage(yuvImage); @@ -109,6 +110,7 @@ public class TJDecompressor implements Closeable { /** * @deprecated Use {@link #setSourceImage(byte[], int)} instead. */ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public void setJPEGImage(byte[] jpegImage, int imageSize) throws TJException { @@ -308,6 +310,10 @@ public class TJDecompressor implements Closeable { * Decompress the JPEG source image or decode the YUV source image associated * with this decompressor instance and output a grayscale, RGB, or CMYK image * to the given destination buffer. + *

    + * NOTE: The output image is fully recoverable if this method throws a + * non-fatal {@link TJException} (unless + * {@link TJ#FLAG_STOPONWARNING TJ.FLAG_STOPONWARNING} is specified.) * * @param dstBuf buffer that will receive the decompressed/decoded image. * If the source image is a JPEG image, then this buffer should normally be @@ -396,6 +402,7 @@ public class TJDecompressor implements Closeable { * @deprecated Use * {@link #decompress(byte[], int, int, int, int, int, int, int)} instead. */ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public void decompress(byte[] dstBuf, int desiredWidth, int pitch, int desiredHeight, int pixelFormat, int flags) @@ -451,6 +458,10 @@ public class TJDecompressor implements Closeable { * but leaves out the color conversion step, so a planar YUV image is * generated instead of an RGB or grayscale image. This method cannot be * used to decompress JPEG source images with the CMYK or YCCK colorspace. + *

    + * NOTE: The YUV planar output image is fully recoverable if this method + * throws a non-fatal {@link TJException} (unless + * {@link TJ#FLAG_STOPONWARNING TJ.FLAG_STOPONWARNING} is specified.) * * @param dstImage {@link YUVImage} instance that will receive the YUV planar * image. The level of subsampling specified in this YUVImage @@ -486,11 +497,12 @@ public class TJDecompressor implements Closeable { /** * @deprecated Use {@link #decompressToYUV(YUVImage, int)} instead. */ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public void decompressToYUV(byte[] dstBuf, int flags) throws TJException { - YUVImage dstImage = new YUVImage(dstBuf, jpegWidth, 4, jpegHeight, - jpegSubsamp); - decompressToYUV(dstImage, flags); + YUVImage dstYUVImage = new YUVImage(dstBuf, jpegWidth, 4, jpegHeight, + jpegSubsamp); + decompressToYUV(dstYUVImage, flags); } /** @@ -545,10 +557,10 @@ public class TJDecompressor implements Closeable { int scaledWidth = getScaledWidth(desiredWidth, desiredHeight); int scaledHeight = getScaledHeight(desiredWidth, desiredHeight); - YUVImage yuvImage = new YUVImage(scaledWidth, null, scaledHeight, - jpegSubsamp); - decompressToYUV(yuvImage, flags); - return yuvImage; + YUVImage dstYUVImage = new YUVImage(scaledWidth, null, scaledHeight, + jpegSubsamp); + decompressToYUV(dstYUVImage, flags); + return dstYUVImage; } /** @@ -598,26 +610,31 @@ public class TJDecompressor implements Closeable { int scaledWidth = getScaledWidth(desiredWidth, desiredHeight); int scaledHeight = getScaledHeight(desiredWidth, desiredHeight); - YUVImage yuvImage = new YUVImage(scaledWidth, pad, scaledHeight, - jpegSubsamp); - decompressToYUV(yuvImage, flags); - return yuvImage; + YUVImage dstYUVImage = new YUVImage(scaledWidth, pad, scaledHeight, + jpegSubsamp); + decompressToYUV(dstYUVImage, flags); + return dstYUVImage; } /** * @deprecated Use {@link #decompressToYUV(int, int, int, int)} instead. 
*/ + @SuppressWarnings("checkstyle:JavadocMethod") @Deprecated public byte[] decompressToYUV(int flags) throws TJException { - YUVImage dstImage = new YUVImage(jpegWidth, 4, jpegHeight, jpegSubsamp); - decompressToYUV(dstImage, flags); - return dstImage.getBuf(); + YUVImage dstYUVImage = new YUVImage(jpegWidth, 4, jpegHeight, jpegSubsamp); + decompressToYUV(dstYUVImage, flags); + return dstYUVImage.getBuf(); } /** * Decompress the JPEG source image or decode the YUV source image associated * with this decompressor instance and output a grayscale, RGB, or CMYK image * to the given destination buffer. + *

    + * NOTE: The output image is fully recoverable if this method throws a + * non-fatal {@link TJException} (unless + * {@link TJ#FLAG_STOPONWARNING TJ.FLAG_STOPONWARNING} is specified.) * * @param dstBuf buffer that will receive the decompressed/decoded image. * If the source image is a JPEG image, then this buffer should normally be @@ -699,6 +716,10 @@ public class TJDecompressor implements Closeable { * Decompress the JPEG source image or decode the YUV source image associated * with this decompressor instance and output a decompressed/decoded image to * the given BufferedImage instance. + *

    + * NOTE: The output image is fully recoverable if this method throws a + * non-fatal {@link TJException} (unless + * {@link TJ#FLAG_STOPONWARNING TJ.FLAG_STOPONWARNING} is specified.) * * @param dstImage a BufferedImage instance that will receive * the decompressed/decoded image. If the source image is a JPEG image, then @@ -734,35 +755,35 @@ public class TJDecompressor implements Closeable { int pixelFormat; boolean intPixels = false; if (byteOrder == null) byteOrder = ByteOrder.nativeOrder(); - switch(dstImage.getType()) { - case BufferedImage.TYPE_3BYTE_BGR: - pixelFormat = TJ.PF_BGR; break; - case BufferedImage.TYPE_4BYTE_ABGR: - case BufferedImage.TYPE_4BYTE_ABGR_PRE: - pixelFormat = TJ.PF_XBGR; break; - case BufferedImage.TYPE_BYTE_GRAY: - pixelFormat = TJ.PF_GRAY; break; - case BufferedImage.TYPE_INT_BGR: - if (byteOrder == ByteOrder.BIG_ENDIAN) - pixelFormat = TJ.PF_XBGR; - else - pixelFormat = TJ.PF_RGBX; - intPixels = true; break; - case BufferedImage.TYPE_INT_RGB: - if (byteOrder == ByteOrder.BIG_ENDIAN) - pixelFormat = TJ.PF_XRGB; - else - pixelFormat = TJ.PF_BGRX; - intPixels = true; break; - case BufferedImage.TYPE_INT_ARGB: - case BufferedImage.TYPE_INT_ARGB_PRE: - if (byteOrder == ByteOrder.BIG_ENDIAN) - pixelFormat = TJ.PF_ARGB; - else - pixelFormat = TJ.PF_BGRA; - intPixels = true; break; - default: - throw new IllegalArgumentException("Unsupported BufferedImage format"); + switch (dstImage.getType()) { + case BufferedImage.TYPE_3BYTE_BGR: + pixelFormat = TJ.PF_BGR; break; + case BufferedImage.TYPE_4BYTE_ABGR: + case BufferedImage.TYPE_4BYTE_ABGR_PRE: + pixelFormat = TJ.PF_XBGR; break; + case BufferedImage.TYPE_BYTE_GRAY: + pixelFormat = TJ.PF_GRAY; break; + case BufferedImage.TYPE_INT_BGR: + if (byteOrder == ByteOrder.BIG_ENDIAN) + pixelFormat = TJ.PF_XBGR; + else + pixelFormat = TJ.PF_RGBX; + intPixels = true; break; + case BufferedImage.TYPE_INT_RGB: + if (byteOrder == ByteOrder.BIG_ENDIAN) + pixelFormat = TJ.PF_XRGB; + else + pixelFormat 
= TJ.PF_BGRX; + intPixels = true; break; + case BufferedImage.TYPE_INT_ARGB: + case BufferedImage.TYPE_INT_ARGB_PRE: + if (byteOrder == ByteOrder.BIG_ENDIAN) + pixelFormat = TJ.PF_ARGB; + else + pixelFormat = TJ.PF_BGRA; + intPixels = true; break; + default: + throw new IllegalArgumentException("Unsupported BufferedImage format"); } WritableRaster wr = dstImage.getRaster(); if (intPixels) { @@ -842,11 +863,12 @@ public class TJDecompressor implements Closeable { destroy(); } + @SuppressWarnings("checkstyle:DesignForExtension") @Override protected void finalize() throws Throwable { try { close(); - } catch(TJException e) { + } catch (TJException e) { } finally { super.finalize(); } diff --git a/java/org/libjpegturbo/turbojpeg/TJException.java b/java/org/libjpegturbo/turbojpeg/TJException.java index 59c2041..d03a256 100644 --- a/java/org/libjpegturbo/turbojpeg/TJException.java +++ b/java/org/libjpegturbo/turbojpeg/TJException.java @@ -1,5 +1,6 @@ /* * Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. + * Copyright (C)2017-2018 D. R. Commander. All Rights Reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,24 +31,48 @@ package org.libjpegturbo.turbojpeg; import java.io.IOException; +@SuppressWarnings("checkstyle:JavadocType") public class TJException extends IOException { private static final long serialVersionUID = 1L; + @SuppressWarnings("checkstyle:JavadocMethod") public TJException() { super(); } + @SuppressWarnings("checkstyle:JavadocMethod") public TJException(String message, Throwable cause) { super(message, cause); } + @SuppressWarnings("checkstyle:JavadocMethod") public TJException(String message) { super(message); } + @SuppressWarnings("checkstyle:JavadocMethod") + public TJException(String message, int code) { + super(message); + if (code >= 0 && code < TJ.NUMERR) // else keep default TJ.ERR_FATAL + errorCode = code; + } + + @SuppressWarnings("checkstyle:JavadocMethod") public TJException(Throwable cause) { super(cause); } + /** + * Returns a code (one of {@link TJ TJ.ERR_*}) indicating the severity of the + * last error. + * + * @return a code (one of {@link TJ TJ.ERR_*}) indicating the severity of the + * last error. + */ + public int getErrorCode() { + return errorCode; + } + + private int errorCode = TJ.ERR_FATAL; } diff --git a/java/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl b/java/org/libjpegturbo/turbojpeg/TJLoader-unix.java.in similarity index 78% rename from java/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl rename to java/org/libjpegturbo/turbojpeg/TJLoader-unix.java.in index 5ef3118..65884e8 100644 --- a/java/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl +++ b/java/org/libjpegturbo/turbojpeg/TJLoader-unix.java.in @@ -1,5 +1,5 @@ /* - * Copyright (C)2011-2013 D. R. Commander. All Rights Reserved. + * Copyright (C)2011-2013, 2016 D. R. Commander. All Rights Reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,19 +36,19 @@ final class TJLoader { String os = System.getProperty("os.name").toLowerCase(); if (os.indexOf("mac") >= 0) { try { - System.load("%{__libdir}/libturbojpeg.jnilib"); + System.load("@CMAKE_INSTALL_FULL_LIBDIR@/libturbojpeg.jnilib"); } catch (java.lang.UnsatisfiedLinkError e2) { System.load("/usr/lib/libturbojpeg.jnilib"); } } else { try { - System.load("%{__libdir}/libturbojpeg.so"); + System.load("@CMAKE_INSTALL_FULL_LIBDIR@/libturbojpeg.so"); } catch (java.lang.UnsatisfiedLinkError e3) { - String libdir = "%{__libdir}"; - if (libdir.equals("/opt/libjpeg-turbo/lib64")) { - System.load("/opt/libjpeg-turbo/lib32/libturbojpeg.so"); - } else if (libdir.equals("/opt/libjpeg-turbo/lib32")) { - System.load("/opt/libjpeg-turbo/lib64/libturbojpeg.so"); + String libdir = "@CMAKE_INSTALL_FULL_LIBDIR@"; + if (libdir.equals("@CMAKE_INSTALL_DEFAULT_PREFIX@/lib64")) { + System.load("@CMAKE_INSTALL_DEFAULT_PREFIX@/lib32/libturbojpeg.so"); + } else if (libdir.equals("@CMAKE_INSTALL_DEFAULT_PREFIX@/lib32")) { + System.load("@CMAKE_INSTALL_DEFAULT_PREFIX@/lib64/libturbojpeg.so"); } else { throw e3; } diff --git a/java/org/libjpegturbo/turbojpeg/TJLoader.java.in b/java/org/libjpegturbo/turbojpeg/TJLoader-win.java.in similarity index 100% rename from java/org/libjpegturbo/turbojpeg/TJLoader.java.in rename to java/org/libjpegturbo/turbojpeg/TJLoader-win.java.in diff --git a/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java b/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java index ddb1d75..ccf9179 100644 --- a/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java +++ b/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java @@ -1,5 +1,5 @@ /* - * Copyright (C)2011 D. R. Commander. All Rights Reserved. + * Copyright (C)2011, 2018 D. R. Commander. All Rights Reserved. * Copyright (C)2015 Viktor Szathmáry. 
All Rights Reserved. * * Redistribution and use in source and binary forms, with or without @@ -34,6 +34,13 @@ package org.libjpegturbo.turbojpeg; */ public class TJScalingFactor { + /** + * Create a TurboJPEG scaling factor instance. + * + * @param num numerator + * @param denom denominator + */ + @SuppressWarnings("checkstyle:HiddenField") public TJScalingFactor(int num, int denom) { if (num < 1 || denom < 1) throw new IllegalArgumentException("Numerator and denominator must be >= 1"); @@ -64,6 +71,8 @@ public class TJScalingFactor { * performs the integer equivalent of * ceil(dimension * scalingFactor). * + * @param dimension width or height to multiply by this scaling factor + * * @return the scaled value of dimension. */ public int getScaled(int dimension) { @@ -74,6 +83,8 @@ public class TJScalingFactor { * Returns true or false, depending on whether this instance and * other have the same numerator and denominator. * + * @param other the scaling factor against which to compare this one + * * @return true or false, depending on whether this instance and * other have the same numerator and denominator. */ diff --git a/java/org/libjpegturbo/turbojpeg/TJTransform.java b/java/org/libjpegturbo/turbojpeg/TJTransform.java index 7381f36..41c4b45 100644 --- a/java/org/libjpegturbo/turbojpeg/TJTransform.java +++ b/java/org/libjpegturbo/turbojpeg/TJTransform.java @@ -1,5 +1,5 @@ /* - * Copyright (C)2011, 2013 D. R. Commander. All Rights Reserved. + * Copyright (C)2011, 2013, 2018 D. R. Commander. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -103,21 +103,21 @@ public class TJTransform extends Rectangle { * partial MCU blocks that cannot be transformed will be left in place, which * will create odd-looking strips on the right or bottom edge of the image. 
*/ - public static final int OPT_PERFECT = 1; + public static final int OPT_PERFECT = 1; /** * This option will discard any partial MCU blocks that cannot be * transformed. */ - public static final int OPT_TRIM = 2; + public static final int OPT_TRIM = 2; /** * This option will enable lossless cropping. */ - public static final int OPT_CROP = 4; + public static final int OPT_CROP = 4; /** * This option will discard the color data in the input image and produce * a grayscale output image. */ - public static final int OPT_GRAY = 8; + public static final int OPT_GRAY = 8; /** * This option will prevent {@link TJTransformer#transform * TJTransformer.transform()} from outputting a JPEG image for this @@ -125,7 +125,21 @@ public class TJTransform extends Rectangle { * filter to capture the transformed DCT coefficients without transcoding * them. */ - public static final int OPT_NOOUTPUT = 16; + public static final int OPT_NOOUTPUT = 16; + /** + * This option will enable progressive entropy coding in the output image + * generated by this particular transform. Progressive entropy coding will + * generally improve compression relative to baseline entropy coding (the + * default), but it will reduce compression and decompression performance + * considerably. + */ + public static final int OPT_PROGRESSIVE = 32; + /** + * This option will prevent {@link TJTransformer#transform + * TJTransformer.transform()} from copying any extra markers (including EXIF + * and ICC profile data) from the source image to the output image. 
+ */ + public static final int OPT_COPYNONE = 64; /** @@ -159,6 +173,7 @@ public class TJTransform extends Rectangle { * @param cf an instance of an object that implements the {@link * TJCustomFilter} interface, or null if no custom filter is needed */ + @SuppressWarnings("checkstyle:HiddenField") public TJTransform(int x, int y, int w, int h, int op, int options, TJCustomFilter cf) { super(x, y, w, h); @@ -183,6 +198,7 @@ public class TJTransform extends Rectangle { * @param cf an instance of an object that implements the {@link * TJCustomFilter} interface, or null if no custom filter is needed */ + @SuppressWarnings("checkstyle:HiddenField") public TJTransform(Rectangle r, int op, int options, TJCustomFilter cf) { super(r); @@ -194,15 +210,18 @@ public class TJTransform extends Rectangle { /** * Transform operation (one of OP_*) */ + @SuppressWarnings("checkstyle:VisibilityModifier") public int op = 0; /** * Transform options (bitwise OR of one or more of OPT_*) */ + @SuppressWarnings("checkstyle:VisibilityModifier") public int options = 0; /** * Custom filter instance */ + @SuppressWarnings("checkstyle:VisibilityModifier") public TJCustomFilter cf = null; } diff --git a/java/org/libjpegturbo/turbojpeg/TJTransformer.java b/java/org/libjpegturbo/turbojpeg/TJTransformer.java index d76647f..d7a56f3 100644 --- a/java/org/libjpegturbo/turbojpeg/TJTransformer.java +++ b/java/org/libjpegturbo/turbojpeg/TJTransformer.java @@ -111,11 +111,11 @@ public class TJTransformer extends TJDecompressor { * which specifies the transform parameters and/or cropping region for the * corresponding transformed output image * - * @return an array of {@link TJDecompressor} instances, each of - * which has a transformed JPEG image associated with it. - * * @param flags the bitwise OR of one or more of * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*} + * + * @return an array of {@link TJDecompressor} instances, each of + * which has a transformed JPEG image associated with it. 
*/ public TJDecompressor[] transform(TJTransform[] transforms, int flags) throws TJException { diff --git a/java/org/libjpegturbo/turbojpeg/YUVImage.java b/java/org/libjpegturbo/turbojpeg/YUVImage.java index d123e37..4da9843 100644 --- a/java/org/libjpegturbo/turbojpeg/YUVImage.java +++ b/java/org/libjpegturbo/turbojpeg/YUVImage.java @@ -208,12 +208,12 @@ public class YUVImage { * @param subsamp the level of chrominance subsampling used in the YUV * image (one of {@link TJ#SAMP_444 TJ.SAMP_*}) */ - public void setBuf(byte[][] planes, int[] offsets, int width, int strides[], + public void setBuf(byte[][] planes, int[] offsets, int width, int[] strides, int height, int subsamp) { setBuf(planes, offsets, width, strides, height, subsamp, false); } - private void setBuf(byte[][] planes, int[] offsets, int width, int strides[], + private void setBuf(byte[][] planes, int[] offsets, int width, int[] strides, int height, int subsamp, boolean alloc) { if ((planes == null && !alloc) || width < 1 || height < 1 || subsamp < 0 || subsamp >= TJ.NUMSAMP) @@ -247,9 +247,11 @@ public class YUVImage { if (planes[i] == null || offsets[i] < 0) throw new IllegalArgumentException("Invalid argument in YUVImage::setBuf()"); if (strides[i] < 0 && offsets[i] - planeSize + pw < 0) - throw new IllegalArgumentException("Stride for plane " + i + " would cause memory to be accessed below plane boundary"); + throw new IllegalArgumentException("Stride for plane " + i + + " would cause memory to be accessed below plane boundary"); if (planes[i].length < offsets[i] + planeSize) - throw new IllegalArgumentException("Image plane " + i + " is not large enough"); + throw new IllegalArgumentException("Image plane " + i + + " is not large enough"); } yuvPlanes = planes; @@ -294,9 +296,9 @@ public class YUVImage { int[] offsets = new int[nc]; planes[0] = yuvImage; - strides[0] = PAD(TJ.planeWidth(0, width, subsamp), pad); + strides[0] = pad(TJ.planeWidth(0, width, subsamp), pad); if (subsamp != 
TJ.SAMP_GRAY) { - strides[1] = strides[2] = PAD(TJ.planeWidth(1, width, subsamp), pad); + strides[1] = strides[2] = pad(TJ.planeWidth(1, width, subsamp), pad); planes[1] = planes[2] = yuvImage; offsets[1] = offsets[0] + strides[0] * TJ.planeHeight(0, height, subsamp); @@ -428,7 +430,7 @@ public class YUVImage { return TJ.bufSizeYUV(yuvWidth, yuvPad, yuvHeight, yuvSubsamp); } - private static final int PAD(int v, int p) { + private static int pad(int v, int p) { return (v + p - 1) & (~(p - 1)); } diff --git a/jcapimin.c b/jcapimin.c index 15674be..178c55b 100644 --- a/jcapimin.c +++ b/jcapimin.c @@ -31,7 +31,7 @@ */ GLOBAL(void) -jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize) +jpeg_CreateCompress(j_compress_ptr cinfo, int version, size_t structsize) { int i; @@ -41,7 +41,7 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize) ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version); if (structsize != sizeof(struct jpeg_compress_struct)) ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, - (int) sizeof(struct jpeg_compress_struct), (int) structsize); + (int)sizeof(struct jpeg_compress_struct), (int)structsize); /* For debugging purposes, we zero the whole master structure. * But the application has already set the err pointer, and may have set @@ -59,7 +59,7 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize) cinfo->is_decompressor = FALSE; /* Initialize a memory manager instance for this object */ - jinit_memory_mgr((j_common_ptr) cinfo); + jinit_memory_mgr((j_common_ptr)cinfo); /* Zero out pointers to permanent structures. 
*/ cinfo->progress = NULL; @@ -83,7 +83,7 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize) /* Must do it here for emit_dqt in case jpeg_write_tables is used */ cinfo->block_size = DCTSIZE; cinfo->natural_order = jpeg_natural_order; - cinfo->lim_Se = DCTSIZE2-1; + cinfo->lim_Se = DCTSIZE2 - 1; #endif cinfo->script_space = NULL; @@ -100,9 +100,9 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize) */ GLOBAL(void) -jpeg_destroy_compress (j_compress_ptr cinfo) +jpeg_destroy_compress(j_compress_ptr cinfo) { - jpeg_destroy((j_common_ptr) cinfo); /* use common routine */ + jpeg_destroy((j_common_ptr)cinfo); /* use common routine */ } @@ -112,9 +112,9 @@ jpeg_destroy_compress (j_compress_ptr cinfo) */ GLOBAL(void) -jpeg_abort_compress (j_compress_ptr cinfo) +jpeg_abort_compress(j_compress_ptr cinfo) { - jpeg_abort((j_common_ptr) cinfo); /* use common routine */ + jpeg_abort((j_common_ptr)cinfo); /* use common routine */ } @@ -131,7 +131,7 @@ jpeg_abort_compress (j_compress_ptr cinfo) */ GLOBAL(void) -jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress) +jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress) { int i; JQUANT_TBL *qtbl; @@ -159,7 +159,7 @@ jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress) */ GLOBAL(void) -jpeg_finish_compress (j_compress_ptr cinfo) +jpeg_finish_compress(j_compress_ptr cinfo) { JDIMENSION iMCU_row; @@ -172,18 +172,18 @@ jpeg_finish_compress (j_compress_ptr cinfo) } else if (cinfo->global_state != CSTATE_WRCOEFS) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); /* Perform any remaining passes */ - while (! 
cinfo->master->is_last_pass) { + while (!cinfo->master->is_last_pass) { (*cinfo->master->prepare_for_pass) (cinfo); for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) { if (cinfo->progress != NULL) { - cinfo->progress->pass_counter = (long) iMCU_row; - cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows; - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + cinfo->progress->pass_counter = (long)iMCU_row; + cinfo->progress->pass_limit = (long)cinfo->total_iMCU_rows; + (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo); } /* We bypass the main controller and invoke coef controller directly; * all work is being done from the coefficient buffer. */ - if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL)) + if (!(*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE)NULL)) ERREXIT(cinfo, JERR_CANT_SUSPEND); } (*cinfo->master->finish_pass) (cinfo); @@ -192,7 +192,7 @@ jpeg_finish_compress (j_compress_ptr cinfo) (*cinfo->marker->write_file_trailer) (cinfo); (*cinfo->dest->term_destination) (cinfo); /* We can use jpeg_abort to release memory and reset global_state */ - jpeg_abort((j_common_ptr) cinfo); + jpeg_abort((j_common_ptr)cinfo); } @@ -204,8 +204,8 @@ jpeg_finish_compress (j_compress_ptr cinfo) */ GLOBAL(void) -jpeg_write_marker (j_compress_ptr cinfo, int marker, - const JOCTET *dataptr, unsigned int datalen) +jpeg_write_marker(j_compress_ptr cinfo, int marker, const JOCTET *dataptr, + unsigned int datalen) { void (*write_marker_byte) (j_compress_ptr info, int val); @@ -226,7 +226,7 @@ jpeg_write_marker (j_compress_ptr cinfo, int marker, /* Same, but piecemeal. 
*/ GLOBAL(void) -jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen) +jpeg_write_m_header(j_compress_ptr cinfo, int marker, unsigned int datalen) { if (cinfo->next_scanline != 0 || (cinfo->global_state != CSTATE_SCANNING && @@ -238,7 +238,7 @@ jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen) } GLOBAL(void) -jpeg_write_m_byte (j_compress_ptr cinfo, int val) +jpeg_write_m_byte(j_compress_ptr cinfo, int val) { (*cinfo->marker->write_marker_byte) (cinfo, val); } @@ -266,13 +266,13 @@ jpeg_write_m_byte (j_compress_ptr cinfo, int val) */ GLOBAL(void) -jpeg_write_tables (j_compress_ptr cinfo) +jpeg_write_tables(j_compress_ptr cinfo) { if (cinfo->global_state != CSTATE_START) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); /* (Re)initialize error mgr and destination modules */ - (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo); + (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo); (*cinfo->dest->init_destination) (cinfo); /* Initialize the marker writer ... bit of a crock to do it here. 
*/ jinit_marker_writer(cinfo); diff --git a/jcapistd.c b/jcapistd.c index 5c6d0be..aa2aad9 100644 --- a/jcapistd.c +++ b/jcapistd.c @@ -36,7 +36,7 @@ */ GLOBAL(void) -jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables) +jpeg_start_compress(j_compress_ptr cinfo, boolean write_all_tables) { if (cinfo->global_state != CSTATE_START) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); @@ -45,7 +45,7 @@ jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables) jpeg_suppress_tables(cinfo, FALSE); /* mark all tables to be written */ /* (Re)initialize error mgr and destination modules */ - (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo); + (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo); (*cinfo->dest->init_destination) (cinfo); /* Perform master selection of active modules */ jinit_compress_master(cinfo); @@ -75,8 +75,8 @@ jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables) */ GLOBAL(JDIMENSION) -jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines, - JDIMENSION num_lines) +jpeg_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION num_lines) { JDIMENSION row_ctr, rows_left; @@ -87,9 +87,9 @@ jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines, /* Call progress monitor hook if present */ if (cinfo->progress != NULL) { - cinfo->progress->pass_counter = (long) cinfo->next_scanline; - cinfo->progress->pass_limit = (long) cinfo->image_height; - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + cinfo->progress->pass_counter = (long)cinfo->next_scanline; + cinfo->progress->pass_limit = (long)cinfo->image_height; + (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo); } /* Give master control module another chance if this is first call to @@ -118,8 +118,8 @@ jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines, */ GLOBAL(JDIMENSION) -jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data, - JDIMENSION num_lines) 
+jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION num_lines) { JDIMENSION lines_per_iMCU_row; @@ -132,9 +132,9 @@ jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data, /* Call progress monitor hook if present */ if (cinfo->progress != NULL) { - cinfo->progress->pass_counter = (long) cinfo->next_scanline; - cinfo->progress->pass_limit = (long) cinfo->image_height; - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + cinfo->progress->pass_counter = (long)cinfo->next_scanline; + cinfo->progress->pass_limit = (long)cinfo->image_height; + (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo); } /* Give master control module another chance if this is first call to @@ -151,7 +151,7 @@ jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data, ERREXIT(cinfo, JERR_BUFFER_SIZE); /* Directly compress the row. */ - if (! (*cinfo->coef->compress_data) (cinfo, data)) { + if (!(*cinfo->coef->compress_data) (cinfo, data)) { /* If compressor did not consume the whole row, suspend processing. */ return 0; } diff --git a/jcarith.c b/jcarith.c index 6d3b8af..b6d093f 100644 --- a/jcarith.c +++ b/jcarith.c @@ -4,16 +4,19 @@ * This file was part of the Independent JPEG Group's software: * Developed 1997-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2015, D. R. Commander. + * Copyright (C) 2015, 2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * * This file contains portable arithmetic entropy encoding routines for JPEG - * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81). + * (implementing Recommendation ITU-T T.81 | ISO/IEC 10918-1). * * Both sequential and progressive modes are supported in this single module. * * Suspension is not currently supported in this module. + * + * NOTE: All referenced figures are from + * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994. 
*/ #define JPEG_INTERNALS @@ -63,8 +66,8 @@ typedef arith_entropy_encoder *arith_entropy_ptr; * in the lower bits (mask 0x7F). */ -#define DC_STAT_BINS 64 -#define AC_STAT_BINS 256 +#define DC_STAT_BINS 64 +#define AC_STAT_BINS 256 /* NOTE: Uncomment the following #define if you want to use the * given formula for calculating the AC conditioning parameter Kx @@ -105,25 +108,25 @@ typedef arith_entropy_encoder *arith_entropy_ptr; #ifdef RIGHT_SHIFT_IS_UNSIGNED #define ISHIFT_TEMPS int ishift_temp; -#define IRIGHT_SHIFT(x,shft) \ - ((ishift_temp = (x)) < 0 ? \ - (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ - (ishift_temp >> (shft))) +#define IRIGHT_SHIFT(x, shft) \ + ((ishift_temp = (x)) < 0 ? \ + (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \ + (ishift_temp >> (shft))) #else #define ISHIFT_TEMPS -#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define IRIGHT_SHIFT(x, shft) ((x) >> (shft)) #endif LOCAL(void) -emit_byte (int val, j_compress_ptr cinfo) +emit_byte(int val, j_compress_ptr cinfo) /* Write next output byte; we do not support suspension in this module. */ { struct jpeg_destination_mgr *dest = cinfo->dest; - *dest->next_output_byte++ = (JOCTET) val; + *dest->next_output_byte++ = (JOCTET)val; if (--dest->free_in_buffer == 0) - if (! 
(*dest->empty_output_buffer) (cinfo)) + if (!(*dest->empty_output_buffer) (cinfo)) ERREXIT(cinfo, JERR_CANT_SUSPEND); } @@ -133,22 +136,22 @@ emit_byte (int val, j_compress_ptr cinfo) */ METHODDEF(void) -finish_pass (j_compress_ptr cinfo) +finish_pass(j_compress_ptr cinfo) { - arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr e = (arith_entropy_ptr)cinfo->entropy; JLONG temp; /* Section D.1.8: Termination of encoding */ /* Find the e->c in the coding interval with the largest * number of trailing zero bits */ - if ((temp = (e->a - 1 + e->c) & 0xFFFF0000L) < e->c) + if ((temp = (e->a - 1 + e->c) & 0xFFFF0000UL) < e->c) e->c = temp + 0x8000L; else e->c = temp; /* Send remaining bytes to output */ e->c <<= e->ct; - if (e->c & 0xF8000000L) { + if (e->c & 0xF8000000UL) { /* One final overflow has to be handled */ if (e->buffer >= 0) { if (e->zc) @@ -219,9 +222,9 @@ finish_pass (j_compress_ptr cinfo) */ LOCAL(void) -arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) +arith_encode(j_compress_ptr cinfo, unsigned char *st, int val) { - register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; + register arith_entropy_ptr e = (arith_entropy_ptr)cinfo->entropy; register unsigned char nl, nm; register JLONG qe, temp; register int sv; @@ -231,8 +234,8 @@ arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) */ sv = *st; qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ - nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ - nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ + nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ + nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ /* Encode & estimation procedures per sections D.1.4 & D.1.5 */ e->a -= qe; @@ -319,9 +322,9 @@ arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) */ LOCAL(void) -emit_restart (j_compress_ptr cinfo, int restart_num) +emit_restart(j_compress_ptr cinfo, int restart_num) { - arith_entropy_ptr entropy = (arith_entropy_ptr) 
cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; int ci; jpeg_component_info *compptr; @@ -362,9 +365,9 @@ emit_restart (j_compress_ptr cinfo, int restart_num) */ METHODDEF(boolean) -encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; JBLOCKROW block; unsigned char *st; int blkn, ci, tbl; @@ -391,7 +394,7 @@ encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* Compute the DC value after the required point transform by Al. * This is simply an arithmetic right shift. */ - m = IRIGHT_SHIFT((int) ((*block)[0]), cinfo->Al); + m = IRIGHT_SHIFT((int)((*block)[0]), cinfo->Al); /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */ @@ -432,9 +435,9 @@ encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) } arith_encode(cinfo, st, 0); /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ - if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) + if (m < (int)((1L << cinfo->arith_dc_L[tbl]) >> 1)) entropy->dc_context[ci] = 0; /* zero diff category */ - else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) + else if (m > (int)((1L << cinfo->arith_dc_U[tbl]) >> 1)) entropy->dc_context[ci] += 8; /* large diff category */ /* Figure F.9: Encoding the magnitude bit pattern of v */ st += 14; @@ -453,9 +456,9 @@ encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; JBLOCKROW block; unsigned char *st; int tbl, k, ke; @@ -510,7 +513,7 @@ encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) break; } } 
- arith_encode(cinfo, st + 1, 0); st += 3; k++; + arith_encode(cinfo, st + 1, 0); st += 3; k++; } st += 2; /* Figure F.8: Encoding the magnitude category of v */ @@ -552,9 +555,9 @@ encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; unsigned char *st; int Al, blkn; @@ -587,9 +590,9 @@ encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; JBLOCKROW block; unsigned char *st; int tbl, k, ke, kex; @@ -662,7 +665,7 @@ encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) break; } } - arith_encode(cinfo, st + 1, 0); st += 3; k++; + arith_encode(cinfo, st + 1, 0); st += 3; k++; } } /* Encode EOB decision only if k <= cinfo->Se */ @@ -680,9 +683,9 @@ encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; jpeg_component_info *compptr; JBLOCKROW block; unsigned char *st; @@ -747,9 +750,9 @@ encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data) } arith_encode(cinfo, st, 0); /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ - if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) + if (m < (int)((1L << cinfo->arith_dc_L[tbl]) >> 1)) entropy->dc_context[ci] = 0; /* zero diff category */ - else 
if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) + else if (m > (int)((1L << cinfo->arith_dc_U[tbl]) >> 1)) entropy->dc_context[ci] += 8; /* large diff category */ /* Figure F.9: Encoding the magnitude bit pattern of v */ st += 14; @@ -770,7 +773,7 @@ encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data) st = entropy->ac_stats[tbl] + 3 * (k - 1); arith_encode(cinfo, st, 0); /* EOB decision */ while ((v = (*block)[jpeg_natural_order[k]]) == 0) { - arith_encode(cinfo, st + 1, 0); st += 3; k++; + arith_encode(cinfo, st + 1, 0); st += 3; k++; } arith_encode(cinfo, st + 1, 1); /* Figure F.6: Encoding nonzero value v */ @@ -822,9 +825,9 @@ encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(void) -start_pass (j_compress_ptr cinfo, boolean gather_statistics) +start_pass(j_compress_ptr cinfo, boolean gather_statistics) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; int ci, tbl; jpeg_component_info *compptr; @@ -862,8 +865,8 @@ start_pass (j_compress_ptr cinfo, boolean gather_statistics) if (tbl < 0 || tbl >= NUM_ARITH_TBLS) ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->dc_stats[tbl] == NULL) - entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); + entropy->dc_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, JPOOL_IMAGE, DC_STAT_BINS); MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS); /* Initialize DC predictions to 0 */ entropy->last_dc_val[ci] = 0; @@ -875,13 +878,14 @@ start_pass (j_compress_ptr cinfo, boolean gather_statistics) if (tbl < 0 || tbl >= NUM_ARITH_TBLS) ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->ac_stats[tbl] == NULL) - entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); + entropy->ac_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, 
JPOOL_IMAGE, AC_STAT_BINS); MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS); #ifdef CALCULATE_SPECTRAL_CONDITIONING if (cinfo->progressive_mode) /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */ - cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4); + cinfo->arith_ac_K[tbl] = cinfo->Ss + + ((8 + cinfo->Se - cinfo->Ss) >> 4); #endif } } @@ -905,15 +909,15 @@ start_pass (j_compress_ptr cinfo, boolean gather_statistics) */ GLOBAL(void) -jinit_arith_encoder (j_compress_ptr cinfo) +jinit_arith_encoder(j_compress_ptr cinfo) { arith_entropy_ptr entropy; int i; entropy = (arith_entropy_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(arith_entropy_encoder)); - cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; + cinfo->entropy = (struct jpeg_entropy_encoder *)entropy; entropy->pub.start_pass = start_pass; entropy->pub.finish_pass = finish_pass; diff --git a/jccoefct.c b/jccoefct.c index a08d6e3..068232a 100644 --- a/jccoefct.c +++ b/jccoefct.c @@ -58,21 +58,19 @@ typedef my_coef_controller *my_coef_ptr; /* Forward declarations */ -METHODDEF(boolean) compress_data - (j_compress_ptr cinfo, JSAMPIMAGE input_buf); +METHODDEF(boolean) compress_data(j_compress_ptr cinfo, JSAMPIMAGE input_buf); #ifdef FULL_COEF_BUFFER_SUPPORTED -METHODDEF(boolean) compress_first_pass - (j_compress_ptr cinfo, JSAMPIMAGE input_buf); -METHODDEF(boolean) compress_output - (j_compress_ptr cinfo, JSAMPIMAGE input_buf); +METHODDEF(boolean) compress_first_pass(j_compress_ptr cinfo, + JSAMPIMAGE input_buf); +METHODDEF(boolean) compress_output(j_compress_ptr cinfo, JSAMPIMAGE input_buf); #endif LOCAL(void) -start_iMCU_row (j_compress_ptr cinfo) +start_iMCU_row(j_compress_ptr cinfo) /* Reset within-iMCU-row counters for a new row */ { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; /* In an interleaved scan, an MCU row is 
the same as an iMCU row. * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows. @@ -81,7 +79,7 @@ start_iMCU_row (j_compress_ptr cinfo) if (cinfo->comps_in_scan > 1) { coef->MCU_rows_per_iMCU_row = 1; } else { - if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1)) + if (coef->iMCU_row_num < (cinfo->total_iMCU_rows - 1)) coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor; else coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height; @@ -97,9 +95,9 @@ start_iMCU_row (j_compress_ptr cinfo) */ METHODDEF(void) -start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode) +start_pass_coef(j_compress_ptr cinfo, J_BUF_MODE pass_mode) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; coef->iMCU_row_num = 0; start_iMCU_row(cinfo); @@ -140,9 +138,9 @@ start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode) */ METHODDEF(boolean) -compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf) +compress_data(j_compress_ptr cinfo, JSAMPIMAGE input_buf) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; JDIMENSION MCU_col_num; /* index of current MCU within row */ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; @@ -167,31 +165,33 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf) blkn = 0; for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; - blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width - : compptr->last_col_width; + blockcnt = (MCU_col_num < last_MCU_col) ? 
compptr->MCU_width : + compptr->last_col_width; xpos = MCU_col_num * compptr->MCU_sample_width; ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */ for (yindex = 0; yindex < compptr->MCU_height; yindex++) { if (coef->iMCU_row_num < last_iMCU_row || - yoffset+yindex < compptr->last_row_height) { + yoffset + yindex < compptr->last_row_height) { (*cinfo->fdct->forward_DCT) (cinfo, compptr, input_buf[compptr->component_index], coef->MCU_buffer[blkn], - ypos, xpos, (JDIMENSION) blockcnt); + ypos, xpos, (JDIMENSION)blockcnt); if (blockcnt < compptr->MCU_width) { /* Create some dummy blocks at the right edge of the image. */ - jzero_far((void *) coef->MCU_buffer[blkn + blockcnt], + jzero_far((void *)coef->MCU_buffer[blkn + blockcnt], (compptr->MCU_width - blockcnt) * sizeof(JBLOCK)); for (bi = blockcnt; bi < compptr->MCU_width; bi++) { - coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0]; + coef->MCU_buffer[blkn + bi][0][0] = + coef->MCU_buffer[blkn + bi - 1][0][0]; } } } else { /* Create a row of dummy blocks at the bottom of the image. */ - jzero_far((void *) coef->MCU_buffer[blkn], + jzero_far((void *)coef->MCU_buffer[blkn], compptr->MCU_width * sizeof(JBLOCK)); for (bi = 0; bi < compptr->MCU_width; bi++) { - coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0]; + coef->MCU_buffer[blkn + bi][0][0] = + coef->MCU_buffer[blkn - 1][0][0]; } } blkn += compptr->MCU_width; @@ -201,7 +201,7 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf) /* Try to write the MCU. In event of a suspension failure, we will * re-DCT the MCU on restart (a bit inefficient, could be fixed...) */ - if (! 
(*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) { + if (!(*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) { /* Suspension forced; update state counters and exit */ coef->MCU_vert_offset = yoffset; coef->mcu_ctr = MCU_col_num; @@ -242,9 +242,9 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf) */ METHODDEF(boolean) -compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) +compress_first_pass(j_compress_ptr cinfo, JSAMPIMAGE input_buf) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; JDIMENSION blocks_across, MCUs_across, MCUindex; int bi, ci, h_samp_factor, block_row, block_rows, ndummy; @@ -257,21 +257,21 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) ci++, compptr++) { /* Align the virtual buffer for this component. */ buffer = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[ci], + ((j_common_ptr)cinfo, coef->whole_image[ci], coef->iMCU_row_num * compptr->v_samp_factor, - (JDIMENSION) compptr->v_samp_factor, TRUE); + (JDIMENSION)compptr->v_samp_factor, TRUE); /* Count non-dummy DCT block rows in this iMCU row. */ if (coef->iMCU_row_num < last_iMCU_row) block_rows = compptr->v_samp_factor; else { /* NB: can't use last_row_height here, since may not be set! */ - block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor); + block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor); if (block_rows == 0) block_rows = compptr->v_samp_factor; } blocks_across = compptr->width_in_blocks; h_samp_factor = compptr->h_samp_factor; /* Count number of dummy blocks to be added at the right margin. */ - ndummy = (int) (blocks_across % h_samp_factor); + ndummy = (int)(blocks_across % h_samp_factor); if (ndummy > 0) ndummy = h_samp_factor - ndummy; /* Perform DCT for all non-dummy blocks in this iMCU row. 
Each call @@ -281,12 +281,12 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) thisblockrow = buffer[block_row]; (*cinfo->fdct->forward_DCT) (cinfo, compptr, input_buf[ci], thisblockrow, - (JDIMENSION) (block_row * DCTSIZE), - (JDIMENSION) 0, blocks_across); + (JDIMENSION)(block_row * DCTSIZE), + (JDIMENSION)0, blocks_across); if (ndummy > 0) { /* Create dummy blocks at the right edge of the image. */ thisblockrow += blocks_across; /* => first dummy block */ - jzero_far((void *) thisblockrow, ndummy * sizeof(JBLOCK)); + jzero_far((void *)thisblockrow, ndummy * sizeof(JBLOCK)); lastDC = thisblockrow[-1][0]; for (bi = 0; bi < ndummy; bi++) { thisblockrow[bi][0] = lastDC; @@ -304,11 +304,11 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) for (block_row = block_rows; block_row < compptr->v_samp_factor; block_row++) { thisblockrow = buffer[block_row]; - lastblockrow = buffer[block_row-1]; - jzero_far((void *) thisblockrow, - (size_t) (blocks_across * sizeof(JBLOCK))); + lastblockrow = buffer[block_row - 1]; + jzero_far((void *)thisblockrow, + (size_t)(blocks_across * sizeof(JBLOCK))); for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) { - lastDC = lastblockrow[h_samp_factor-1][0]; + lastDC = lastblockrow[h_samp_factor - 1][0]; for (bi = 0; bi < h_samp_factor; bi++) { thisblockrow[bi][0] = lastDC; } @@ -338,9 +338,9 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) */ METHODDEF(boolean) -compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) +compress_output(j_compress_ptr cinfo, JSAMPIMAGE input_buf) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; JDIMENSION MCU_col_num; /* index of current MCU within row */ int blkn, ci, xindex, yindex, yoffset; JDIMENSION start_col; @@ -355,9 +355,9 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; buffer[ci] = 
(*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index], + ((j_common_ptr)cinfo, coef->whole_image[compptr->component_index], coef->iMCU_row_num * compptr->v_samp_factor, - (JDIMENSION) compptr->v_samp_factor, FALSE); + (JDIMENSION)compptr->v_samp_factor, FALSE); } /* Loop to process one whole iMCU row */ @@ -371,14 +371,14 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) compptr = cinfo->cur_comp_info[ci]; start_col = MCU_col_num * compptr->MCU_width; for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - buffer_ptr = buffer[ci][yindex+yoffset] + start_col; + buffer_ptr = buffer[ci][yindex + yoffset] + start_col; for (xindex = 0; xindex < compptr->MCU_width; xindex++) { coef->MCU_buffer[blkn++] = buffer_ptr++; } } } /* Try to write the MCU. */ - if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) { + if (!(*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) { /* Suspension forced; update state counters and exit */ coef->MCU_vert_offset = yoffset; coef->mcu_ctr = MCU_col_num; @@ -402,14 +402,14 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) */ GLOBAL(void) -jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer) +jinit_c_coef_controller(j_compress_ptr cinfo, boolean need_full_buffer) { my_coef_ptr coef; coef = (my_coef_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_coef_controller)); - cinfo->coef = (struct jpeg_c_coef_controller *) coef; + cinfo->coef = (struct jpeg_c_coef_controller *)coef; coef->pub.start_pass = start_pass_coef; /* Create the coefficient buffer. 
*/ @@ -423,12 +423,12 @@ jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer) for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { coef->whole_image[ci] = (*cinfo->mem->request_virt_barray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - (JDIMENSION) jround_up((long) compptr->width_in_blocks, - (long) compptr->h_samp_factor), - (JDIMENSION) jround_up((long) compptr->height_in_blocks, - (long) compptr->v_samp_factor), - (JDIMENSION) compptr->v_samp_factor); + ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION)jround_up((long)compptr->width_in_blocks, + (long)compptr->h_samp_factor), + (JDIMENSION)jround_up((long)compptr->height_in_blocks, + (long)compptr->v_samp_factor), + (JDIMENSION)compptr->v_samp_factor); } #else ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); @@ -439,7 +439,7 @@ jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer) int i; buffer = (JBLOCKROW) - (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE, C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) { coef->MCU_buffer[i] = buffer + i; diff --git a/jccolext.c b/jccolext.c index 479b320..19c955c 100644 --- a/jccolext.c +++ b/jccolext.c @@ -29,13 +29,13 @@ INLINE LOCAL(void) -rgb_ycc_convert_internal (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; register int r, g, b; - register JLONG * ctab = cconvert->rgb_ycc_tab; + register JLONG *ctab = cconvert->rgb_ycc_tab; register JSAMPROW inptr; register JSAMPROW outptr0, outptr1, outptr2; register JDIMENSION col; @@ -58,17 +58,14 @@ rgb_ycc_convert_internal 
(j_compress_ptr cinfo, * need the general RIGHT_SHIFT macro. */ /* Y */ - outptr0[col] = (JSAMPLE) - ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) - >> SCALEBITS); + outptr0[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] + + ctab[b + B_Y_OFF]) >> SCALEBITS); /* Cb */ - outptr1[col] = (JSAMPLE) - ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) - >> SCALEBITS); + outptr1[col] = (JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] + + ctab[b + B_CB_OFF]) >> SCALEBITS); /* Cr */ - outptr2[col] = (JSAMPLE) - ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) - >> SCALEBITS); + outptr2[col] = (JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] + + ctab[b + B_CR_OFF]) >> SCALEBITS); } } } @@ -86,13 +83,13 @@ rgb_ycc_convert_internal (j_compress_ptr cinfo, INLINE LOCAL(void) -rgb_gray_convert_internal (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; register int r, g, b; - register JLONG * ctab = cconvert->rgb_ycc_tab; + register JLONG *ctab = cconvert->rgb_ycc_tab; register JSAMPROW inptr; register JSAMPROW outptr; register JDIMENSION col; @@ -108,9 +105,8 @@ rgb_gray_convert_internal (j_compress_ptr cinfo, b = GETJSAMPLE(inptr[RGB_BLUE]); inptr += RGB_PIXELSIZE; /* Y */ - outptr[col] = (JSAMPLE) - ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) - >> SCALEBITS); + outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] + + ctab[b + B_Y_OFF]) >> SCALEBITS); } } } @@ -123,9 +119,9 @@ rgb_gray_convert_internal (j_compress_ptr cinfo, INLINE LOCAL(void) -rgb_rgb_convert_internal (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) 
+rgb_rgb_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) { register JSAMPROW inptr; register JSAMPROW outptr0, outptr1, outptr2; diff --git a/jccolor.c b/jccolor.c index b973d10..036f601 100644 --- a/jccolor.c +++ b/jccolor.c @@ -63,9 +63,9 @@ typedef my_color_converter *my_cconvert_ptr; */ #define SCALEBITS 16 /* speediest right-shift on some machines */ -#define CBCR_OFFSET ((JLONG) CENTERJSAMPLE << SCALEBITS) -#define ONE_HALF ((JLONG) 1 << (SCALEBITS-1)) -#define FIX(x) ((JLONG) ((x) * (1L< Y section */ -#define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */ -#define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. */ -#define R_CB_OFF (3*(MAXJSAMPLE+1)) -#define G_CB_OFF (4*(MAXJSAMPLE+1)) -#define B_CB_OFF (5*(MAXJSAMPLE+1)) +#define G_Y_OFF (1 * (MAXJSAMPLE + 1)) /* offset to G => Y section */ +#define B_Y_OFF (2 * (MAXJSAMPLE + 1)) /* etc. */ +#define R_CB_OFF (3 * (MAXJSAMPLE + 1)) +#define G_CB_OFF (4 * (MAXJSAMPLE + 1)) +#define B_CB_OFF (5 * (MAXJSAMPLE + 1)) #define R_CR_OFF B_CB_OFF /* B=>Cb, R=>Cr are the same */ -#define G_CR_OFF (6*(MAXJSAMPLE+1)) -#define B_CR_OFF (7*(MAXJSAMPLE+1)) -#define TABLE_SIZE (8*(MAXJSAMPLE+1)) +#define G_CR_OFF (6 * (MAXJSAMPLE + 1)) +#define B_CR_OFF (7 * (MAXJSAMPLE + 1)) +#define TABLE_SIZE (8 * (MAXJSAMPLE + 1)) /* Include inline routines for colorspace extensions */ @@ -93,13 +93,13 @@ typedef my_color_converter *my_cconvert_ptr; #undef RGB_BLUE #undef RGB_PIXELSIZE -#define RGB_RED EXT_RGB_RED -#define RGB_GREEN EXT_RGB_GREEN -#define RGB_BLUE EXT_RGB_BLUE -#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -#define rgb_ycc_convert_internal extrgb_ycc_convert_internal -#define rgb_gray_convert_internal extrgb_gray_convert_internal -#define rgb_rgb_convert_internal extrgb_rgb_convert_internal +#define RGB_RED EXT_RGB_RED +#define RGB_GREEN EXT_RGB_GREEN +#define RGB_BLUE EXT_RGB_BLUE +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define 
rgb_ycc_convert_internal extrgb_ycc_convert_internal +#define rgb_gray_convert_internal extrgb_gray_convert_internal +#define rgb_rgb_convert_internal extrgb_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -109,13 +109,13 @@ typedef my_color_converter *my_cconvert_ptr; #undef rgb_gray_convert_internal #undef rgb_rgb_convert_internal -#define RGB_RED EXT_RGBX_RED -#define RGB_GREEN EXT_RGBX_GREEN -#define RGB_BLUE EXT_RGBX_BLUE -#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -#define rgb_ycc_convert_internal extrgbx_ycc_convert_internal -#define rgb_gray_convert_internal extrgbx_gray_convert_internal -#define rgb_rgb_convert_internal extrgbx_rgb_convert_internal +#define RGB_RED EXT_RGBX_RED +#define RGB_GREEN EXT_RGBX_GREEN +#define RGB_BLUE EXT_RGBX_BLUE +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define rgb_ycc_convert_internal extrgbx_ycc_convert_internal +#define rgb_gray_convert_internal extrgbx_gray_convert_internal +#define rgb_rgb_convert_internal extrgbx_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -125,13 +125,13 @@ typedef my_color_converter *my_cconvert_ptr; #undef rgb_gray_convert_internal #undef rgb_rgb_convert_internal -#define RGB_RED EXT_BGR_RED -#define RGB_GREEN EXT_BGR_GREEN -#define RGB_BLUE EXT_BGR_BLUE -#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -#define rgb_ycc_convert_internal extbgr_ycc_convert_internal -#define rgb_gray_convert_internal extbgr_gray_convert_internal -#define rgb_rgb_convert_internal extbgr_rgb_convert_internal +#define RGB_RED EXT_BGR_RED +#define RGB_GREEN EXT_BGR_GREEN +#define RGB_BLUE EXT_BGR_BLUE +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define rgb_ycc_convert_internal extbgr_ycc_convert_internal +#define rgb_gray_convert_internal extbgr_gray_convert_internal +#define rgb_rgb_convert_internal extbgr_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -141,13 +141,13 @@ typedef my_color_converter *my_cconvert_ptr; #undef 
rgb_gray_convert_internal #undef rgb_rgb_convert_internal -#define RGB_RED EXT_BGRX_RED -#define RGB_GREEN EXT_BGRX_GREEN -#define RGB_BLUE EXT_BGRX_BLUE -#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -#define rgb_ycc_convert_internal extbgrx_ycc_convert_internal -#define rgb_gray_convert_internal extbgrx_gray_convert_internal -#define rgb_rgb_convert_internal extbgrx_rgb_convert_internal +#define RGB_RED EXT_BGRX_RED +#define RGB_GREEN EXT_BGRX_GREEN +#define RGB_BLUE EXT_BGRX_BLUE +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define rgb_ycc_convert_internal extbgrx_ycc_convert_internal +#define rgb_gray_convert_internal extbgrx_gray_convert_internal +#define rgb_rgb_convert_internal extbgrx_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -157,13 +157,13 @@ typedef my_color_converter *my_cconvert_ptr; #undef rgb_gray_convert_internal #undef rgb_rgb_convert_internal -#define RGB_RED EXT_XBGR_RED -#define RGB_GREEN EXT_XBGR_GREEN -#define RGB_BLUE EXT_XBGR_BLUE -#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -#define rgb_ycc_convert_internal extxbgr_ycc_convert_internal -#define rgb_gray_convert_internal extxbgr_gray_convert_internal -#define rgb_rgb_convert_internal extxbgr_rgb_convert_internal +#define RGB_RED EXT_XBGR_RED +#define RGB_GREEN EXT_XBGR_GREEN +#define RGB_BLUE EXT_XBGR_BLUE +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define rgb_ycc_convert_internal extxbgr_ycc_convert_internal +#define rgb_gray_convert_internal extxbgr_gray_convert_internal +#define rgb_rgb_convert_internal extxbgr_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -173,13 +173,13 @@ typedef my_color_converter *my_cconvert_ptr; #undef rgb_gray_convert_internal #undef rgb_rgb_convert_internal -#define RGB_RED EXT_XRGB_RED -#define RGB_GREEN EXT_XRGB_GREEN -#define RGB_BLUE EXT_XRGB_BLUE -#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -#define rgb_ycc_convert_internal extxrgb_ycc_convert_internal -#define rgb_gray_convert_internal 
extxrgb_gray_convert_internal -#define rgb_rgb_convert_internal extxrgb_rgb_convert_internal +#define RGB_RED EXT_XRGB_RED +#define RGB_GREEN EXT_XRGB_GREEN +#define RGB_BLUE EXT_XRGB_BLUE +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define rgb_ycc_convert_internal extxrgb_ycc_convert_internal +#define rgb_gray_convert_internal extxrgb_gray_convert_internal +#define rgb_rgb_convert_internal extxrgb_rgb_convert_internal #include "jccolext.c" #undef RGB_RED #undef RGB_GREEN @@ -195,33 +195,33 @@ typedef my_color_converter *my_cconvert_ptr; */ METHODDEF(void) -rgb_ycc_start (j_compress_ptr cinfo) +rgb_ycc_start(j_compress_ptr cinfo) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; JLONG *rgb_ycc_tab; JLONG i; /* Allocate and fill in the conversion tables. */ cconvert->rgb_ycc_tab = rgb_ycc_tab = (JLONG *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, (TABLE_SIZE * sizeof(JLONG))); for (i = 0; i <= MAXJSAMPLE; i++) { - rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i; - rgb_ycc_tab[i+G_Y_OFF] = FIX(0.58700) * i; - rgb_ycc_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF; - rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.16874)) * i; - rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.33126)) * i; + rgb_ycc_tab[i + R_Y_OFF] = FIX(0.29900) * i; + rgb_ycc_tab[i + G_Y_OFF] = FIX(0.58700) * i; + rgb_ycc_tab[i + B_Y_OFF] = FIX(0.11400) * i + ONE_HALF; + rgb_ycc_tab[i + R_CB_OFF] = (-FIX(0.16874)) * i; + rgb_ycc_tab[i + G_CB_OFF] = (-FIX(0.33126)) * i; /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr. * This ensures that the maximum output will round to MAXJSAMPLE * not MAXJSAMPLE+1, and thus that we don't have to range-limit. 
*/ - rgb_ycc_tab[i+B_CB_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF-1; + rgb_ycc_tab[i + B_CB_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF - 1; /* B=>Cb and R=>Cr tables are the same - rgb_ycc_tab[i+R_CR_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF-1; + rgb_ycc_tab[i + R_CR_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF - 1; */ - rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.41869)) * i; - rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.08131)) * i; + rgb_ycc_tab[i + G_CR_OFF] = (-FIX(0.41869)) * i; + rgb_ycc_tab[i + B_CR_OFF] = (-FIX(0.08131)) * i; } } @@ -231,43 +231,42 @@ rgb_ycc_start (j_compress_ptr cinfo) */ METHODDEF(void) -rgb_ycc_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { switch (cinfo->in_color_space) { - case JCS_EXT_RGB: - extrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - extrgbx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_BGR: - extbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - extbgrx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - extxbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - extxrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - default: - rgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; + case JCS_EXT_RGB: + extrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + extrgbx_ycc_convert_internal(cinfo, input_buf, 
output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGR: + extbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + extbgrx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + extxbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + extxrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + default: + rgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; } } @@ -280,43 +279,42 @@ rgb_ycc_convert (j_compress_ptr cinfo, */ METHODDEF(void) -rgb_gray_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { switch (cinfo->in_color_space) { - case JCS_EXT_RGB: - extrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - extrgbx_gray_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_BGR: - extbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - extbgrx_gray_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - extxbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - extxrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - default: - rgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; + case JCS_EXT_RGB: + extrgb_gray_convert_internal(cinfo, input_buf, 
output_buf, output_row, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + extrgbx_gray_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGR: + extbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + extbgrx_gray_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + extxbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + extxrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + default: + rgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; } } @@ -326,43 +324,42 @@ rgb_gray_convert (j_compress_ptr cinfo, */ METHODDEF(void) -rgb_rgb_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +rgb_rgb_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { switch (cinfo->in_color_space) { - case JCS_EXT_RGB: - extrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - extrgbx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_BGR: - extbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - extbgrx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - extxbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - extxrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; - default: - 
rgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, - num_rows); - break; + case JCS_EXT_RGB: + extrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + extrgbx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGR: + extbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + extbgrx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + extxbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + extxrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; + default: + rgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, + num_rows); + break; } } @@ -376,11 +373,10 @@ rgb_rgb_convert (j_compress_ptr cinfo, */ METHODDEF(void) -cmyk_ycck_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; register int r, g, b; register JLONG *ctab = cconvert->rgb_ycc_tab; register JSAMPROW inptr; @@ -408,17 +404,14 @@ cmyk_ycck_convert (j_compress_ptr cinfo, * need the general RIGHT_SHIFT macro. 
*/ /* Y */ - outptr0[col] = (JSAMPLE) - ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) - >> SCALEBITS); + outptr0[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] + + ctab[b + B_Y_OFF]) >> SCALEBITS); /* Cb */ - outptr1[col] = (JSAMPLE) - ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) - >> SCALEBITS); + outptr1[col] = (JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] + + ctab[b + B_CB_OFF]) >> SCALEBITS); /* Cr */ - outptr2[col] = (JSAMPLE) - ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) - >> SCALEBITS); + outptr2[col] = (JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] + + ctab[b + B_CR_OFF]) >> SCALEBITS); } } } @@ -431,9 +424,8 @@ cmyk_ycck_convert (j_compress_ptr cinfo, */ METHODDEF(void) -grayscale_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +grayscale_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows) { register JSAMPROW inptr; register JSAMPROW outptr; @@ -460,9 +452,8 @@ grayscale_convert (j_compress_ptr cinfo, */ METHODDEF(void) -null_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { register JSAMPROW inptr; register JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3; @@ -522,7 +513,7 @@ null_convert (j_compress_ptr cinfo, */ METHODDEF(void) -null_method (j_compress_ptr cinfo) +null_method(j_compress_ptr cinfo) { /* no work needed */ } @@ -533,14 +524,14 @@ null_method (j_compress_ptr cinfo) */ GLOBAL(void) -jinit_color_converter (j_compress_ptr cinfo) +jinit_color_converter(j_compress_ptr cinfo) { my_cconvert_ptr cconvert; cconvert = (my_cconvert_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, 
sizeof(my_color_converter)); - cinfo->cconvert = (struct jpeg_color_converter *) cconvert; + cinfo->cconvert = (struct jpeg_color_converter *)cconvert; /* set start_pass to null method until we find out differently */ cconvert->pub.start_pass = null_method; diff --git a/jcdctmgr.c b/jcdctmgr.c index 6e3b19b..c04058e 100644 --- a/jcdctmgr.c +++ b/jcdctmgr.c @@ -41,7 +41,7 @@ typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); -METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *); +METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *); typedef struct { struct jpeg_forward_dct pub; /* public fields */ @@ -80,7 +80,7 @@ typedef my_fdct_controller *my_fdct_ptr; */ LOCAL(int) -flss (UINT16 val) +flss(UINT16 val) { int bit; @@ -170,7 +170,7 @@ flss (UINT16 val) */ LOCAL(int) -compute_reciprocal (UINT16 divisor, DCTELEM *dtbl) +compute_reciprocal(UINT16 divisor, DCTELEM *dtbl) { UDCTELEM2 fq, fr; UDCTELEM c; @@ -182,10 +182,10 @@ compute_reciprocal (UINT16 divisor, DCTELEM *dtbl) * identity function. Since only the C quantization algorithm is used in * these cases, the scale value is irrelevant. 
*/ - dtbl[DCTSIZE2 * 0] = (DCTELEM) 1; /* reciprocal */ - dtbl[DCTSIZE2 * 1] = (DCTELEM) 0; /* correction */ - dtbl[DCTSIZE2 * 2] = (DCTELEM) 1; /* scale */ - dtbl[DCTSIZE2 * 3] = -(DCTELEM) (sizeof(DCTELEM) * 8); /* shift */ + dtbl[DCTSIZE2 * 0] = (DCTELEM)1; /* reciprocal */ + dtbl[DCTSIZE2 * 1] = (DCTELEM)0; /* correction */ + dtbl[DCTSIZE2 * 2] = (DCTELEM)1; /* scale */ + dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8); /* shift */ return 0; } @@ -195,26 +195,26 @@ compute_reciprocal (UINT16 divisor, DCTELEM *dtbl) fq = ((UDCTELEM2)1 << r) / divisor; fr = ((UDCTELEM2)1 << r) % divisor; - c = divisor / 2; /* for rounding */ + c = divisor / 2; /* for rounding */ - if (fr == 0) { /* divisor is power of two */ + if (fr == 0) { /* divisor is power of two */ /* fq will be one bit too large to fit in DCTELEM, so adjust */ fq >>= 1; r--; - } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */ + } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */ c++; - } else { /* fractional part is > 0.5 */ + } else { /* fractional part is > 0.5 */ fq++; } - dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */ - dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */ + dtbl[DCTSIZE2 * 0] = (DCTELEM)fq; /* reciprocal */ + dtbl[DCTSIZE2 * 1] = (DCTELEM)c; /* correction + roundfactor */ #ifdef WITH_SIMD - dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */ + dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */ #else dtbl[DCTSIZE2 * 2] = 1; #endif - dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */ + dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */ if (r <= 16) return 0; else return 1; @@ -233,9 +233,9 @@ compute_reciprocal (UINT16 divisor, DCTELEM *dtbl) */ METHODDEF(void) -start_pass_fdctmgr (j_compress_ptr cinfo) +start_pass_fdctmgr(j_compress_ptr cinfo) { - my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; + my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct; int ci, 
qtblno, i; jpeg_component_info *compptr; JQUANT_TBL *qtbl; @@ -259,7 +259,7 @@ start_pass_fdctmgr (j_compress_ptr cinfo) */ if (fdct->divisors[qtblno] == NULL) { fdct->divisors[qtblno] = (DCTELEM *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, (DCTSIZE2 * 4) * sizeof(DCTELEM)); } dtbl = fdct->divisors[qtblno]; @@ -269,7 +269,7 @@ start_pass_fdctmgr (j_compress_ptr cinfo) fdct->quantize == jsimd_quantize) fdct->quantize = quantize; #else - dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3; + dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3; #endif } break; @@ -283,7 +283,7 @@ start_pass_fdctmgr (j_compress_ptr cinfo) * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 * We apply a further scale factor of 8. */ -#define CONST_BITS 14 +#define CONST_BITS 14 static const INT16 aanscales[DCTSIZE2] = { /* precomputed values scaled up by 14 bits */ 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, @@ -299,23 +299,23 @@ start_pass_fdctmgr (j_compress_ptr cinfo) if (fdct->divisors[qtblno] == NULL) { fdct->divisors[qtblno] = (DCTELEM *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, (DCTSIZE2 * 4) * sizeof(DCTELEM)); } dtbl = fdct->divisors[qtblno]; for (i = 0; i < DCTSIZE2; i++) { #if BITS_IN_JSAMPLE == 8 if (!compute_reciprocal( - DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i], - (JLONG) aanscales[i]), - CONST_BITS-3), &dtbl[i]) && + DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i], + (JLONG)aanscales[i]), + CONST_BITS - 3), &dtbl[i]) && fdct->quantize == jsimd_quantize) fdct->quantize = quantize; #else - dtbl[i] = (DCTELEM) - DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i], - (JLONG) aanscales[i]), - CONST_BITS-3); + dtbl[i] = (DCTELEM) + DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i], + (JLONG)aanscales[i]), + CONST_BITS - 3); #endif } } @@ -341,7 +341,7 @@ start_pass_fdctmgr (j_compress_ptr cinfo) if 
(fdct->float_divisors[qtblno] == NULL) { fdct->float_divisors[qtblno] = (FAST_FLOAT *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, DCTSIZE2 * sizeof(FAST_FLOAT)); } fdtbl = fdct->float_divisors[qtblno]; @@ -349,7 +349,7 @@ start_pass_fdctmgr (j_compress_ptr cinfo) for (row = 0; row < DCTSIZE; row++) { for (col = 0; col < DCTSIZE; col++) { fdtbl[i] = (FAST_FLOAT) - (1.0 / (((double) qtbl->quantval[i] * + (1.0 / (((double)qtbl->quantval[i] * aanscalefactor[row] * aanscalefactor[col] * 8.0))); i++; } @@ -370,7 +370,7 @@ start_pass_fdctmgr (j_compress_ptr cinfo) */ METHODDEF(void) -convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace) +convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace) { register DCTELEM *workspaceptr; register JSAMPROW elemptr; @@ -405,7 +405,7 @@ convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace) */ METHODDEF(void) -quantize (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) +quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) { int i; DCTELEM temp; @@ -426,15 +426,15 @@ quantize (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) if (temp < 0) { temp = -temp; product = (UDCTELEM2)(temp + corr) * recip; - product >>= shift + sizeof(DCTELEM)*8; + product >>= shift + sizeof(DCTELEM) * 8; temp = (DCTELEM)product; temp = -temp; } else { product = (UDCTELEM2)(temp + corr) * recip; - product >>= shift + sizeof(DCTELEM)*8; + product >>= shift + sizeof(DCTELEM) * 8; temp = (DCTELEM)product; } - output_ptr[i] = (JCOEF) temp; + output_ptr[i] = (JCOEF)temp; } #else @@ -457,20 +457,20 @@ quantize (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) * If your machine's division is fast enough, define FAST_DIVIDE. 
*/ #ifdef FAST_DIVIDE -#define DIVIDE_BY(a,b) a /= b +#define DIVIDE_BY(a, b) a /= b #else -#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0 +#define DIVIDE_BY(a, b) if (a >= b) a /= b; else a = 0 #endif if (temp < 0) { temp = -temp; - temp += qval>>1; /* for rounding */ + temp += qval >> 1; /* for rounding */ DIVIDE_BY(temp, qval); temp = -temp; } else { - temp += qval>>1; /* for rounding */ + temp += qval >> 1; /* for rounding */ DIVIDE_BY(temp, qval); } - output_ptr[i] = (JCOEF) temp; + output_ptr[i] = (JCOEF)temp; } #endif @@ -487,14 +487,13 @@ quantize (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) */ METHODDEF(void) -forward_DCT (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY sample_data, JBLOCKROW coef_blocks, - JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks) +forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, + JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks) /* This version is used for integer DCT implementations. */ { /* This routine is heavily used, so it's worth coding it tightly. 
*/ - my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; + my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct; DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no]; DCTELEM *workspace; JDIMENSION bi; @@ -522,9 +521,9 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info *compptr, #ifdef DCT_FLOAT_SUPPORTED - METHODDEF(void) -convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace) +convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) { register FAST_FLOAT *workspaceptr; register JSAMPROW elemptr; @@ -555,7 +554,8 @@ convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *worksp METHODDEF(void) -quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace) +quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) { register FAST_FLOAT temp; register int i; @@ -571,20 +571,20 @@ quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace * The maximum coefficient size is +-16K (for 12-bit data), so this * code should work for either 16-bit or 32-bit ints. */ - output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384); + output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384); } } METHODDEF(void) -forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY sample_data, JBLOCKROW coef_blocks, - JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks) +forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, + JDIMENSION start_row, JDIMENSION start_col, + JDIMENSION num_blocks) /* This version is used for floating-point DCT implementations. */ { /* This routine is heavily used, so it's worth coding it tightly. 
*/ - my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; + my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct; FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no]; FAST_FLOAT *workspace; JDIMENSION bi; @@ -618,15 +618,15 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jinit_forward_dct (j_compress_ptr cinfo) +jinit_forward_dct(j_compress_ptr cinfo) { my_fdct_ptr fdct; int i; fdct = (my_fdct_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_fdct_controller)); - cinfo->fdct = (struct jpeg_forward_dct *) fdct; + cinfo->fdct = (struct jpeg_forward_dct *)fdct; fdct->pub.start_pass = start_pass_fdctmgr; /* First determine the DCT... */ @@ -703,12 +703,12 @@ jinit_forward_dct (j_compress_ptr cinfo) #ifdef DCT_FLOAT_SUPPORTED if (cinfo->dct_method == JDCT_FLOAT) fdct->float_workspace = (FAST_FLOAT *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(FAST_FLOAT) * DCTSIZE2); else #endif fdct->workspace = (DCTELEM *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(DCTELEM) * DCTSIZE2); /* Mark divisor tables unallocated */ diff --git a/jchuff.c b/jchuff.c index fffaace..939b3e7 100644 --- a/jchuff.c +++ b/jchuff.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2009-2011, 2014-2016, D. R. Commander. + * Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander. * Copyright (C) 2015, Matthieu Darbois. * For conditions of distribution and use, see the accompanying README.ijg * file. @@ -16,6 +16,9 @@ * back up to the start of the current MCU. 
To do this, we copy state * variables into local working storage, and update them back to the * permanent JPEG objects only upon successful completion of an MCU. + * + * NOTE: All referenced figures are from + * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994. */ #define JPEG_INTERNALS @@ -47,16 +50,12 @@ #endif #ifdef USE_CLZ_INTRINSIC -#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) -#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) +#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) #else #include "jpeg_nbits_table.h" -#define JPEG_NBITS(x) (jpeg_nbits_table[x]) -#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) -#endif - -#ifndef min - #define min(a,b) ((a)<(b)?(a):(b)) +#define JPEG_NBITS(x) (jpeg_nbits_table[x]) +#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) #endif @@ -67,9 +66,9 @@ */ typedef struct { - size_t put_buffer; /* current bit-accumulation buffer */ - int put_bits; /* # of bits now in it */ - int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ + size_t put_buffer; /* current bit-accumulation buffer */ + int put_bits; /* # of bits now in it */ + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ } savable_state; /* This macro is to work around compilers with missing or broken @@ -78,16 +77,16 @@ typedef struct { */ #ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest,src) ((dest) = (src)) +#define ASSIGN_STATE(dest, src) ((dest) = (src)) #else #if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest,src) \ - ((dest).put_buffer = (src).put_buffer, \ - (dest).put_bits = (src).put_bits, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) +#define ASSIGN_STATE(dest, src) \ + ((dest).put_buffer = (src).put_buffer, \ + (dest).put_bits = (src).put_bits, \ + (dest).last_dc_val[0] = (src).last_dc_val[0], \ 
+ (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) #endif #endif @@ -128,12 +127,12 @@ typedef struct { /* Forward declarations */ -METHODDEF(boolean) encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data); -METHODDEF(void) finish_pass_huff (j_compress_ptr cinfo); +METHODDEF(boolean) encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data); +METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo); #ifdef ENTROPY_OPT_SUPPORTED -METHODDEF(boolean) encode_mcu_gather (j_compress_ptr cinfo, - JBLOCKROW *MCU_data); -METHODDEF(void) finish_pass_gather (j_compress_ptr cinfo); +METHODDEF(boolean) encode_mcu_gather(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(void) finish_pass_gather(j_compress_ptr cinfo); #endif @@ -144,9 +143,9 @@ METHODDEF(void) finish_pass_gather (j_compress_ptr cinfo); */ METHODDEF(void) -start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics) +start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics) { - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; int ci, dctbl, actbl; jpeg_component_info *compptr; @@ -180,12 +179,12 @@ start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics) /* Note that jpeg_gen_optimal_table expects 257 entries in each table! 
*/ if (entropy->dc_count_ptrs[dctbl] == NULL) entropy->dc_count_ptrs[dctbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, 257 * sizeof(long)); MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long)); if (entropy->ac_count_ptrs[actbl] == NULL) entropy->ac_count_ptrs[actbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, 257 * sizeof(long)); MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long)); #endif @@ -193,9 +192,9 @@ start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics) /* Compute derived values for Huffman tables */ /* We may do this more than once for a table, but it's not expensive */ jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl, - & entropy->dc_derived_tbls[dctbl]); + &entropy->dc_derived_tbls[dctbl]); jpeg_make_c_derived_tbl(cinfo, FALSE, actbl, - & entropy->ac_derived_tbls[actbl]); + &entropy->ac_derived_tbls[actbl]); } /* Initialize DC predictions to 0 */ entropy->saved.last_dc_val[ci] = 0; @@ -219,8 +218,8 @@ start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics) */ GLOBAL(void) -jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, - c_derived_tbl **pdtbl) +jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno, + c_derived_tbl **pdtbl) { JHUFF_TBL *htbl; c_derived_tbl *dtbl; @@ -244,7 +243,7 @@ jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, /* Allocate a workspace if we haven't already done so. 
*/ if (*pdtbl == NULL) *pdtbl = (c_derived_tbl *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(c_derived_tbl)); dtbl = *pdtbl; @@ -252,11 +251,11 @@ jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, p = 0; for (l = 1; l <= 16; l++) { - i = (int) htbl->bits[l]; + i = (int)htbl->bits[l]; if (i < 0 || p + i > 256) /* protect against table overrun */ ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); while (i--) - huffsize[p++] = (char) l; + huffsize[p++] = (char)l; } huffsize[p] = 0; lastp = p; @@ -268,14 +267,14 @@ jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, si = huffsize[0]; p = 0; while (huffsize[p]) { - while (((int) huffsize[p]) == si) { + while (((int)huffsize[p]) == si) { huffcode[p++] = code; code++; } /* code is now 1 more than the last code used for codelength si; but * it must still fit in si bits, since no code is allowed to be all ones. */ - if (((JLONG) code) >= (((JLONG) 1) << si)) + if (((JLONG)code) >= (((JLONG)1) << si)) ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); code <<= 1; si++; @@ -310,20 +309,21 @@ jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, /* Outputting bytes to the file */ /* Emit a byte, taking 'action' if must suspend. */ -#define emit_byte(state,val,action) \ - { *(state)->next_output_byte++ = (JOCTET) (val); \ - if (--(state)->free_in_buffer == 0) \ - if (! dump_buffer(state)) \ - { action; } } +#define emit_byte(state, val, action) { \ + *(state)->next_output_byte++ = (JOCTET)(val); \ + if (--(state)->free_in_buffer == 0) \ + if (!dump_buffer(state)) \ + { action; } \ +} LOCAL(boolean) -dump_buffer (working_state *state) +dump_buffer(working_state *state) /* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ { struct jpeg_destination_mgr *dest = state->cinfo->dest; - if (! 
(*dest->empty_output_buffer) (state->cinfo)) + if (!(*dest->empty_output_buffer) (state->cinfo)) return FALSE; /* After a successful buffer dump, must reset buffer pointers */ state->next_output_byte = dest->next_output_byte; @@ -349,7 +349,7 @@ dump_buffer (working_state *state) *buffer++ = c; \ if (c == 0xFF) /* need to stuff a zero byte? */ \ *buffer++ = 0; \ - } +} #define PUT_BITS(code, size) { \ put_bits += size; \ @@ -387,7 +387,7 @@ dump_buffer (working_state *state) #error Cannot determine word size #endif -#if SIZEOF_SIZE_T==8 || defined(_WIN64) +#if SIZEOF_SIZE_T == 8 || defined(_WIN64) #define EMIT_BITS(code, size) { \ CHECKBUF47() \ @@ -395,11 +395,11 @@ dump_buffer (working_state *state) } #define EMIT_CODE(code, size) { \ - temp2 &= (((JLONG) 1)<free_in_buffer < BUFSIZE) { \ localbuf = 1; \ buffer = _buffer; \ - } \ - else buffer = state->next_output_byte; \ - } + } else \ + buffer = state->next_output_byte; \ +} #define STORE_BUFFER() { \ if (localbuf) { \ bytes = buffer - _buffer; \ buffer = _buffer; \ while (bytes > 0) { \ - bytestocopy = min(bytes, state->free_in_buffer); \ + bytestocopy = MIN(bytes, state->free_in_buffer); \ MEMCOPY(state->next_output_byte, buffer, bytestocopy); \ state->next_output_byte += bytestocopy; \ buffer += bytestocopy; \ state->free_in_buffer -= bytestocopy; \ if (state->free_in_buffer == 0) \ - if (! 
dump_buffer(state)) return FALSE; \ + if (!dump_buffer(state)) return FALSE; \ bytes -= bytestocopy; \ } \ - } \ - else { \ + } else { \ state->free_in_buffer -= (buffer - state->next_output_byte); \ state->next_output_byte = buffer; \ } \ - } +} LOCAL(boolean) -flush_bits (working_state *state) +flush_bits(working_state *state) { JOCTET _buffer[BUFSIZE], *buffer; size_t put_buffer; int put_bits; @@ -486,8 +485,8 @@ flush_bits (working_state *state) /* Encode a single block's worth of coefficients */ LOCAL(boolean) -encode_one_block_simd (working_state *state, JCOEFPTR block, int last_dc_val, - c_derived_tbl *dctbl, c_derived_tbl *actbl) +encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl) { JOCTET _buffer[BUFSIZE], *buffer; size_t bytes, bytestocopy; int localbuf = 0; @@ -503,8 +502,8 @@ encode_one_block_simd (working_state *state, JCOEFPTR block, int last_dc_val, } LOCAL(boolean) -encode_one_block (working_state *state, JCOEFPTR block, int last_dc_val, - c_derived_tbl *dctbl, c_derived_tbl *actbl) +encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl) { int temp, temp2, temp3; int nbits; @@ -522,11 +521,11 @@ encode_one_block (working_state *state, JCOEFPTR block, int last_dc_val, temp = temp2 = block[0] - last_dc_val; - /* This is a well-known technique for obtaining the absolute value without a - * branch. It is derived from an assembly language technique presented in - * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by - * Agner Fog. - */ + /* This is a well-known technique for obtaining the absolute value without a + * branch. It is derived from an assembly language technique presented in + * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by + * Agner Fog. 
+ */ temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); temp ^= temp3; temp -= temp3; @@ -544,7 +543,7 @@ encode_one_block (working_state *state, JCOEFPTR block, int last_dc_val, EMIT_BITS(code, size) /* Mask off any extra bits in code */ - temp2 &= (((JLONG) 1)<ehufco[temp3]; \ size = actbl->ehufsi[temp3]; \ EMIT_CODE(code, size) \ - r = 0; \ + r = 0; \ } \ } @@ -616,11 +615,11 @@ encode_one_block (working_state *state, JCOEFPTR block, int last_dc_val, */ LOCAL(boolean) -emit_restart (working_state *state, int restart_num) +emit_restart(working_state *state, int restart_num) { int ci; - if (! flush_bits(state)) + if (!flush_bits(state)) return FALSE; emit_byte(state, 0xFF, return FALSE); @@ -641,9 +640,9 @@ emit_restart (working_state *state, int restart_num) */ METHODDEF(boolean) -encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; working_state state; int blkn, ci; jpeg_component_info *compptr; @@ -657,7 +656,7 @@ encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* Emit restart marker if needed */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) - if (! emit_restart(&state, entropy->next_restart_num)) + if (!emit_restart(&state, entropy->next_restart_num)) return FALSE; } @@ -666,10 +665,10 @@ encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data) for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { ci = cinfo->MCU_membership[blkn]; compptr = cinfo->cur_comp_info[ci]; - if (! 
encode_one_block_simd(&state, - MCU_data[blkn][0], state.cur.last_dc_val[ci], - entropy->dc_derived_tbls[compptr->dc_tbl_no], - entropy->ac_derived_tbls[compptr->ac_tbl_no])) + if (!encode_one_block_simd(&state, + MCU_data[blkn][0], state.cur.last_dc_val[ci], + entropy->dc_derived_tbls[compptr->dc_tbl_no], + entropy->ac_derived_tbls[compptr->ac_tbl_no])) return FALSE; /* Update last_dc_val */ state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; @@ -678,10 +677,10 @@ encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data) for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { ci = cinfo->MCU_membership[blkn]; compptr = cinfo->cur_comp_info[ci]; - if (! encode_one_block(&state, - MCU_data[blkn][0], state.cur.last_dc_val[ci], - entropy->dc_derived_tbls[compptr->dc_tbl_no], - entropy->ac_derived_tbls[compptr->ac_tbl_no])) + if (!encode_one_block(&state, + MCU_data[blkn][0], state.cur.last_dc_val[ci], + entropy->dc_derived_tbls[compptr->dc_tbl_no], + entropy->ac_derived_tbls[compptr->ac_tbl_no])) return FALSE; /* Update last_dc_val */ state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; @@ -712,9 +711,9 @@ encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(void) -finish_pass_huff (j_compress_ptr cinfo) +finish_pass_huff(j_compress_ptr cinfo) { - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; working_state state; /* Load up working state ... flush_bits needs it */ @@ -724,7 +723,7 @@ finish_pass_huff (j_compress_ptr cinfo) state.cinfo = cinfo; /* Flush out the last data */ - if (! 
flush_bits(&state)) + if (!flush_bits(&state)) ERREXIT(cinfo, JERR_CANT_SUSPEND); /* Update state */ @@ -751,8 +750,8 @@ finish_pass_huff (j_compress_ptr cinfo) /* Process a single block's worth of coefficients */ LOCAL(void) -htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, - long dc_counts[], long ac_counts[]) +htest_one_block(j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, + long dc_counts[], long ac_counts[]) { register int temp; register int nbits; @@ -773,7 +772,7 @@ htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, /* Check for out-of-range coefficient values. * Since we're encoding a difference, the range limit is twice as much. */ - if (nbits > MAX_COEF_BITS+1) + if (nbits > MAX_COEF_BITS + 1) ERREXIT(cinfo, JERR_BAD_DCT_COEF); /* Count the Huffman symbol for the number of bits */ @@ -824,9 +823,9 @@ htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, */ METHODDEF(boolean) -encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu_gather(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; int blkn, ci; jpeg_component_info *compptr; @@ -863,13 +862,14 @@ encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data) * one bits (so that padding bits added at the end of a compressed segment * can't look like a valid code). Because of the canonical ordering of * codewords, this just means that there must be an unused slot in the - * longest codeword length category. Section K.2 of the JPEG spec suggests - * reserving such a slot by pretending that symbol 256 is a valid symbol - * with count 1. In theory that's not optimal; giving it count zero but - * including it in the symbol set anyway should give a better Huffman code. 
- * But the theoretically better code actually seems to come out worse in - * practice, because it produces more all-ones bytes (which incur stuffed - * zero bytes in the final file). In any case the difference is tiny. + * longest codeword length category. Annex K (Clause K.2) of + * Rec. ITU-T T.81 (1992) | ISO/IEC 10918-1:1994 suggests reserving such a slot + * by pretending that symbol 256 is a valid symbol with count 1. In theory + * that's not optimal; giving it count zero but including it in the symbol set + * anyway should give a better Huffman code. But the theoretically better code + * actually seems to come out worse in practice, because it produces more + * all-ones bytes (which incur stuffed zero bytes in the final file). In any + * case the difference is tiny. * * The JPEG standard requires Huffman codes to be no more than 16 bits long. * If some symbols have a very small but nonzero probability, the Huffman tree @@ -884,10 +884,10 @@ encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data) */ GLOBAL(void) -jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]) +jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]) { -#define MAX_CLEN 32 /* assumed maximum initial code length */ - UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */ +#define MAX_CLEN 32 /* assumed maximum initial code length */ + UINT8 bits[MAX_CLEN + 1]; /* bits[k] = # of symbols with code length k */ int codesize[257]; /* codesize[k] = code length of symbol k */ int others[257]; /* next symbol in current branch of tree */ int c1, c2; @@ -971,13 +971,13 @@ jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]) /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure * Huffman procedure assigned any such lengths, we must adjust the coding. 
- * Here is what the JPEG spec says about how this next bit works: - * Since symbols are paired for the longest Huffman code, the symbols are - * removed from this length category two at a time. The prefix for the pair - * (which is one bit shorter) is allocated to one of the pair; then, - * skipping the BITS entry for that prefix length, a code word from the next - * shortest nonzero BITS entry is converted into a prefix for two code words - * one bit longer. + * Here is what Rec. ITU-T T.81 | ISO/IEC 10918-1 says about how this next + * bit works: Since symbols are paired for the longest Huffman code, the + * symbols are removed from this length category two at a time. The prefix + * for the pair (which is one bit shorter) is allocated to one of the pair; + * then, skipping the BITS entry for that prefix length, a code word from the + * next shortest nonzero BITS entry is converted into a prefix for two code + * words one bit longer. */ for (i = MAX_CLEN; i > 16; i--) { @@ -987,8 +987,8 @@ jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]) j--; bits[i] -= 2; /* remove two symbols */ - bits[i-1]++; /* one goes in this length */ - bits[j+1] += 2; /* two new symbols in this length */ + bits[i - 1]++; /* one goes in this length */ + bits[j + 1] += 2; /* two new symbols in this length */ bits[j]--; /* symbol of this length is now a prefix */ } } @@ -1003,13 +1003,14 @@ jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]) /* Return a list of the symbols sorted by code length */ /* It's not real clear to me why we don't need to consider the codelength - * changes made above, but the JPEG spec seems to think this works. + * changes made above, but Rec. ITU-T T.81 | ISO/IEC 10918-1 seems to think + * this works. 
*/ p = 0; for (i = 1; i <= MAX_CLEN; i++) { for (j = 0; j <= 255; j++) { if (codesize[j] == i) { - htbl->huffval[p] = (UINT8) j; + htbl->huffval[p] = (UINT8)j; p++; } } @@ -1025,9 +1026,9 @@ jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]) */ METHODDEF(void) -finish_pass_gather (j_compress_ptr cinfo) +finish_pass_gather(j_compress_ptr cinfo) { - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; int ci, dctbl, actbl; jpeg_component_info *compptr; JHUFF_TBL **htblptr; @@ -1044,17 +1045,17 @@ finish_pass_gather (j_compress_ptr cinfo) compptr = cinfo->cur_comp_info[ci]; dctbl = compptr->dc_tbl_no; actbl = compptr->ac_tbl_no; - if (! did_dc[dctbl]) { - htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl]; + if (!did_dc[dctbl]) { + htblptr = &cinfo->dc_huff_tbl_ptrs[dctbl]; if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); + *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo); jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]); did_dc[dctbl] = TRUE; } - if (! 
did_ac[actbl]) { - htblptr = & cinfo->ac_huff_tbl_ptrs[actbl]; + if (!did_ac[actbl]) { + htblptr = &cinfo->ac_huff_tbl_ptrs[actbl]; if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); + *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo); jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]); did_ac[actbl] = TRUE; } @@ -1070,15 +1071,15 @@ finish_pass_gather (j_compress_ptr cinfo) */ GLOBAL(void) -jinit_huff_encoder (j_compress_ptr cinfo) +jinit_huff_encoder(j_compress_ptr cinfo) { huff_entropy_ptr entropy; int i; entropy = (huff_entropy_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(huff_entropy_encoder)); - cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; + cinfo->entropy = (struct jpeg_entropy_encoder *)entropy; entropy->pub.start_pass = start_pass_huff; /* Mark tables unallocated */ diff --git a/jchuff.h b/jchuff.h index 4236089..314a232 100644 --- a/jchuff.h +++ b/jchuff.h @@ -20,9 +20,9 @@ */ #if BITS_IN_JSAMPLE == 8 -#define MAX_COEF_BITS 10 +#define MAX_COEF_BITS 10 #else -#define MAX_COEF_BITS 14 +#define MAX_COEF_BITS 14 #endif /* Derived data constructed for each Huffman table */ @@ -34,10 +34,9 @@ typedef struct { } c_derived_tbl; /* Expand a Huffman table definition into the derived format */ -EXTERN(void) jpeg_make_c_derived_tbl - (j_compress_ptr cinfo, boolean isDC, int tblno, - c_derived_tbl ** pdtbl); +EXTERN(void) jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, + int tblno, c_derived_tbl **pdtbl); /* Generate an optimal table definition given the specified counts */ -EXTERN(void) jpeg_gen_optimal_table - (j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[]); +EXTERN(void) jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, + long freq[]); diff --git a/jcicc.c b/jcicc.c new file mode 100644 index 0000000..11037ff --- /dev/null +++ b/jcicc.c @@ -0,0 +1,105 @@ +/* + * jcicc.c + * + 
* Copyright (C) 1997-1998, Thomas G. Lane, Todd Newman. + * Copyright (C) 2017, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file provides code to write International Color Consortium (ICC) device + * profiles embedded in JFIF JPEG image files. The ICC has defined a standard + * for including such data in JPEG "APP2" markers. The code given here does + * not know anything about the internal structure of the ICC profile data; it + * just knows how to embed the profile data in a JPEG file while writing it. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" +#include "jerror.h" + + +/* + * Since an ICC profile can be larger than the maximum size of a JPEG marker + * (64K), we need provisions to split it into multiple markers. The format + * defined by the ICC specifies one or more APP2 markers containing the + * following data: + * Identifying string ASCII "ICC_PROFILE\0" (12 bytes) + * Marker sequence number 1 for first APP2, 2 for next, etc (1 byte) + * Number of markers Total number of APP2's used (1 byte) + * Profile data (remainder of APP2 data) + * Decoders should use the marker sequence numbers to reassemble the profile, + * rather than assuming that the APP2 markers appear in the correct sequence. + */ + +#define ICC_MARKER (JPEG_APP0 + 2) /* JPEG marker code for ICC */ +#define ICC_OVERHEAD_LEN 14 /* size of non-profile data in APP2 */ +#define MAX_BYTES_IN_MARKER 65533 /* maximum data len of a JPEG marker */ +#define MAX_DATA_BYTES_IN_MARKER (MAX_BYTES_IN_MARKER - ICC_OVERHEAD_LEN) + + +/* + * This routine writes the given ICC profile data into a JPEG file. It *must* + * be called AFTER calling jpeg_start_compress() and BEFORE the first call to + * jpeg_write_scanlines(). (This ordering ensures that the APP2 marker(s) will + * appear after the SOI and JFIF or Adobe markers, but before all else.) 
+ */ + +GLOBAL(void) +jpeg_write_icc_profile(j_compress_ptr cinfo, const JOCTET *icc_data_ptr, + unsigned int icc_data_len) +{ + unsigned int num_markers; /* total number of markers we'll write */ + int cur_marker = 1; /* per spec, counting starts at 1 */ + unsigned int length; /* number of bytes to write in this marker */ + + if (icc_data_ptr == NULL || icc_data_len == 0) + ERREXIT(cinfo, JERR_BUFFER_SIZE); + if (cinfo->global_state < CSTATE_SCANNING) + ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); + + /* Calculate the number of markers we'll need, rounding up of course */ + num_markers = icc_data_len / MAX_DATA_BYTES_IN_MARKER; + if (num_markers * MAX_DATA_BYTES_IN_MARKER != icc_data_len) + num_markers++; + + while (icc_data_len > 0) { + /* length of profile to put in this marker */ + length = icc_data_len; + if (length > MAX_DATA_BYTES_IN_MARKER) + length = MAX_DATA_BYTES_IN_MARKER; + icc_data_len -= length; + + /* Write the JPEG marker header (APP2 code and marker length) */ + jpeg_write_m_header(cinfo, ICC_MARKER, + (unsigned int)(length + ICC_OVERHEAD_LEN)); + + /* Write the marker identifying string "ICC_PROFILE" (null-terminated). We + * code it in this less-than-transparent way so that the code works even if + * the local character set is not ASCII. 
+ */ + jpeg_write_m_byte(cinfo, 0x49); + jpeg_write_m_byte(cinfo, 0x43); + jpeg_write_m_byte(cinfo, 0x43); + jpeg_write_m_byte(cinfo, 0x5F); + jpeg_write_m_byte(cinfo, 0x50); + jpeg_write_m_byte(cinfo, 0x52); + jpeg_write_m_byte(cinfo, 0x4F); + jpeg_write_m_byte(cinfo, 0x46); + jpeg_write_m_byte(cinfo, 0x49); + jpeg_write_m_byte(cinfo, 0x4C); + jpeg_write_m_byte(cinfo, 0x45); + jpeg_write_m_byte(cinfo, 0x0); + + /* Add the sequencing info */ + jpeg_write_m_byte(cinfo, cur_marker); + jpeg_write_m_byte(cinfo, (int)num_markers); + + /* Add the profile data */ + while (length--) { + jpeg_write_m_byte(cinfo, *icc_data_ptr); + icc_data_ptr++; + } + cur_marker++; + } +} diff --git a/jcinit.c b/jcinit.c index 463bd8c..78aa465 100644 --- a/jcinit.c +++ b/jcinit.c @@ -28,13 +28,13 @@ */ GLOBAL(void) -jinit_compress_master (j_compress_ptr cinfo) +jinit_compress_master(j_compress_ptr cinfo) { /* Initialize master control (includes parameter checking/processing) */ jinit_c_master_control(cinfo, FALSE /* full compression */); /* Preprocessing */ - if (! cinfo->raw_data_in) { + if (!cinfo->raw_data_in) { jinit_color_converter(cinfo); jinit_downsampler(cinfo); jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */); @@ -60,14 +60,14 @@ jinit_compress_master (j_compress_ptr cinfo) } /* Need a full-image coefficient buffer in any multi-pass mode. */ - jinit_c_coef_controller(cinfo, - (boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding)); + jinit_c_coef_controller(cinfo, (boolean)(cinfo->num_scans > 1 || + cinfo->optimize_coding)); jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */); jinit_marker_writer(cinfo); /* We can now tell the memory manager to allocate virtual arrays. */ - (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo); + (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo); /* Write the datastream header (SOI) immediately. * Frame and scan headers are postponed till later. 
diff --git a/jcmainct.c b/jcmainct.c index d01f463..3f23028 100644 --- a/jcmainct.c +++ b/jcmainct.c @@ -39,9 +39,10 @@ typedef my_main_controller *my_main_ptr; /* Forward declarations */ -METHODDEF(void) process_data_simple_main - (j_compress_ptr cinfo, JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail); +METHODDEF(void) process_data_simple_main(j_compress_ptr cinfo, + JSAMPARRAY input_buf, + JDIMENSION *in_row_ctr, + JDIMENSION in_rows_avail); /* @@ -49,9 +50,9 @@ METHODDEF(void) process_data_simple_main */ METHODDEF(void) -start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode) +start_pass_main(j_compress_ptr cinfo, J_BUF_MODE pass_mode) { - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; /* Do nothing in raw-data mode. */ if (cinfo->raw_data_in) @@ -75,19 +76,18 @@ start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode) */ METHODDEF(void) -process_data_simple_main (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail) +process_data_simple_main(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail) { - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) { /* Read input data if we haven't filled the main buffer yet */ if (main_ptr->rowgroup_ctr < DCTSIZE) - (*cinfo->prep->pre_process_data) (cinfo, - input_buf, in_row_ctr, in_rows_avail, - main_ptr->buffer, &main_ptr->rowgroup_ctr, - (JDIMENSION) DCTSIZE); + (*cinfo->prep->pre_process_data) (cinfo, input_buf, in_row_ctr, + in_rows_avail, main_ptr->buffer, + &main_ptr->rowgroup_ctr, + (JDIMENSION)DCTSIZE); /* If we don't have a full iMCU row buffered, return to application for * more data. 
Note that preprocessor will always pad to fill the iMCU row @@ -97,14 +97,14 @@ process_data_simple_main (j_compress_ptr cinfo, return; /* Send the completed row to the compressor */ - if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) { + if (!(*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) { /* If compressor did not consume the whole row, then we must need to * suspend processing and return to the application. In this situation * we pretend we didn't yet consume the last input row; otherwise, if * it happened to be the last row of the image, the application would * think we were done. */ - if (! main_ptr->suspended) { + if (!main_ptr->suspended) { (*in_row_ctr)--; main_ptr->suspended = TRUE; } @@ -128,16 +128,16 @@ process_data_simple_main (j_compress_ptr cinfo, */ GLOBAL(void) -jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer) +jinit_c_main_controller(j_compress_ptr cinfo, boolean need_full_buffer) { my_main_ptr main_ptr; int ci; jpeg_component_info *compptr; main_ptr = (my_main_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_main_controller)); - cinfo->main = (struct jpeg_c_main_controller *) main_ptr; + cinfo->main = (struct jpeg_c_main_controller *)main_ptr; main_ptr->pub.start_pass = start_pass_main; /* We don't need to create a buffer in raw-data mode. 
*/ @@ -154,9 +154,9 @@ jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer) for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, + ((j_common_ptr)cinfo, JPOOL_IMAGE, compptr->width_in_blocks * DCTSIZE, - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); + (JDIMENSION)(compptr->v_samp_factor * DCTSIZE)); } } } diff --git a/jcmarker.c b/jcmarker.c index 463f665..801fbab 100644 --- a/jcmarker.c +++ b/jcmarker.c @@ -110,30 +110,30 @@ typedef my_marker_writer *my_marker_ptr; */ LOCAL(void) -emit_byte (j_compress_ptr cinfo, int val) +emit_byte(j_compress_ptr cinfo, int val) /* Emit a byte */ { struct jpeg_destination_mgr *dest = cinfo->dest; - *(dest->next_output_byte)++ = (JOCTET) val; + *(dest->next_output_byte)++ = (JOCTET)val; if (--dest->free_in_buffer == 0) { - if (! (*dest->empty_output_buffer) (cinfo)) + if (!(*dest->empty_output_buffer) (cinfo)) ERREXIT(cinfo, JERR_CANT_SUSPEND); } } LOCAL(void) -emit_marker (j_compress_ptr cinfo, JPEG_MARKER mark) +emit_marker(j_compress_ptr cinfo, JPEG_MARKER mark) /* Emit a marker code */ { emit_byte(cinfo, 0xFF); - emit_byte(cinfo, (int) mark); + emit_byte(cinfo, (int)mark); } LOCAL(void) -emit_2bytes (j_compress_ptr cinfo, int value) +emit_2bytes(j_compress_ptr cinfo, int value) /* Emit a 2-byte integer; these are always MSB first in JPEG files */ { emit_byte(cinfo, (value >> 8) & 0xFF); @@ -146,7 +146,7 @@ emit_2bytes (j_compress_ptr cinfo, int value) */ LOCAL(int) -emit_dqt (j_compress_ptr cinfo, int index) +emit_dqt(j_compress_ptr cinfo, int index) /* Emit a DQT marker */ /* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */ { @@ -163,19 +163,19 @@ emit_dqt (j_compress_ptr cinfo, int index) prec = 1; } - if (! qtbl->sent_table) { + if (!qtbl->sent_table) { emit_marker(cinfo, M_DQT); - emit_2bytes(cinfo, prec ? 
DCTSIZE2*2 + 1 + 2 : DCTSIZE2 + 1 + 2); + emit_2bytes(cinfo, prec ? DCTSIZE2 * 2 + 1 + 2 : DCTSIZE2 + 1 + 2); - emit_byte(cinfo, index + (prec<<4)); + emit_byte(cinfo, index + (prec << 4)); for (i = 0; i < DCTSIZE2; i++) { /* The table entries must be emitted in zigzag order. */ unsigned int qval = qtbl->quantval[jpeg_natural_order[i]]; if (prec) - emit_byte(cinfo, (int) (qval >> 8)); - emit_byte(cinfo, (int) (qval & 0xFF)); + emit_byte(cinfo, (int)(qval >> 8)); + emit_byte(cinfo, (int)(qval & 0xFF)); } qtbl->sent_table = TRUE; @@ -186,7 +186,7 @@ emit_dqt (j_compress_ptr cinfo, int index) LOCAL(void) -emit_dht (j_compress_ptr cinfo, int index, boolean is_ac) +emit_dht(j_compress_ptr cinfo, int index, boolean is_ac) /* Emit a DHT marker */ { JHUFF_TBL *htbl; @@ -202,7 +202,7 @@ emit_dht (j_compress_ptr cinfo, int index, boolean is_ac) if (htbl == NULL) ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index); - if (! htbl->sent_table) { + if (!htbl->sent_table) { emit_marker(cinfo, M_DHT); length = 0; @@ -224,7 +224,7 @@ emit_dht (j_compress_ptr cinfo, int index, boolean is_ac) LOCAL(void) -emit_dac (j_compress_ptr cinfo) +emit_dac(j_compress_ptr cinfo) /* Emit a DAC marker */ /* Since the useful info is so small, we want to emit all the tables in */ /* one DAC marker. Therefore this routine does its own scan of the table. 
*/ @@ -255,12 +255,12 @@ emit_dac (j_compress_ptr cinfo) if (length) { emit_marker(cinfo, M_DAC); - emit_2bytes(cinfo, length*2 + 2); + emit_2bytes(cinfo, length * 2 + 2); for (i = 0; i < NUM_ARITH_TBLS; i++) { if (dc_in_use[i]) { emit_byte(cinfo, i); - emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4)); + emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i] << 4)); } if (ac_in_use[i]) { emit_byte(cinfo, i + 0x10); @@ -273,19 +273,19 @@ emit_dac (j_compress_ptr cinfo) LOCAL(void) -emit_dri (j_compress_ptr cinfo) +emit_dri(j_compress_ptr cinfo) /* Emit a DRI marker */ { emit_marker(cinfo, M_DRI); emit_2bytes(cinfo, 4); /* fixed length */ - emit_2bytes(cinfo, (int) cinfo->restart_interval); + emit_2bytes(cinfo, (int)cinfo->restart_interval); } LOCAL(void) -emit_sof (j_compress_ptr cinfo, JPEG_MARKER code) +emit_sof(j_compress_ptr cinfo, JPEG_MARKER code) /* Emit a SOF marker */ { int ci; @@ -296,13 +296,12 @@ emit_sof (j_compress_ptr cinfo, JPEG_MARKER code) emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */ /* Make sure image isn't bigger than SOF field can handle */ - if ((long) cinfo->_jpeg_height > 65535L || - (long) cinfo->_jpeg_width > 65535L) - ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535); + if ((long)cinfo->_jpeg_height > 65535L || (long)cinfo->_jpeg_width > 65535L) + ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int)65535); emit_byte(cinfo, cinfo->data_precision); - emit_2bytes(cinfo, (int) cinfo->_jpeg_height); - emit_2bytes(cinfo, (int) cinfo->_jpeg_width); + emit_2bytes(cinfo, (int)cinfo->_jpeg_height); + emit_2bytes(cinfo, (int)cinfo->_jpeg_width); emit_byte(cinfo, cinfo->num_components); @@ -316,7 +315,7 @@ emit_sof (j_compress_ptr cinfo, JPEG_MARKER code) LOCAL(void) -emit_sos (j_compress_ptr cinfo) +emit_sos(j_compress_ptr cinfo) /* Emit a SOS marker */ { int i, td, ta; @@ -351,7 +350,7 @@ emit_sos (j_compress_ptr cinfo) LOCAL(void) -emit_jfif_app0 (j_compress_ptr cinfo) 
+emit_jfif_app0(j_compress_ptr cinfo) /* Emit a JFIF-compliant APP0 marker */ { /* @@ -378,15 +377,15 @@ emit_jfif_app0 (j_compress_ptr cinfo) emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */ emit_byte(cinfo, cinfo->JFIF_minor_version); emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */ - emit_2bytes(cinfo, (int) cinfo->X_density); - emit_2bytes(cinfo, (int) cinfo->Y_density); + emit_2bytes(cinfo, (int)cinfo->X_density); + emit_2bytes(cinfo, (int)cinfo->Y_density); emit_byte(cinfo, 0); /* No thumbnail image */ emit_byte(cinfo, 0); } LOCAL(void) -emit_adobe_app14 (j_compress_ptr cinfo) +emit_adobe_app14(j_compress_ptr cinfo) /* Emit an Adobe APP14 marker */ { /* @@ -440,19 +439,19 @@ emit_adobe_app14 (j_compress_ptr cinfo) */ METHODDEF(void) -write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen) +write_marker_header(j_compress_ptr cinfo, int marker, unsigned int datalen) /* Emit an arbitrary marker header */ { - if (datalen > (unsigned int) 65533) /* safety check */ + if (datalen > (unsigned int)65533) /* safety check */ ERREXIT(cinfo, JERR_BAD_LENGTH); - emit_marker(cinfo, (JPEG_MARKER) marker); + emit_marker(cinfo, (JPEG_MARKER)marker); - emit_2bytes(cinfo, (int) (datalen + 2)); /* total length */ + emit_2bytes(cinfo, (int)(datalen + 2)); /* total length */ } METHODDEF(void) -write_marker_byte (j_compress_ptr cinfo, int val) +write_marker_byte(j_compress_ptr cinfo, int val) /* Emit one byte of marker parameters following write_marker_header */ { emit_byte(cinfo, val); @@ -471,9 +470,9 @@ write_marker_byte (j_compress_ptr cinfo, int val) */ METHODDEF(void) -write_file_header (j_compress_ptr cinfo) +write_file_header(j_compress_ptr cinfo) { - my_marker_ptr marker = (my_marker_ptr) cinfo->marker; + my_marker_ptr marker = (my_marker_ptr)cinfo->marker; emit_marker(cinfo, M_SOI); /* first the SOI */ @@ -496,7 +495,7 @@ write_file_header (j_compress_ptr cinfo) */ METHODDEF(void) -write_frame_header (j_compress_ptr 
cinfo) +write_frame_header(j_compress_ptr cinfo) { int ci, prec; boolean is_baseline; @@ -556,9 +555,9 @@ write_frame_header (j_compress_ptr cinfo) */ METHODDEF(void) -write_scan_header (j_compress_ptr cinfo) +write_scan_header(j_compress_ptr cinfo) { - my_marker_ptr marker = (my_marker_ptr) cinfo->marker; + my_marker_ptr marker = (my_marker_ptr)cinfo->marker; int i; jpeg_component_info *compptr; @@ -600,7 +599,7 @@ write_scan_header (j_compress_ptr cinfo) */ METHODDEF(void) -write_file_trailer (j_compress_ptr cinfo) +write_file_trailer(j_compress_ptr cinfo) { emit_marker(cinfo, M_EOI); } @@ -614,7 +613,7 @@ write_file_trailer (j_compress_ptr cinfo) */ METHODDEF(void) -write_tables_only (j_compress_ptr cinfo) +write_tables_only(j_compress_ptr cinfo) { int i; @@ -622,10 +621,10 @@ write_tables_only (j_compress_ptr cinfo) for (i = 0; i < NUM_QUANT_TBLS; i++) { if (cinfo->quant_tbl_ptrs[i] != NULL) - (void) emit_dqt(cinfo, i); + (void)emit_dqt(cinfo, i); } - if (! cinfo->arith_code) { + if (!cinfo->arith_code) { for (i = 0; i < NUM_HUFF_TBLS; i++) { if (cinfo->dc_huff_tbl_ptrs[i] != NULL) emit_dht(cinfo, i, FALSE); @@ -643,15 +642,15 @@ write_tables_only (j_compress_ptr cinfo) */ GLOBAL(void) -jinit_marker_writer (j_compress_ptr cinfo) +jinit_marker_writer(j_compress_ptr cinfo) { my_marker_ptr marker; /* Create the subobject */ marker = (my_marker_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_marker_writer)); - cinfo->marker = (struct jpeg_marker_writer *) marker; + cinfo->marker = (struct jpeg_marker_writer *)marker; /* Initialize method pointers */ marker->pub.write_file_header = write_file_header; marker->pub.write_frame_header = write_frame_header; diff --git a/jcmaster.c b/jcmaster.c index 03a8b40..93b3de6 100644 --- a/jcmaster.c +++ b/jcmaster.c @@ -5,7 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 2003-2010 by Guido Vollbeding. 
* libjpeg-turbo Modifications: - * Copyright (C) 2010, 2016, D. R. Commander. + * Copyright (C) 2010, 2016, 2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -25,9 +25,9 @@ /* Private state */ typedef enum { - main_pass, /* input data, also do first output step */ - huff_opt_pass, /* Huffman code optimization pass */ - output_pass /* data output pass */ + main_pass, /* input data, also do first output step */ + huff_opt_pass, /* Huffman code optimization pass */ + output_pass /* data output pass */ } c_pass_type; typedef struct { @@ -66,7 +66,7 @@ typedef my_comp_master *my_master_ptr; */ GLOBAL(void) -jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo) +jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo) /* Do computations that are needed before master selection phase */ { /* Hardwire it to "no scaling" */ @@ -79,7 +79,7 @@ jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo) LOCAL(void) -initial_setup (j_compress_ptr cinfo, boolean transcode_only) +initial_setup(j_compress_ptr cinfo, boolean transcode_only) /* Do computations that are needed before master selection phase */ { int ci; @@ -95,19 +95,19 @@ initial_setup (j_compress_ptr cinfo, boolean transcode_only) #endif /* Sanity check on image dimensions */ - if (cinfo->_jpeg_height <= 0 || cinfo->_jpeg_width <= 0 - || cinfo->num_components <= 0 || cinfo->input_components <= 0) + if (cinfo->_jpeg_height <= 0 || cinfo->_jpeg_width <= 0 || + cinfo->num_components <= 0 || cinfo->input_components <= 0) ERREXIT(cinfo, JERR_EMPTY_IMAGE); /* Make sure image isn't bigger than I can handle */ - if ((long) cinfo->_jpeg_height > (long) JPEG_MAX_DIMENSION || - (long) cinfo->_jpeg_width > (long) JPEG_MAX_DIMENSION) - ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION); + if ((long)cinfo->_jpeg_height > (long)JPEG_MAX_DIMENSION || + (long)cinfo->_jpeg_width > (long)JPEG_MAX_DIMENSION) + ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int)JPEG_MAX_DIMENSION); /* 
Width of an input scanline must be representable as JDIMENSION. */ - samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components; - jd_samplesperrow = (JDIMENSION) samplesperrow; - if ((long) jd_samplesperrow != samplesperrow) + samplesperrow = (long)cinfo->image_width * (long)cinfo->input_components; + jd_samplesperrow = (JDIMENSION)samplesperrow; + if ((long)jd_samplesperrow != samplesperrow) ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); /* For now, precision must match compiled-in value... */ @@ -124,8 +124,10 @@ initial_setup (j_compress_ptr cinfo, boolean transcode_only) cinfo->max_v_samp_factor = 1; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR || - compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) + if (compptr->h_samp_factor <= 0 || + compptr->h_samp_factor > MAX_SAMP_FACTOR || + compptr->v_samp_factor <= 0 || + compptr->v_samp_factor > MAX_SAMP_FACTOR) ERREXIT(cinfo, JERR_BAD_SAMPLING); cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor, compptr->h_samp_factor); @@ -146,18 +148,18 @@ initial_setup (j_compress_ptr cinfo, boolean transcode_only) #endif /* Size in DCT blocks */ compptr->width_in_blocks = (JDIMENSION) - jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor, - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + jdiv_round_up((long)cinfo->_jpeg_width * (long)compptr->h_samp_factor, + (long)(cinfo->max_h_samp_factor * DCTSIZE)); compptr->height_in_blocks = (JDIMENSION) - jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor, - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + jdiv_round_up((long)cinfo->_jpeg_height * (long)compptr->v_samp_factor, + (long)(cinfo->max_v_samp_factor * DCTSIZE)); /* Size in samples */ compptr->downsampled_width = (JDIMENSION) - jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor, - (long) cinfo->max_h_samp_factor); + 
jdiv_round_up((long)cinfo->_jpeg_width * (long)compptr->h_samp_factor, + (long)cinfo->max_h_samp_factor); compptr->downsampled_height = (JDIMENSION) - jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor, - (long) cinfo->max_v_samp_factor); + jdiv_round_up((long)cinfo->_jpeg_height * (long)compptr->v_samp_factor, + (long)cinfo->max_v_samp_factor); /* Mark component needed (this flag isn't actually used for compression) */ compptr->component_needed = TRUE; } @@ -166,15 +168,15 @@ initial_setup (j_compress_ptr cinfo, boolean transcode_only) * main controller will call coefficient controller). */ cinfo->total_iMCU_rows = (JDIMENSION) - jdiv_round_up((long) cinfo->_jpeg_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); + jdiv_round_up((long)cinfo->_jpeg_height, + (long)(cinfo->max_v_samp_factor * DCTSIZE)); } #ifdef C_MULTISCAN_FILES_SUPPORTED LOCAL(void) -validate_script (j_compress_ptr cinfo) +validate_script(j_compress_ptr cinfo) /* Verify that the scan script in cinfo->scan_info[] is valid; also * determine whether it uses progressive JPEG, and set cinfo->progressive_mode. */ @@ -196,10 +198,10 @@ validate_script (j_compress_ptr cinfo) * for progressive JPEG, no scan can have this. 
*/ scanptr = cinfo->scan_info; - if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2-1) { + if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2 - 1) { #ifdef C_PROGRESSIVE_SUPPORTED cinfo->progressive_mode = TRUE; - last_bitpos_ptr = & last_bitpos[0][0]; + last_bitpos_ptr = &last_bitpos[0][0]; for (ci = 0; ci < cinfo->num_components; ci++) for (coefi = 0; coefi < DCTSIZE2; coefi++) *last_bitpos_ptr++ = -1; @@ -222,7 +224,7 @@ validate_script (j_compress_ptr cinfo) if (thisi < 0 || thisi >= cinfo->num_components) ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); /* Components must appear in SOF order within each scan */ - if (ci > 0 && thisi <= scanptr->component_index[ci-1]) + if (ci > 0 && thisi <= scanptr->component_index[ci - 1]) ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno); } /* Validate progression parameters */ @@ -232,17 +234,17 @@ validate_script (j_compress_ptr cinfo) Al = scanptr->Al; if (cinfo->progressive_mode) { #ifdef C_PROGRESSIVE_SUPPORTED - /* The JPEG spec simply gives the ranges 0..13 for Ah and Al, but that - * seems wrong: the upper bound ought to depend on data precision. - * Perhaps they really meant 0..N+1 for N-bit precision. + /* Rec. ITU-T T.81 | ISO/IEC 10918-1 simply gives the ranges 0..13 for Ah + * and Al, but that seems wrong: the upper bound ought to depend on data + * precision. Perhaps they really meant 0..N+1 for N-bit precision. * Here we allow 0..10 for 8-bit data; Al larger than 10 results in * out-of-range reconstructed DC values during the first DC scan, * which might cause problems for some decoders. 
*/ #if BITS_IN_JSAMPLE == 8 -#define MAX_AH_AL 10 +#define MAX_AH_AL 10 #else -#define MAX_AH_AL 13 +#define MAX_AH_AL 13 #endif if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 || Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL) @@ -255,7 +257,7 @@ validate_script (j_compress_ptr cinfo) ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); } for (ci = 0; ci < ncomps; ci++) { - last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0]; + last_bitpos_ptr = &last_bitpos[scanptr->component_index[ci]][0]; if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */ ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); for (coefi = Ss; coefi <= Se; coefi++) { @@ -265,7 +267,7 @@ validate_script (j_compress_ptr cinfo) ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); } else { /* not first scan */ - if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1) + if (Ah != last_bitpos_ptr[coefi] || Al != Ah - 1) ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); } last_bitpos_ptr[coefi] = Al; @@ -274,7 +276,7 @@ validate_script (j_compress_ptr cinfo) #endif } else { /* For sequential JPEG, all progression parameters must be these: */ - if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0) + if (Ss != 0 || Se != DCTSIZE2 - 1 || Ah != 0 || Al != 0) ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno); /* Make sure components are not sent twice */ for (ci = 0; ci < ncomps; ci++) { @@ -301,7 +303,7 @@ validate_script (j_compress_ptr cinfo) #endif } else { for (ci = 0; ci < cinfo->num_components; ci++) { - if (! 
component_sent[ci]) + if (!component_sent[ci]) ERREXIT(cinfo, JERR_MISSING_DATA); } } @@ -311,7 +313,7 @@ validate_script (j_compress_ptr cinfo) LOCAL(void) -select_scan_parameters (j_compress_ptr cinfo) +select_scan_parameters(j_compress_ptr cinfo) /* Set up the scan parameters for the current scan */ { int ci; @@ -319,7 +321,7 @@ select_scan_parameters (j_compress_ptr cinfo) #ifdef C_MULTISCAN_FILES_SUPPORTED if (cinfo->scan_info != NULL) { /* Prepare for current scan --- the script is already validated */ - my_master_ptr master = (my_master_ptr) cinfo->master; + my_master_ptr master = (my_master_ptr)cinfo->master; const jpeg_scan_info *scanptr = cinfo->scan_info + master->scan_number; cinfo->comps_in_scan = scanptr->comps_in_scan; @@ -331,8 +333,7 @@ select_scan_parameters (j_compress_ptr cinfo) cinfo->Se = scanptr->Se; cinfo->Ah = scanptr->Ah; cinfo->Al = scanptr->Al; - } - else + } else #endif { /* Prepare for single sequential-JPEG scan containing all components */ @@ -344,7 +345,7 @@ select_scan_parameters (j_compress_ptr cinfo) cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci]; } cinfo->Ss = 0; - cinfo->Se = DCTSIZE2-1; + cinfo->Se = DCTSIZE2 - 1; cinfo->Ah = 0; cinfo->Al = 0; } @@ -352,7 +353,7 @@ select_scan_parameters (j_compress_ptr cinfo) LOCAL(void) -per_scan_setup (j_compress_ptr cinfo) +per_scan_setup(j_compress_ptr cinfo) /* Do computations that are needed before processing a JPEG scan */ /* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */ { @@ -377,7 +378,7 @@ per_scan_setup (j_compress_ptr cinfo) /* For noninterleaved scans, it is convenient to define last_row_height * as the number of block rows present in the last iMCU row. 
*/ - tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor); + tmp = (int)(compptr->height_in_blocks % compptr->v_samp_factor); if (tmp == 0) tmp = compptr->v_samp_factor; compptr->last_row_height = tmp; @@ -394,11 +395,11 @@ per_scan_setup (j_compress_ptr cinfo) /* Overall image size in MCUs */ cinfo->MCUs_per_row = (JDIMENSION) - jdiv_round_up((long) cinfo->_jpeg_width, - (long) (cinfo->max_h_samp_factor*DCTSIZE)); + jdiv_round_up((long)cinfo->_jpeg_width, + (long)(cinfo->max_h_samp_factor * DCTSIZE)); cinfo->MCU_rows_in_scan = (JDIMENSION) - jdiv_round_up((long) cinfo->_jpeg_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); + jdiv_round_up((long)cinfo->_jpeg_height, + (long)(cinfo->max_v_samp_factor * DCTSIZE)); cinfo->blocks_in_MCU = 0; @@ -410,10 +411,10 @@ per_scan_setup (j_compress_ptr cinfo) compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height; compptr->MCU_sample_width = compptr->MCU_width * DCTSIZE; /* Figure number of non-dummy blocks in last MCU column & row */ - tmp = (int) (compptr->width_in_blocks % compptr->MCU_width); + tmp = (int)(compptr->width_in_blocks % compptr->MCU_width); if (tmp == 0) tmp = compptr->MCU_width; compptr->last_col_width = tmp; - tmp = (int) (compptr->height_in_blocks % compptr->MCU_height); + tmp = (int)(compptr->height_in_blocks % compptr->MCU_height); if (tmp == 0) tmp = compptr->MCU_height; compptr->last_row_height = tmp; /* Prepare array describing MCU composition */ @@ -430,8 +431,8 @@ per_scan_setup (j_compress_ptr cinfo) /* Convert restart specified in rows to actual MCU count. */ /* Note that count must fit in 16 bits, so we provide limiting. 
*/ if (cinfo->restart_in_rows > 0) { - long nominal = (long) cinfo->restart_in_rows * (long) cinfo->MCUs_per_row; - cinfo->restart_interval = (unsigned int) MIN(nominal, 65535L); + long nominal = (long)cinfo->restart_in_rows * (long)cinfo->MCUs_per_row; + cinfo->restart_interval = (unsigned int)MIN(nominal, 65535L); } } @@ -445,9 +446,9 @@ per_scan_setup (j_compress_ptr cinfo) */ METHODDEF(void) -prepare_for_pass (j_compress_ptr cinfo) +prepare_for_pass(j_compress_ptr cinfo) { - my_master_ptr master = (my_master_ptr) cinfo->master; + my_master_ptr master = (my_master_ptr)cinfo->master; switch (master->pass_type) { case main_pass: @@ -456,7 +457,7 @@ prepare_for_pass (j_compress_ptr cinfo) */ select_scan_parameters(cinfo); per_scan_setup(cinfo); - if (! cinfo->raw_data_in) { + if (!cinfo->raw_data_in) { (*cinfo->cconvert->start_pass) (cinfo); (*cinfo->downsample->start_pass) (cinfo); (*cinfo->prep->start_pass) (cinfo, JBUF_PASS_THRU); @@ -496,7 +497,7 @@ prepare_for_pass (j_compress_ptr cinfo) case output_pass: /* Do a data-output pass. */ /* We need not repeat per-scan setup if prior optimization pass did it. */ - if (! 
cinfo->optimize_coding) { + if (!cinfo->optimize_coding) { select_scan_parameters(cinfo); per_scan_setup(cinfo); } @@ -512,7 +513,7 @@ prepare_for_pass (j_compress_ptr cinfo) ERREXIT(cinfo, JERR_NOT_COMPILED); } - master->pub.is_last_pass = (master->pass_number == master->total_passes-1); + master->pub.is_last_pass = (master->pass_number == master->total_passes - 1); /* Set up progress monitor's pass info if present */ if (cinfo->progress != NULL) { @@ -533,7 +534,7 @@ prepare_for_pass (j_compress_ptr cinfo) */ METHODDEF(void) -pass_startup (j_compress_ptr cinfo) +pass_startup(j_compress_ptr cinfo) { cinfo->master->call_pass_startup = FALSE; /* reset flag so call only once */ @@ -547,9 +548,9 @@ pass_startup (j_compress_ptr cinfo) */ METHODDEF(void) -finish_pass_master (j_compress_ptr cinfo) +finish_pass_master(j_compress_ptr cinfo) { - my_master_ptr master = (my_master_ptr) cinfo->master; + my_master_ptr master = (my_master_ptr)cinfo->master; /* The entropy coder always needs an end-of-pass call, * either to analyze statistics or to flush its output buffer. @@ -563,7 +564,7 @@ finish_pass_master (j_compress_ptr cinfo) * or output of scan 1 (if no optimization). */ master->pass_type = output_pass; - if (! 
cinfo->optimize_coding) + if (!cinfo->optimize_coding) master->scan_number++; break; case huff_opt_pass: @@ -587,14 +588,14 @@ finish_pass_master (j_compress_ptr cinfo) */ GLOBAL(void) -jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only) +jinit_c_master_control(j_compress_ptr cinfo, boolean transcode_only) { my_master_ptr master; master = (my_master_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - sizeof(my_comp_master)); - cinfo->master = (struct jpeg_comp_master *) master; + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + sizeof(my_comp_master)); + cinfo->master = (struct jpeg_comp_master *)master; master->pub.prepare_for_pass = prepare_for_pass; master->pub.pass_startup = pass_startup; master->pub.finish_pass = finish_pass_master; diff --git a/jcomapi.c b/jcomapi.c index 6e5bf3d..efbb835 100644 --- a/jcomapi.c +++ b/jcomapi.c @@ -29,7 +29,7 @@ */ GLOBAL(void) -jpeg_abort (j_common_ptr cinfo) +jpeg_abort(j_common_ptr cinfo) { int pool; @@ -40,7 +40,7 @@ jpeg_abort (j_common_ptr cinfo) /* Releasing pools in reverse order might help avoid fragmentation * with some (brain-damaged) malloc libraries. */ - for (pool = JPOOL_NUMPOOLS-1; pool > JPOOL_PERMANENT; pool--) { + for (pool = JPOOL_NUMPOOLS - 1; pool > JPOOL_PERMANENT; pool--) { (*cinfo->mem->free_pool) (cinfo, pool); } @@ -50,7 +50,7 @@ jpeg_abort (j_common_ptr cinfo) /* Try to keep application from accessing now-deleted marker list. * A bit kludgy to do it here, but this is the most central place. */ - ((j_decompress_ptr) cinfo)->marker_list = NULL; + ((j_decompress_ptr)cinfo)->marker_list = NULL; } else { cinfo->global_state = CSTATE_START; } @@ -69,7 +69,7 @@ jpeg_abort (j_common_ptr cinfo) */ GLOBAL(void) -jpeg_destroy (j_common_ptr cinfo) +jpeg_destroy(j_common_ptr cinfo) { /* We need only tell the memory manager to release everything. */ /* NB: mem pointer is NULL if memory mgr failed to initialize. 
*/ @@ -86,7 +86,7 @@ jpeg_destroy (j_common_ptr cinfo) */ GLOBAL(JQUANT_TBL *) -jpeg_alloc_quant_table (j_common_ptr cinfo) +jpeg_alloc_quant_table(j_common_ptr cinfo) { JQUANT_TBL *tbl; @@ -98,7 +98,7 @@ jpeg_alloc_quant_table (j_common_ptr cinfo) GLOBAL(JHUFF_TBL *) -jpeg_alloc_huff_table (j_common_ptr cinfo) +jpeg_alloc_huff_table(j_common_ptr cinfo) { JHUFF_TBL *tbl; diff --git a/jconfig.h.in b/jconfig.h.in index 02c12cc..18a69a4 100644 --- a/jconfig.h.in +++ b/jconfig.h.in @@ -1,19 +1,25 @@ /* Version ID for the JPEG library. * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60". */ -#define JPEG_LIB_VERSION 62 /* Version 6b */ +#define JPEG_LIB_VERSION @JPEG_LIB_VERSION@ /* libjpeg-turbo version */ -#define LIBJPEG_TURBO_VERSION 0 +#define LIBJPEG_TURBO_VERSION @VERSION@ /* libjpeg-turbo version in integer form */ -#define LIBJPEG_TURBO_VERSION_NUMBER 0 +#define LIBJPEG_TURBO_VERSION_NUMBER @LIBJPEG_TURBO_VERSION_NUMBER@ /* Support arithmetic encoding */ -#undef C_ARITH_CODING_SUPPORTED +#cmakedefine C_ARITH_CODING_SUPPORTED 1 /* Support arithmetic decoding */ -#undef D_ARITH_CODING_SUPPORTED +#cmakedefine D_ARITH_CODING_SUPPORTED 1 + +/* Support in-memory source/destination managers */ +#cmakedefine MEM_SRCDST_SUPPORTED 1 + +/* Use accelerated SIMD routines. */ +#cmakedefine WITH_SIMD 1 /* * Define BITS_IN_JSAMPLE as either @@ -24,50 +30,44 @@ * We do not support run-time selection of data precision, sorry. */ -#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */ +#define BITS_IN_JSAMPLE @BITS_IN_JSAMPLE@ /* use 8 or 12 */ /* Define to 1 if you have the <locale.h> header file. */ -#undef HAVE_LOCALE_H +#cmakedefine HAVE_LOCALE_H 1 /* Define to 1 if you have the <stddef.h> header file. */ -#undef HAVE_STDDEF_H +#cmakedefine HAVE_STDDEF_H 1 /* Define to 1 if you have the <stdlib.h> header file. */ -#undef HAVE_STDLIB_H +#cmakedefine HAVE_STDLIB_H 1 + +/* Define if you need to include <sys/types.h> to get size_t.
*/ +#cmakedefine NEED_SYS_TYPES_H 1 + +/* Define if you have BSD-like bzero and bcopy in <strings.h> rather than + memset/memcpy in <string.h>. */ +#cmakedefine NEED_BSD_STRINGS 1 /* Define to 1 if the system has the type `unsigned char'. */ -#undef HAVE_UNSIGNED_CHAR +#cmakedefine HAVE_UNSIGNED_CHAR 1 /* Define to 1 if the system has the type `unsigned short'. */ -#undef HAVE_UNSIGNED_SHORT +#cmakedefine HAVE_UNSIGNED_SHORT 1 /* Compiler does not support pointers to undefined structures. */ -#undef INCOMPLETE_TYPES_BROKEN - -/* Support in-memory source/destination managers */ -#undef MEM_SRCDST_SUPPORTED - -/* Define if you have BSD-like bzero and bcopy in <strings.h> rather than - memset/memcpy in <string.h>. */ -#undef NEED_BSD_STRINGS - -/* Define if you need to include <sys/types.h> to get size_t. */ -#undef NEED_SYS_TYPES_H +#cmakedefine INCOMPLETE_TYPES_BROKEN 1 /* Define if your (broken) compiler shifts signed values as if they were unsigned. */ -#undef RIGHT_SHIFT_IS_UNSIGNED - -/* Use accelerated SIMD routines. */ -#undef WITH_SIMD +#cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1 /* Define to 1 if type `char' is unsigned and you are not using gcc. */ #ifndef __CHAR_UNSIGNED__ -# undef __CHAR_UNSIGNED__ + #cmakedefine __CHAR_UNSIGNED__ 1 #endif /* Define to empty if `const' does not conform to ANSI C. */ -#undef const +/* #undef const */ /* Define to `unsigned int' if <sys/types.h> does not define. */ -#undef size_t +/* #undef size_t */ diff --git a/jconfig.txt b/jconfig.txt index 808f87f..90cd724 100644 --- a/jconfig.txt +++ b/jconfig.txt @@ -128,7 +128,7 @@ typedef unsigned char boolean; */ #undef TWO_FILE_COMMANDLINE -/* By default, we open image files with fopen(...,"rb") or fopen(...,"wb"). +/* By default, we open image files with fopen(..., "rb") or fopen(..., "wb"). * This is necessary on systems that distinguish text files from binary files, * and is harmless on most systems that don't. If you have one of the rare * systems that complains about the "b" spec, define this symbol.
diff --git a/jconfigint.h.in b/jconfigint.h.in index 963e760..55df053 100644 --- a/jconfigint.h.in +++ b/jconfigint.h.in @@ -1,17 +1,31 @@ /* libjpeg-turbo build number */ -#undef BUILD +#define BUILD "@BUILD@" /* Compiler's inline keyword */ #undef inline /* How to obtain function inlining. */ -#undef INLINE +#define INLINE @INLINE@ /* Define to the full name of this package. */ -#undef PACKAGE_NAME +#define PACKAGE_NAME "@CMAKE_PROJECT_NAME@" /* Version number of package */ -#undef VERSION +#define VERSION "@VERSION@" /* The size of `size_t', as computed by sizeof. */ -#undef SIZEOF_SIZE_T +#define SIZEOF_SIZE_T @SIZE_T@ + +/* Define if your compiler has __builtin_ctzl() and sizeof(unsigned long) == sizeof(size_t). */ +#cmakedefine HAVE_BUILTIN_CTZL + +/* Define to 1 if you have the <intrin.h> header file. */ +#cmakedefine HAVE_INTRIN_H + +#if defined(_MSC_VER) && defined(HAVE_INTRIN_H) +#if (SIZEOF_SIZE_T == 8) +#define HAVE_BITSCANFORWARD64 +#elif (SIZEOF_SIZE_T == 4) +#define HAVE_BITSCANFORWARD +#endif +#endif diff --git a/jcparam.c b/jcparam.c index 18b2d48..5bc7174 100644 --- a/jcparam.c +++ b/jcparam.c @@ -5,7 +5,7 @@ * Copyright (C) 1991-1998, Thomas G. Lane. * Modified 2003-2008 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2009-2011, D. R. Commander. + * Copyright (C) 2009-2011, 2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -25,9 +25,9 @@ */ GLOBAL(void) -jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, - const unsigned int *basic_table, - int scale_factor, boolean force_baseline) +jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl, + const unsigned int *basic_table, int scale_factor, + boolean force_baseline) /* Define a quantization table equal to the basic_table times * a scale factor (given as a percentage).
* If force_baseline is TRUE, the computed quantization table entries @@ -45,19 +45,19 @@ jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS) ERREXIT1(cinfo, JERR_DQT_INDEX, which_tbl); - qtblptr = & cinfo->quant_tbl_ptrs[which_tbl]; + qtblptr = &cinfo->quant_tbl_ptrs[which_tbl]; if (*qtblptr == NULL) - *qtblptr = jpeg_alloc_quant_table((j_common_ptr) cinfo); + *qtblptr = jpeg_alloc_quant_table((j_common_ptr)cinfo); for (i = 0; i < DCTSIZE2; i++) { - temp = ((long) basic_table[i] * scale_factor + 50L) / 100L; + temp = ((long)basic_table[i] * scale_factor + 50L) / 100L; /* limit the values to the valid range */ if (temp <= 0L) temp = 1L; if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */ if (force_baseline && temp > 255L) temp = 255L; /* limit to baseline range if requested */ - (*qtblptr)->quantval[i] = (UINT16) temp; + (*qtblptr)->quantval[i] = (UINT16)temp; } /* Initialize sent_table FALSE so table will be written to JPEG file. */ @@ -65,7 +65,8 @@ jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, } -/* These are the sample quantization tables given in JPEG spec section K.1. +/* These are the sample quantization tables given in Annex K (Clause K.1) of + * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994. * The spec says that the values given produce "good" quality, and * when divided by 2, "very good" quality. */ @@ -93,7 +94,7 @@ static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = { #if JPEG_LIB_VERSION >= 70 GLOBAL(void) -jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) +jpeg_default_qtables(j_compress_ptr cinfo, boolean force_baseline) /* Set or change the 'quality' (quantization) setting, using default tables * and straight percentage-scaling quality scales. * This entry point allows different scalings for luminance and chrominance. 
@@ -109,8 +110,8 @@ jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) GLOBAL(void) -jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, - boolean force_baseline) +jpeg_set_linear_quality(j_compress_ptr cinfo, int scale_factor, + boolean force_baseline) /* Set or change the 'quality' (quantization) setting, using default tables * and a straight percentage-scaling quality scale. In most cases it's better * to use jpeg_set_quality (below); this entry point is provided for @@ -126,7 +127,7 @@ jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, GLOBAL(int) -jpeg_quality_scaling (int quality) +jpeg_quality_scaling(int quality) /* Convert a user-specified quality rating to a percentage scaling factor * for an underlying quantization table, using our recommended scaling curve. * The input 'quality' factor should be 0 (terrible) to 100 (very good). @@ -145,14 +146,14 @@ jpeg_quality_scaling (int quality) if (quality < 50) quality = 5000 / quality; else - quality = 200 - quality*2; + quality = 200 - quality * 2; return quality; } GLOBAL(void) -jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline) +jpeg_set_quality(j_compress_ptr cinfo, int quality, boolean force_baseline) /* Set or change the 'quality' (quantization) setting, using default tables. 
* This is the standard quality-adjusting entry point for typical user * interfaces; only those who want detailed control over quantization tables @@ -178,7 +179,7 @@ jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline) */ GLOBAL(void) -jpeg_set_defaults (j_compress_ptr cinfo) +jpeg_set_defaults(j_compress_ptr cinfo) { int i; @@ -192,7 +193,7 @@ jpeg_set_defaults (j_compress_ptr cinfo) */ if (cinfo->comp_info == NULL) cinfo->comp_info = (jpeg_component_info *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, MAX_COMPONENTS * sizeof(jpeg_component_info)); /* Initialize everything not dependent on the color space */ @@ -205,7 +206,7 @@ jpeg_set_defaults (j_compress_ptr cinfo) /* Set up two quantization tables using default quality of 75 */ jpeg_set_quality(cinfo, 75, TRUE); /* Set up two Huffman tables */ - std_huff_tables((j_common_ptr) cinfo); + std_huff_tables((j_common_ptr)cinfo); /* Initialize default arithmetic coding conditioning */ for (i = 0; i < NUM_ARITH_TBLS; i++) { @@ -278,7 +279,7 @@ jpeg_set_defaults (j_compress_ptr cinfo) */ GLOBAL(void) -jpeg_default_colorspace (j_compress_ptr cinfo) +jpeg_default_colorspace(j_compress_ptr cinfo) { switch (cinfo->in_color_space) { case JCS_GRAYSCALE: @@ -320,12 +321,12 @@ jpeg_default_colorspace (j_compress_ptr cinfo) */ GLOBAL(void) -jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace) +jpeg_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) { jpeg_component_info *compptr; int ci; -#define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl) \ +#define SET_COMP(index, id, hsamp, vsamp, quant, dctbl, actbl) \ (compptr = &cinfo->comp_info[index], \ compptr->component_id = (id), \ compptr->h_samp_factor = (hsamp), \ @@ -352,39 +353,39 @@ jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace) cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */ cinfo->num_components = 
1; /* JFIF specifies component ID 1 */ - SET_COMP(0, 1, 1,1, 0, 0,0); + SET_COMP(0, 1, 1, 1, 0, 0, 0); break; case JCS_RGB: cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */ cinfo->num_components = 3; - SET_COMP(0, 0x52 /* 'R' */, 1,1, 0, 0,0); - SET_COMP(1, 0x47 /* 'G' */, 1,1, 0, 0,0); - SET_COMP(2, 0x42 /* 'B' */, 1,1, 0, 0,0); + SET_COMP(0, 0x52 /* 'R' */, 1, 1, 0, 0, 0); + SET_COMP(1, 0x47 /* 'G' */, 1, 1, 0, 0, 0); + SET_COMP(2, 0x42 /* 'B' */, 1, 1, 0, 0, 0); break; case JCS_YCbCr: cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */ cinfo->num_components = 3; /* JFIF specifies component IDs 1,2,3 */ /* We default to 2x2 subsamples of chrominance */ - SET_COMP(0, 1, 2,2, 0, 0,0); - SET_COMP(1, 2, 1,1, 1, 1,1); - SET_COMP(2, 3, 1,1, 1, 1,1); + SET_COMP(0, 1, 2, 2, 0, 0, 0); + SET_COMP(1, 2, 1, 1, 1, 1, 1); + SET_COMP(2, 3, 1, 1, 1, 1, 1); break; case JCS_CMYK: cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */ cinfo->num_components = 4; - SET_COMP(0, 0x43 /* 'C' */, 1,1, 0, 0,0); - SET_COMP(1, 0x4D /* 'M' */, 1,1, 0, 0,0); - SET_COMP(2, 0x59 /* 'Y' */, 1,1, 0, 0,0); - SET_COMP(3, 0x4B /* 'K' */, 1,1, 0, 0,0); + SET_COMP(0, 0x43 /* 'C' */, 1, 1, 0, 0, 0); + SET_COMP(1, 0x4D /* 'M' */, 1, 1, 0, 0, 0); + SET_COMP(2, 0x59 /* 'Y' */, 1, 1, 0, 0, 0); + SET_COMP(3, 0x4B /* 'K' */, 1, 1, 0, 0, 0); break; case JCS_YCCK: cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */ cinfo->num_components = 4; - SET_COMP(0, 1, 2,2, 0, 0,0); - SET_COMP(1, 2, 1,1, 1, 1,1); - SET_COMP(2, 3, 1,1, 1, 1,1); - SET_COMP(3, 4, 2,2, 0, 0,0); + SET_COMP(0, 1, 2, 2, 0, 0, 0); + SET_COMP(1, 2, 1, 1, 1, 1, 1); + SET_COMP(2, 3, 1, 1, 1, 1, 1); + SET_COMP(3, 4, 2, 2, 0, 0, 0); break; case JCS_UNKNOWN: cinfo->num_components = cinfo->input_components; @@ -392,7 +393,7 @@ jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace) ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, MAX_COMPONENTS); for (ci = 0; ci 
< cinfo->num_components; ci++) { - SET_COMP(ci, ci, 1,1, 0, 0,0); + SET_COMP(ci, ci, 1, 1, 0, 0, 0); } break; default: @@ -404,8 +405,7 @@ jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace) #ifdef C_PROGRESSIVE_SUPPORTED LOCAL(jpeg_scan_info *) -fill_a_scan (jpeg_scan_info *scanptr, int ci, - int Ss, int Se, int Ah, int Al) +fill_a_scan(jpeg_scan_info *scanptr, int ci, int Ss, int Se, int Ah, int Al) /* Support routine: generate one scan for specified component */ { scanptr->comps_in_scan = 1; @@ -419,8 +419,7 @@ fill_a_scan (jpeg_scan_info *scanptr, int ci, } LOCAL(jpeg_scan_info *) -fill_scans (jpeg_scan_info *scanptr, int ncomps, - int Ss, int Se, int Ah, int Al) +fill_scans(jpeg_scan_info *scanptr, int ncomps, int Ss, int Se, int Ah, int Al) /* Support routine: generate one scan for each component */ { int ci; @@ -438,7 +437,7 @@ fill_scans (jpeg_scan_info *scanptr, int ncomps, } LOCAL(jpeg_scan_info *) -fill_dc_scans (jpeg_scan_info *scanptr, int ncomps, int Ah, int Al) +fill_dc_scans(jpeg_scan_info *scanptr, int ncomps, int Ah, int Al) /* Support routine: generate interleaved DC scan if possible, else N scans */ { int ci; @@ -466,7 +465,7 @@ fill_dc_scans (jpeg_scan_info *scanptr, int ncomps, int Ah, int Al) */ GLOBAL(void) -jpeg_simple_progression (j_compress_ptr cinfo) +jpeg_simple_progression(j_compress_ptr cinfo) { int ncomps = cinfo->num_components; int nscans; @@ -498,7 +497,7 @@ jpeg_simple_progression (j_compress_ptr cinfo) if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) { cinfo->script_space_size = MAX(nscans, 10); cinfo->script_space = (jpeg_scan_info *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, cinfo->script_space_size * sizeof(jpeg_scan_info)); } scanptr = cinfo->script_space; diff --git a/jcphuff.c b/jcphuff.c index 046e2e1..024d3af 100644 --- a/jcphuff.c +++ b/jcphuff.c @@ -4,7 +4,8 @@ * This file was part of the 
Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2015, D. R. Commander. + * Copyright (C) 2011, 2015, 2018, D. R. Commander. + * Copyright (C) 2016, 2018, Matthieu Darbois. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -18,15 +19,69 @@ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" -#include "jchuff.h" /* Declarations shared with jchuff.c */ +#include "jsimd.h" +#include "jconfigint.h" +#include <limits.h> + +#ifdef HAVE_INTRIN_H +#include <intrin.h> +#ifdef _MSC_VER +#ifdef HAVE_BITSCANFORWARD64 +#pragma intrinsic(_BitScanForward64) +#endif +#ifdef HAVE_BITSCANFORWARD +#pragma intrinsic(_BitScanForward) +#endif +#endif +#endif #ifdef C_PROGRESSIVE_SUPPORTED +/* + * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be + * used for bit counting rather than the lookup table. This will reduce the + * memory footprint by 64k, which is important for some mobile applications + * that create many isolated instances of libjpeg-turbo (web browsers, for + * instance.) This may improve performance on some mobile platforms as well. + * This feature is enabled by default only on ARM processors, because some x86 + * chips have a slow implementation of bsr, and the use of clz/bsr cannot be + * shown to have a significant performance impact even on the x86 chips that + * have a fast implementation of it. When building for ARMv6, you can + * explicitly disable the use of clz/bsr by adding -mthumb to the compiler + * flags (this defines __thumb__). + */ + +/* NOTE: Both GCC and Clang define __GNUC__ */ +#if defined __GNUC__ && (defined __arm__ || defined __aarch64__) +#if !defined __thumb__ || defined __thumb2__ +#define USE_CLZ_INTRINSIC +#endif +#endif + +#ifdef USE_CLZ_INTRINSIC +#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#define JPEG_NBITS(x) (x ?
JPEG_NBITS_NONZERO(x) : 0) +#else +#include "jpeg_nbits_table.h" +#define JPEG_NBITS(x) (jpeg_nbits_table[x]) +#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) +#endif + + /* Expanded entropy encoder object for progressive Huffman encoding. */ typedef struct { struct jpeg_entropy_encoder pub; /* public fields */ + /* Pointer to routine to prepare data for encode_mcu_AC_first() */ + void (*AC_first_prepare) (const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *zerobits); + /* Pointer to routine to prepare data for encode_mcu_AC_refine() */ + int (*AC_refine_prepare) (const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits); + /* Mode flag: TRUE for optimization, FALSE for actual data output */ boolean gather_statistics; @@ -79,26 +134,60 @@ typedef phuff_entropy_encoder *phuff_entropy_ptr; #ifdef RIGHT_SHIFT_IS_UNSIGNED #define ISHIFT_TEMPS int ishift_temp; -#define IRIGHT_SHIFT(x,shft) \ - ((ishift_temp = (x)) < 0 ? \ - (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ - (ishift_temp >> (shft))) +#define IRIGHT_SHIFT(x, shft) \ + ((ishift_temp = (x)) < 0 ? 
\ + (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \ + (ishift_temp >> (shft))) #else #define ISHIFT_TEMPS -#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define IRIGHT_SHIFT(x, shft) ((x) >> (shft)) #endif +#define PAD(v, p) ((v + (p) - 1) & (~((p) - 1))) + /* Forward declarations */ -METHODDEF(boolean) encode_mcu_DC_first (j_compress_ptr cinfo, +METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(void) encode_mcu_AC_first_prepare + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, + JCOEF *values, size_t *zerobits); +METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data); -METHODDEF(boolean) encode_mcu_AC_first (j_compress_ptr cinfo, +METHODDEF(int) encode_mcu_AC_refine_prepare + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, + JCOEF *absvalues, size_t *bits); +METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data); -METHODDEF(boolean) encode_mcu_DC_refine (j_compress_ptr cinfo, - JBLOCKROW *MCU_data); -METHODDEF(boolean) encode_mcu_AC_refine (j_compress_ptr cinfo, - JBLOCKROW *MCU_data); -METHODDEF(void) finish_pass_phuff (j_compress_ptr cinfo); -METHODDEF(void) finish_pass_gather_phuff (j_compress_ptr cinfo); +METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo); +METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo); + + +/* Count bit loop zeroes */ +INLINE +METHODDEF(int) +count_zeroes(size_t *x) +{ + int result; +#if defined(HAVE_BUILTIN_CTZL) + result = __builtin_ctzl(*x); + *x >>= result; +#elif defined(HAVE_BITSCANFORWARD64) + _BitScanForward64(&result, *x); + *x >>= result; +#elif defined(HAVE_BITSCANFORWARD) + _BitScanForward(&result, *x); + *x >>= result; +#else + result = 0; + while ((*x & 1) == 0) { + ++result; + *x >>= 1; + } +#endif + return result; +} /* @@ -106,9 +195,9 @@ METHODDEF(void) 
finish_pass_gather_phuff (j_compress_ptr cinfo); */ METHODDEF(void) -start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics) +start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; boolean is_DC_band; int ci, tbl; jpeg_component_info *compptr; @@ -126,15 +215,23 @@ start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics) entropy->pub.encode_mcu = encode_mcu_DC_first; else entropy->pub.encode_mcu = encode_mcu_AC_first; + if (jsimd_can_encode_mcu_AC_first_prepare()) + entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare; + else + entropy->AC_first_prepare = encode_mcu_AC_first_prepare; } else { if (is_DC_band) entropy->pub.encode_mcu = encode_mcu_DC_refine; else { entropy->pub.encode_mcu = encode_mcu_AC_refine; + if (jsimd_can_encode_mcu_AC_refine_prepare()) + entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare; + else + entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare; /* AC refinement needs a correction bit buffer */ if (entropy->bit_buffer == NULL) entropy->bit_buffer = (char *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, MAX_CORR_BITS * sizeof(char)); } } @@ -167,14 +264,14 @@ start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics) /* Note that jpeg_gen_optimal_table expects 257 entries in each table! 
*/ if (entropy->count_ptrs[tbl] == NULL) entropy->count_ptrs[tbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, 257 * sizeof(long)); MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long)); } else { /* Compute derived values for Huffman table */ /* We may do this more than once for a table, but it's not expensive */ jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl, - & entropy->derived_tbls[tbl]); + &entropy->derived_tbls[tbl]); } } @@ -198,19 +295,20 @@ start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics) */ /* Emit a byte */ -#define emit_byte(entropy,val) \ - { *(entropy)->next_output_byte++ = (JOCTET) (val); \ - if (--(entropy)->free_in_buffer == 0) \ - dump_buffer(entropy); } +#define emit_byte(entropy, val) { \ + *(entropy)->next_output_byte++ = (JOCTET)(val); \ + if (--(entropy)->free_in_buffer == 0) \ + dump_buffer(entropy); \ +} LOCAL(void) -dump_buffer (phuff_entropy_ptr entropy) +dump_buffer(phuff_entropy_ptr entropy) /* Empty the output buffer; we do not support suspension in this module. */ { struct jpeg_destination_mgr *dest = entropy->cinfo->dest; - if (! (*dest->empty_output_buffer) (entropy->cinfo)) + if (!(*dest->empty_output_buffer) (entropy->cinfo)) ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND); /* After a successful buffer dump, must reset buffer pointers */ entropy->next_output_byte = dest->next_output_byte; @@ -227,11 +325,11 @@ dump_buffer (phuff_entropy_ptr entropy) */ LOCAL(void) -emit_bits (phuff_entropy_ptr entropy, unsigned int code, int size) +emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size) /* Emit some bits, unless we are in gather mode */ { /* This routine is heavily used, so it's worth coding tightly. 
*/ - register size_t put_buffer = (size_t) code; + register size_t put_buffer = (size_t)code; register int put_bits = entropy->put_bits; /* if size is 0, caller used an invalid Huffman table entry */ @@ -241,7 +339,7 @@ emit_bits (phuff_entropy_ptr entropy, unsigned int code, int size) if (entropy->gather_statistics) return; /* do nothing if we're only getting stats */ - put_buffer &= (((size_t) 1)<<size) - 1; /* mask off any extra bits in code */ + put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */ put_bits += size; /* new number of bits in buffer */ @@ -250,7 +348,7 @@ emit_bits (phuff_entropy_ptr entropy, unsigned int code, int size) put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */ while (put_bits >= 8) { - int c = (int) ((put_buffer >> 16) & 0xFF); + int c = (int)((put_buffer >> 16) & 0xFF); emit_byte(entropy, c); if (c == 0xFF) { /* need to stuff a zero byte? */ @@ -266,7 +364,7 @@ emit_bits (phuff_entropy_ptr entropy, unsigned int code, int size) LOCAL(void) -flush_bits (phuff_entropy_ptr entropy) +flush_bits(phuff_entropy_ptr entropy) { emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */ entropy->put_buffer = 0; /* and reset bit-buffer to empty */ @@ -279,7 +377,7 @@ flush_bits (phuff_entropy_ptr entropy) */ LOCAL(void) -emit_symbol (phuff_entropy_ptr entropy, int tbl_no, int symbol) +emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol) { if (entropy->gather_statistics) entropy->count_ptrs[tbl_no][symbol]++; @@ -295,14 +393,14 @@ emit_symbol (phuff_entropy_ptr entropy, int tbl_no, int symbol) */ LOCAL(void) -emit_buffered_bits (phuff_entropy_ptr entropy, char *bufstart, - unsigned int nbits) +emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart, + unsigned int nbits) { if (entropy->gather_statistics) return; /* no real work */ while (nbits > 0) { - emit_bits(entropy, (unsigned int) (*bufstart), 1); + emit_bits(entropy, (unsigned int)(*bufstart), 1); bufstart++; nbits--; } @@ -314,15 +412,13 @@ emit_buffered_bits (phuff_entropy_ptr entropy, char *bufstart, */ LOCAL(void) -emit_eobrun (phuff_entropy_ptr entropy) +emit_eobrun(phuff_entropy_ptr entropy) { register int temp, nbits; if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ temp =
entropy->EOBRUN; - nbits = 0; - while ((temp >>= 1)) - nbits++; + nbits = JPEG_NBITS_NONZERO(temp) - 1; /* safety check: shouldn't happen given limited correction-bit buffer */ if (nbits > 14) ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); @@ -345,13 +441,13 @@ emit_eobrun (phuff_entropy_ptr entropy) */ LOCAL(void) -emit_restart (phuff_entropy_ptr entropy, int restart_num) +emit_restart(phuff_entropy_ptr entropy, int restart_num) { int ci; emit_eobrun(entropy); - if (! entropy->gather_statistics) { + if (!entropy->gather_statistics) { flush_bits(entropy); emit_byte(entropy, 0xFF); emit_byte(entropy, JPEG_RST0 + restart_num); @@ -375,10 +471,10 @@ emit_restart (phuff_entropy_ptr entropy, int restart_num) */ METHODDEF(boolean) -encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - register int temp, temp2; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; + register int temp, temp2, temp3; register int nbits; int blkn, ci; int Al = cinfo->Al; @@ -403,31 +499,31 @@ encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* Compute the DC value after the required point transform by Al. * This is simply an arithmetic right shift. */ - temp2 = IRIGHT_SHIFT((int) ((*block)[0]), Al); + temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al); /* DC differences are figured on the point-transformed values. */ temp = temp2 - entropy->last_dc_val[ci]; entropy->last_dc_val[ci] = temp2; /* Encode the DC coefficient difference per section G.1.2.1 */ - temp2 = temp; - if (temp < 0) { - temp = -temp; /* temp is abs value of input */ - /* For a negative input, want temp2 = bitwise complement of abs(input) */ - /* This code assumes we are on a two's complement machine */ - temp2--; - } + + /* This is a well-known technique for obtaining the absolute value without + * a branch. 
It is derived from an assembly language technique presented + * in "How to Optimize for the Pentium Processors", Copyright (c) 1996, + * 1997 by Agner Fog. + */ + temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); + temp ^= temp3; + temp -= temp3; /* temp is abs value of input */ + /* For a negative input, want temp2 = bitwise complement of abs(input) */ + temp2 = temp ^ temp3; /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 0; - while (temp) { - nbits++; - temp >>= 1; - } + nbits = JPEG_NBITS(temp); /* Check for out-of-range coefficient values. * Since we're encoding a difference, the range limit is twice as much. */ - if (nbits > MAX_COEF_BITS+1) + if (nbits > MAX_COEF_BITS + 1) ERREXIT(cinfo, JERR_BAD_DCT_COEF); /* Count/emit the Huffman-coded symbol for the number of bits */ @@ -436,7 +532,7 @@ encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* Emit that number of bits of the value, if positive, */ /* or the complement of its magnitude, if negative. */ if (nbits) /* emit_bits rejects calls with size 0 */ - emit_bits(entropy, (unsigned int) temp2, nbits); + emit_bits(entropy, (unsigned int)temp2, nbits); } cinfo->dest->next_output_byte = entropy->next_output_byte; @@ -457,20 +553,115 @@ encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* + * Data preparation for encode_mcu_AC_first(). + */ + +#define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \ + for (k = 0; k < Sl; k++) { \ + temp = block[jpeg_natural_order_start[k]]; \ + if (temp == 0) \ + continue; \ + /* We must apply the point transform by Al. For AC coefficients this \ + * is an integer division with rounding towards 0. To do this portably \ + * in C, we shift after obtaining the absolute value; so the code is \ + * interwoven with finding the abs value (temp) and output bits (temp2). 
\ + */ \ + temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \ + temp ^= temp2; \ + temp -= temp2; /* temp is abs value of input */ \ + temp >>= Al; /* apply the point transform */ \ + /* Watch out for case that nonzero coef is zero after point transform */ \ + if (temp == 0) \ + continue; \ + /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \ + temp2 ^= temp; \ + values[k] = temp; \ + values[k + DCTSIZE2] = temp2; \ + zerobits |= ((size_t)1U) << k; \ + } \ +} + +METHODDEF(void) +encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *bits) +{ + register int k, temp, temp2; + size_t zerobits = 0U; + int Sl0 = Sl; + +#if SIZEOF_SIZE_T == 4 + if (Sl0 > 32) + Sl0 = 32; +#endif + + COMPUTE_ABSVALUES_AC_FIRST(Sl0); + + bits[0] = zerobits; +#if SIZEOF_SIZE_T == 4 + zerobits = 0U; + + if (Sl > 32) { + Sl -= 32; + jpeg_natural_order_start += 32; + values += 32; + + COMPUTE_ABSVALUES_AC_FIRST(Sl); + } + bits[1] = zerobits; +#endif +} + +/* * MCU encoding for AC initial scan (either spectral selection, * or first pass of successive approximation). 
*/ +#define ENCODE_COEFS_AC_FIRST(label) { \ + while (zerobits) { \ + r = count_zeroes(&zerobits); \ + cvalue += r; \ +label \ + temp = cvalue[0]; \ + temp2 = cvalue[DCTSIZE2]; \ + \ + /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \ + while (r > 15) { \ + emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \ + r -= 16; \ + } \ + \ + /* Find the number of bits needed for the magnitude of the coefficient */ \ + nbits = JPEG_NBITS_NONZERO(temp); /* there must be at least one 1 bit */ \ + /* Check for out-of-range coefficient values */ \ + if (nbits > MAX_COEF_BITS) \ + ERREXIT(cinfo, JERR_BAD_DCT_COEF); \ + \ + /* Count/emit Huffman symbol for run length / number of bits */ \ + emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \ + \ + /* Emit that number of bits of the value, if positive, */ \ + /* or the complement of its magnitude, if negative. */ \ + emit_bits(entropy, (unsigned int)temp2, nbits); \ + \ + cvalue++; \ + zerobits >>= 1; \ + } \ +} + METHODDEF(boolean) -encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; register int temp, temp2; - register int nbits; - register int r, k; - int Se = cinfo->Se; + register int nbits, r; + int Sl = cinfo->Se - cinfo->Ss + 1; int Al = cinfo->Al; - JBLOCKROW block; + JCOEF values_unaligned[2 * DCTSIZE2 + 15]; + JCOEF *values; + const JCOEF *cvalue; + size_t zerobits; + size_t bits[8 / SIZEOF_SIZE_T]; entropy->next_output_byte = cinfo->dest->next_output_byte; entropy->free_in_buffer = cinfo->dest->free_in_buffer; @@ -480,66 +671,48 @@ encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) if (entropy->restarts_to_go == 0) emit_restart(entropy, entropy->next_restart_num); - /* Encode the MCU data block */ - block = MCU_data[0]; +#ifdef WITH_SIMD + cvalue = values = (JCOEF 
*)PAD((size_t)values_unaligned, 16); +#else + /* Not using SIMD, so alignment is not needed */ + cvalue = values = values_unaligned; +#endif - /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */ + /* Prepare data */ + entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss, + Sl, Al, values, bits); - r = 0; /* r = run length of zeros */ - - for (k = cinfo->Ss; k <= Se; k++) { - if ((temp = (*block)[jpeg_natural_order[k]]) == 0) { - r++; - continue; - } - /* We must apply the point transform by Al. For AC coefficients this - * is an integer division with rounding towards 0. To do this portably - * in C, we shift after obtaining the absolute value; so the code is - * interwoven with finding the abs value (temp) and output bits (temp2). - */ - if (temp < 0) { - temp = -temp; /* temp is abs value of input */ - temp >>= Al; /* apply the point transform */ - /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ - temp2 = ~temp; - } else { - temp >>= Al; /* apply the point transform */ - temp2 = temp; - } - /* Watch out for case that nonzero coef is zero after point transform */ - if (temp == 0) { - r++; - continue; - } + zerobits = bits[0]; +#if SIZEOF_SIZE_T == 4 + zerobits |= bits[1]; +#endif - /* Emit any pending EOBRUN */ - if (entropy->EOBRUN > 0) - emit_eobrun(entropy); - /* if run length > 15, must emit special run-length-16 codes (0xF0) */ - while (r > 15) { - emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); - r -= 16; - } + /* Emit any pending EOBRUN */ + if (zerobits && (entropy->EOBRUN > 0)) + emit_eobrun(entropy); - /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 1; /* there must be at least one 1 bit */ - while ((temp >>= 1)) - nbits++; - /* Check for out-of-range coefficient values */ - if (nbits > MAX_COEF_BITS) - ERREXIT(cinfo, JERR_BAD_DCT_COEF); +#if SIZEOF_SIZE_T == 4 + zerobits = bits[0]; +#endif - /* Count/emit Huffman symbol for run length / number of bits */ - 
emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); + /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */ - /* Emit that number of bits of the value, if positive, */ - /* or the complement of its magnitude, if negative. */ - emit_bits(entropy, (unsigned int) temp2, nbits); + ENCODE_COEFS_AC_FIRST((void)0;); - r = 0; /* reset zero run length */ +#if SIZEOF_SIZE_T == 4 + zerobits = bits[1]; + if (zerobits) { + int diff = ((values + DCTSIZE2 / 2) - cvalue); + r = count_zeroes(&zerobits); + r += diff; + cvalue += r; + goto first_iter_ac_first; } - if (r > 0) { /* If there are trailing zeroes, */ + ENCODE_COEFS_AC_FIRST(first_iter_ac_first:); +#endif + + if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */ entropy->EOBRUN++; /* count an EOB */ if (entropy->EOBRUN == 0x7FFF) emit_eobrun(entropy); /* force it out to avoid overflow */ @@ -569,9 +742,9 @@ encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; register int temp; int blkn; int Al = cinfo->Al; @@ -591,7 +764,7 @@ encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* We simply emit the Al'th bit of the DC coefficient value. */ temp = (*block)[0]; - emit_bits(entropy, (unsigned int) (temp >> Al), 1); + emit_bits(entropy, (unsigned int)(temp >> Al), 1); } cinfo->dest->next_output_byte = entropy->next_output_byte; @@ -612,22 +785,148 @@ encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* + * Data preparation for encode_mcu_AC_refine(). + */ + +#define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \ + /* It is convenient to make a pre-pass to determine the transformed \ + * coefficients' absolute values and the EOB position. 
\ + */ \ + for (k = 0; k < Sl; k++) { \ + temp = block[jpeg_natural_order_start[k]]; \ + /* We must apply the point transform by Al. For AC coefficients this \ + * is an integer division with rounding towards 0. To do this portably \ + * in C, we shift after obtaining the absolute value. \ + */ \ + temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \ + temp ^= temp2; \ + temp -= temp2; /* temp is abs value of input */ \ + temp >>= Al; /* apply the point transform */ \ + if (temp != 0) { \ + zerobits |= ((size_t)1U) << k; \ + signbits |= ((size_t)(temp2 + 1)) << k; \ + } \ + absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \ + if (temp == 1) \ + EOB = k + koffset; /* EOB = index of last newly-nonzero coef */ \ + } \ +} + +METHODDEF(int) +encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits) +{ + register int k, temp, temp2; + int EOB = 0; + size_t zerobits = 0U, signbits = 0U; + int Sl0 = Sl; + +#if SIZEOF_SIZE_T == 4 + if (Sl0 > 32) + Sl0 = 32; +#endif + + COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0); + + bits[0] = zerobits; +#if SIZEOF_SIZE_T == 8 + bits[1] = signbits; +#else + bits[2] = signbits; + + zerobits = 0U; + signbits = 0U; + + if (Sl > 32) { + Sl -= 32; + jpeg_natural_order_start += 32; + absvalues += 32; + + COMPUTE_ABSVALUES_AC_REFINE(Sl, 32); + } + + bits[1] = zerobits; + bits[3] = signbits; +#endif + + return EOB; +} + + +/* * MCU encoding for AC successive approximation refinement scan. 
*/ +#define ENCODE_COEFS_AC_REFINE(label) { \ + while (zerobits) { \ + int idx = count_zeroes(&zerobits); \ + r += idx; \ + cabsvalue += idx; \ + signbits >>= idx; \ +label \ + /* Emit any required ZRLs, but not if they can be folded into EOB */ \ + while (r > 15 && (cabsvalue <= EOBPTR)) { \ + /* emit any pending EOBRUN and the BE correction bits */ \ + emit_eobrun(entropy); \ + /* Emit ZRL */ \ + emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \ + r -= 16; \ + /* Emit buffered correction bits that must be associated with ZRL */ \ + emit_buffered_bits(entropy, BR_buffer, BR); \ + BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \ + BR = 0; \ + } \ + \ + temp = *cabsvalue++; \ + \ + /* If the coef was previously nonzero, it only needs a correction bit. \ + * NOTE: a straight translation of the spec's figure G.7 would suggest \ + * that we also need to test r > 15. But if r > 15, we can only get here \ + * if k > EOB, which implies that this coefficient is not 1. \ + */ \ + if (temp > 1) { \ + /* The correction bit is the next bit of the absolute value. */ \ + BR_buffer[BR++] = (char)(temp & 1); \ + signbits >>= 1; \ + zerobits >>= 1; \ + continue; \ + } \ + \ + /* Emit any pending EOBRUN and the BE correction bits */ \ + emit_eobrun(entropy); \ + \ + /* Count/emit Huffman symbol for run length / number of bits */ \ + emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \ + \ + /* Emit output bit for newly-nonzero coef */ \ + temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 
0 : 1 */ \ + emit_bits(entropy, (unsigned int)temp, 1); \ + \ + /* Emit buffered correction bits that must be associated with this code */ \ + emit_buffered_bits(entropy, BR_buffer, BR); \ + BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \ + BR = 0; \ + r = 0; /* reset zero run length */ \ + signbits >>= 1; \ + zerobits >>= 1; \ + } \ +} + METHODDEF(boolean) -encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - register int temp; - register int r, k; - int EOB; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; + register int temp, r; char *BR_buffer; unsigned int BR; - int Se = cinfo->Se; + int Sl = cinfo->Se - cinfo->Ss + 1; int Al = cinfo->Al; - JBLOCKROW block; - int absvalues[DCTSIZE2]; + JCOEF absvalues_unaligned[DCTSIZE2 + 15]; + JCOEF *absvalues; + const JCOEF *cabsvalue, *EOBPTR; + size_t zerobits, signbits; + size_t bits[16 / SIZEOF_SIZE_T]; entropy->next_output_byte = cinfo->dest->next_output_byte; entropy->free_in_buffer = cinfo->dest->free_in_buffer; @@ -637,26 +936,17 @@ encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) if (entropy->restarts_to_go == 0) emit_restart(entropy, entropy->next_restart_num); - /* Encode the MCU data block */ - block = MCU_data[0]; +#ifdef WITH_SIMD + cabsvalue = absvalues = (JCOEF *)PAD((size_t)absvalues_unaligned, 16); +#else + /* Not using SIMD, so alignment is not needed */ + cabsvalue = absvalues = absvalues_unaligned; +#endif - /* It is convenient to make a pre-pass to determine the transformed - * coefficients' absolute values and the EOB position. - */ - EOB = 0; - for (k = cinfo->Ss; k <= Se; k++) { - temp = (*block)[jpeg_natural_order[k]]; - /* We must apply the point transform by Al. For AC coefficients this - * is an integer division with rounding towards 0. 
To do this portably - * in C, we shift after obtaining the absolute value. - */ - if (temp < 0) - temp = -temp; /* temp is abs value of input */ - temp >>= Al; /* apply the point transform */ - absvalues[k] = temp; /* save abs value for main pass */ - if (temp == 1) - EOB = k; /* EOB = index of last newly-nonzero coef */ - } + /* Prepare data */ + EOBPTR = absvalues + + entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss, + Sl, Al, absvalues, bits); /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */ @@ -664,52 +954,32 @@ encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) BR = 0; /* BR = count of buffered bits added now */ BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */ - for (k = cinfo->Ss; k <= Se; k++) { - if ((temp = absvalues[k]) == 0) { - r++; - continue; - } - - /* Emit any required ZRLs, but not if they can be folded into EOB */ - while (r > 15 && k <= EOB) { - /* emit any pending EOBRUN and the BE correction bits */ - emit_eobrun(entropy); - /* Emit ZRL */ - emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); - r -= 16; - /* Emit buffered correction bits that must be associated with ZRL */ - emit_buffered_bits(entropy, BR_buffer, BR); - BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ - BR = 0; - } - - /* If the coef was previously nonzero, it only needs a correction bit. - * NOTE: a straight translation of the spec's figure G.7 would suggest - * that we also need to test r > 15. But if r > 15, we can only get here - * if k > EOB, which implies that this coefficient is not 1. - */ - if (temp > 1) { - /* The correction bit is the next bit of the absolute value. 
*/ - BR_buffer[BR++] = (char) (temp & 1); - continue; - } - - /* Emit any pending EOBRUN and the BE correction bits */ - emit_eobrun(entropy); - - /* Count/emit Huffman symbol for run length / number of bits */ - emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); + zerobits = bits[0]; +#if SIZEOF_SIZE_T == 8 + signbits = bits[1]; +#else + signbits = bits[2]; +#endif + ENCODE_COEFS_AC_REFINE((void)0;); + +#if SIZEOF_SIZE_T == 4 + zerobits = bits[1]; + signbits = bits[3]; + + if (zerobits) { + int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue); + int idx = count_zeroes(&zerobits); + signbits >>= idx; + idx += diff; + r += idx; + cabsvalue += idx; + goto first_iter_ac_refine; + } - /* Emit output bit for newly-nonzero coef */ - temp = ((*block)[jpeg_natural_order[k]] < 0) ? 0 : 1; - emit_bits(entropy, (unsigned int) temp, 1); + ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:); +#endif - /* Emit buffered correction bits that must be associated with this code */ - emit_buffered_bits(entropy, BR_buffer, BR); - BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ - BR = 0; - r = 0; /* reset zero run length */ - } + r |= (int)((absvalues + Sl) - cabsvalue); if (r > 0 || BR > 0) { /* If there are trailing zeroes, */ entropy->EOBRUN++; /* count an EOB */ @@ -718,7 +988,8 @@ encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) * 1. overflow of the EOB counter; * 2. overflow of the correction bit buffer during the next MCU. 
*/ - if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1)) + if (entropy->EOBRUN == 0x7FFF || + entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1)) emit_eobrun(entropy); } @@ -744,9 +1015,9 @@ encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(void) -finish_pass_phuff (j_compress_ptr cinfo) +finish_pass_phuff(j_compress_ptr cinfo) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; entropy->next_output_byte = cinfo->dest->next_output_byte; entropy->free_in_buffer = cinfo->dest->free_in_buffer; @@ -765,9 +1036,9 @@ finish_pass_phuff (j_compress_ptr cinfo) */ METHODDEF(void) -finish_pass_gather_phuff (j_compress_ptr cinfo) +finish_pass_gather_phuff(j_compress_ptr cinfo) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; boolean is_DC_band; int ci, tbl; jpeg_component_info *compptr; @@ -793,13 +1064,13 @@ finish_pass_gather_phuff (j_compress_ptr cinfo) } else { tbl = compptr->ac_tbl_no; } - if (! 
did[tbl]) { + if (!did[tbl]) { if (is_DC_band) - htblptr = & cinfo->dc_huff_tbl_ptrs[tbl]; + htblptr = &cinfo->dc_huff_tbl_ptrs[tbl]; else - htblptr = & cinfo->ac_huff_tbl_ptrs[tbl]; + htblptr = &cinfo->ac_huff_tbl_ptrs[tbl]; if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); + *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo); jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]); did[tbl] = TRUE; } @@ -812,15 +1083,15 @@ finish_pass_gather_phuff (j_compress_ptr cinfo) */ GLOBAL(void) -jinit_phuff_encoder (j_compress_ptr cinfo) +jinit_phuff_encoder(j_compress_ptr cinfo) { phuff_entropy_ptr entropy; int i; entropy = (phuff_entropy_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(phuff_entropy_encoder)); - cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; + cinfo->entropy = (struct jpeg_entropy_encoder *)entropy; entropy->pub.start_pass = start_pass_phuff; /* Mark tables unallocated */ diff --git a/jcprepct.c b/jcprepct.c index e72ebd8..d59713a 100644 --- a/jcprepct.c +++ b/jcprepct.c @@ -78,9 +78,9 @@ typedef my_prep_controller *my_prep_ptr; */ METHODDEF(void) -start_pass_prep (j_compress_ptr cinfo, J_BUF_MODE pass_mode) +start_pass_prep(j_compress_ptr cinfo, J_BUF_MODE pass_mode) { - my_prep_ptr prep = (my_prep_ptr) cinfo->prep; + my_prep_ptr prep = (my_prep_ptr)cinfo->prep; if (pass_mode != JBUF_PASS_THRU) ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); @@ -106,14 +106,14 @@ start_pass_prep (j_compress_ptr cinfo, J_BUF_MODE pass_mode) */ LOCAL(void) -expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols, - int input_rows, int output_rows) +expand_bottom_edge(JSAMPARRAY image_data, JDIMENSION num_cols, int input_rows, + int output_rows) { register int row; for (row = input_rows; row < output_rows; row++) { - jcopy_sample_rows(image_data, input_rows-1, image_data, row, - 1, num_cols); + jcopy_sample_rows(image_data, 
input_rows - 1, image_data, row, 1, + num_cols); } } @@ -128,13 +128,12 @@ expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols, */ METHODDEF(void) -pre_process_data (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail, - JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, - JDIMENSION out_row_groups_avail) +pre_process_data(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail, + JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, + JDIMENSION out_row_groups_avail) { - my_prep_ptr prep = (my_prep_ptr) cinfo->prep; + my_prep_ptr prep = (my_prep_ptr)cinfo->prep; int numrows, ci; JDIMENSION inrows; jpeg_component_info *compptr; @@ -144,10 +143,10 @@ pre_process_data (j_compress_ptr cinfo, /* Do color conversion to fill the conversion buffer. */ inrows = in_rows_avail - *in_row_ctr; numrows = cinfo->max_v_samp_factor - prep->next_buf_row; - numrows = (int) MIN((JDIMENSION) numrows, inrows); + numrows = (int)MIN((JDIMENSION)numrows, inrows); (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr, prep->color_buf, - (JDIMENSION) prep->next_buf_row, + (JDIMENSION)prep->next_buf_row, numrows); *in_row_ctr += numrows; prep->next_buf_row += numrows; @@ -164,7 +163,7 @@ pre_process_data (j_compress_ptr cinfo, /* If we've filled the conversion buffer, empty it. */ if (prep->next_buf_row == cinfo->max_v_samp_factor) { (*cinfo->downsample->downsample) (cinfo, - prep->color_buf, (JDIMENSION) 0, + prep->color_buf, (JDIMENSION)0, output_buf, *out_row_group_ctr); prep->next_buf_row = 0; (*out_row_group_ctr)++; @@ -172,14 +171,12 @@ pre_process_data (j_compress_ptr cinfo, /* If at bottom of image, pad the output to a full iMCU height. * Note we assume the caller is providing a one-iMCU-height output buffer! 
*/ - if (prep->rows_to_go == 0 && - *out_row_group_ctr < out_row_groups_avail) { + if (prep->rows_to_go == 0 && *out_row_group_ctr < out_row_groups_avail) { for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - expand_bottom_edge(output_buf[ci], - compptr->width_in_blocks * DCTSIZE, - (int) (*out_row_group_ctr * compptr->v_samp_factor), - (int) (out_row_groups_avail * compptr->v_samp_factor)); + expand_bottom_edge(output_buf[ci], compptr->width_in_blocks * DCTSIZE, + (int)(*out_row_group_ctr * compptr->v_samp_factor), + (int)(out_row_groups_avail * compptr->v_samp_factor)); } *out_row_group_ctr = out_row_groups_avail; break; /* can exit outer loop without test */ @@ -195,13 +192,12 @@ pre_process_data (j_compress_ptr cinfo, */ METHODDEF(void) -pre_process_context (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail, - JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, - JDIMENSION out_row_groups_avail) +pre_process_context(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail, + JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr, + JDIMENSION out_row_groups_avail) { - my_prep_ptr prep = (my_prep_ptr) cinfo->prep; + my_prep_ptr prep = (my_prep_ptr)cinfo->prep; int numrows, ci; int buf_height = cinfo->max_v_samp_factor * 3; JDIMENSION inrows; @@ -211,19 +207,18 @@ pre_process_context (j_compress_ptr cinfo, /* Do color conversion to fill the conversion buffer. 
*/ inrows = in_rows_avail - *in_row_ctr; numrows = prep->next_buf_stop - prep->next_buf_row; - numrows = (int) MIN((JDIMENSION) numrows, inrows); + numrows = (int)MIN((JDIMENSION)numrows, inrows); (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr, prep->color_buf, - (JDIMENSION) prep->next_buf_row, + (JDIMENSION)prep->next_buf_row, numrows); /* Pad at top of image, if first time through */ if (prep->rows_to_go == cinfo->image_height) { for (ci = 0; ci < cinfo->num_components; ci++) { int row; for (row = 1; row <= cinfo->max_v_samp_factor; row++) { - jcopy_sample_rows(prep->color_buf[ci], 0, - prep->color_buf[ci], -row, - 1, cinfo->image_width); + jcopy_sample_rows(prep->color_buf[ci], 0, prep->color_buf[ci], + -row, 1, cinfo->image_width); } } } @@ -245,9 +240,8 @@ pre_process_context (j_compress_ptr cinfo, } /* If we've gotten enough data, downsample a row group. */ if (prep->next_buf_row == prep->next_buf_stop) { - (*cinfo->downsample->downsample) (cinfo, - prep->color_buf, - (JDIMENSION) prep->this_row_group, + (*cinfo->downsample->downsample) (cinfo, prep->color_buf, + (JDIMENSION)prep->this_row_group, output_buf, *out_row_group_ctr); (*out_row_group_ctr)++; /* Advance pointers with wraparound as necessary. */ @@ -267,9 +261,9 @@ pre_process_context (j_compress_ptr cinfo, */ LOCAL(void) -create_context_buffer (j_compress_ptr cinfo) +create_context_buffer(j_compress_ptr cinfo) { - my_prep_ptr prep = (my_prep_ptr) cinfo->prep; + my_prep_ptr prep = (my_prep_ptr)cinfo->prep; int rgroup_height = cinfo->max_v_samp_factor; int ci, i; jpeg_component_info *compptr; @@ -279,7 +273,7 @@ create_context_buffer (j_compress_ptr cinfo) * we need five row groups' worth of pointers for each component. 
*/ fake_buffer = (JSAMPARRAY) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, (cinfo->num_components * 5 * rgroup_height) * sizeof(JSAMPROW)); @@ -290,10 +284,10 @@ create_context_buffer (j_compress_ptr cinfo) * horizontally within the buffer, if it so chooses. */ true_buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE * - cinfo->max_h_samp_factor) / compptr->h_samp_factor), - (JDIMENSION) (3 * rgroup_height)); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (JDIMENSION)(((long)compptr->width_in_blocks * DCTSIZE * + cinfo->max_h_samp_factor) / compptr->h_samp_factor), + (JDIMENSION)(3 * rgroup_height)); /* Copy true buffer row pointers into the middle of the fake row array */ MEMCOPY(fake_buffer + rgroup_height, true_buffer, 3 * rgroup_height * sizeof(JSAMPROW)); @@ -315,7 +309,7 @@ create_context_buffer (j_compress_ptr cinfo) */ GLOBAL(void) -jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer) +jinit_c_prep_controller(j_compress_ptr cinfo, boolean need_full_buffer) { my_prep_ptr prep; int ci; @@ -325,9 +319,9 @@ jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer) ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); prep = (my_prep_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_prep_controller)); - cinfo->prep = (struct jpeg_c_prep_controller *) prep; + cinfo->prep = (struct jpeg_c_prep_controller *)prep; prep->pub.start_pass = start_pass_prep; /* Allocate the color conversion buffer. 
@@ -348,10 +342,10 @@ jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer) for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { prep->color_buf[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE * - cinfo->max_h_samp_factor) / compptr->h_samp_factor), - (JDIMENSION) cinfo->max_v_samp_factor); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (JDIMENSION)(((long)compptr->width_in_blocks * DCTSIZE * + cinfo->max_h_samp_factor) / compptr->h_samp_factor), + (JDIMENSION)cinfo->max_v_samp_factor); } } } diff --git a/jcsample.c b/jcsample.c index c4b4991..bd27b84 100644 --- a/jcsample.c +++ b/jcsample.c @@ -79,7 +79,7 @@ typedef my_downsampler *my_downsample_ptr; */ METHODDEF(void) -start_pass_downsample (j_compress_ptr cinfo) +start_pass_downsample(j_compress_ptr cinfo) { /* no work for now */ } @@ -91,14 +91,14 @@ start_pass_downsample (j_compress_ptr cinfo) */ LOCAL(void) -expand_right_edge (JSAMPARRAY image_data, int num_rows, - JDIMENSION input_cols, JDIMENSION output_cols) +expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols, + JDIMENSION output_cols) { register JSAMPROW ptr; register JSAMPLE pixval; register int count; int row; - int numcols = (int) (output_cols - input_cols); + int numcols = (int)(output_cols - input_cols); if (numcols > 0) { for (row = 0; row < num_rows; row++) { @@ -118,11 +118,11 @@ expand_right_edge (JSAMPARRAY image_data, int num_rows, */ METHODDEF(void) -sep_downsample (j_compress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_index, - JSAMPIMAGE output_buf, JDIMENSION out_row_group_index) +sep_downsample(j_compress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_index, JSAMPIMAGE output_buf, + JDIMENSION out_row_group_index) { - my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample; + my_downsample_ptr downsample = (my_downsample_ptr)cinfo->downsample; int ci; 
jpeg_component_info *compptr; JSAMPARRAY in_ptr, out_ptr; @@ -144,8 +144,8 @@ sep_downsample (j_compress_ptr cinfo, */ METHODDEF(void) -int_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v; JDIMENSION outcol, outcol_h; /* outcol_h == outcol*h_expand */ @@ -156,14 +156,14 @@ int_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor; v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor; numpix = h_expand * v_expand; - numpix2 = numpix/2; + numpix2 = numpix / 2; /* Expand input data enough to let all the output samples be generated * by the standard loop. Special-casing padded output would be more * efficient. */ - expand_right_edge(input_data, cinfo->max_v_samp_factor, - cinfo->image_width, output_cols * h_expand); + expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width, + output_cols * h_expand); inrow = 0; for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { @@ -172,12 +172,12 @@ int_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, outcol++, outcol_h += h_expand) { outvalue = 0; for (v = 0; v < v_expand; v++) { - inptr = input_data[inrow+v] + outcol_h; + inptr = input_data[inrow + v] + outcol_h; for (h = 0; h < h_expand; h++) { - outvalue += (JLONG) GETJSAMPLE(*inptr++); + outvalue += (JLONG)GETJSAMPLE(*inptr++); } } - *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix); + *outptr++ = (JSAMPLE)((outvalue + numpix2) / numpix); } inrow += v_expand; } @@ -191,15 +191,15 @@ int_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) 
+fullsize_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { /* Copy the data */ - jcopy_sample_rows(input_data, 0, output_data, 0, - cinfo->max_v_samp_factor, cinfo->image_width); + jcopy_sample_rows(input_data, 0, output_data, 0, cinfo->max_v_samp_factor, + cinfo->image_width); /* Edge-expand */ - expand_right_edge(output_data, cinfo->max_v_samp_factor, - cinfo->image_width, compptr->width_in_blocks * DCTSIZE); + expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width, + compptr->width_in_blocks * DCTSIZE); } @@ -216,8 +216,8 @@ fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int outrow; JDIMENSION outcol; @@ -229,16 +229,16 @@ h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, * by the standard loop. Special-casing padded output would be more * efficient. */ - expand_right_edge(input_data, cinfo->max_v_samp_factor, - cinfo->image_width, output_cols * 2); + expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width, + output_cols * 2); for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { outptr = output_data[outrow]; inptr = input_data[outrow]; bias = 0; /* bias = 0,1,0,1,... 
for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { - *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) - + bias) >> 1); + *outptr++ = + (JSAMPLE)((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) + bias) >> 1); bias ^= 1; /* 0=>1, 1=>0 */ inptr += 2; } @@ -253,8 +253,8 @@ h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int inrow, outrow; JDIMENSION outcol; @@ -266,21 +266,21 @@ h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, * by the standard loop. Special-casing padded output would be more * efficient. */ - expand_right_edge(input_data, cinfo->max_v_samp_factor, - cinfo->image_width, output_cols * 2); + expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width, + output_cols * 2); inrow = 0; for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { outptr = output_data[outrow]; inptr0 = input_data[inrow]; - inptr1 = input_data[inrow+1]; + inptr1 = input_data[inrow + 1]; bias = 1; /* bias = 1,2,1,2,... 
for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { - *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) - + bias) >> 2); + *outptr++ = + (JSAMPLE)((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + + GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) + bias) >> 2); bias ^= 3; /* 1=>2, 2=>1 */ - inptr0 += 2; inptr1 += 2; + inptr0 += 2; inptr1 += 2; } inrow += 2; } @@ -296,8 +296,8 @@ h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int inrow, outrow; JDIMENSION colctr; @@ -332,9 +332,9 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { outptr = output_data[outrow]; inptr0 = input_data[inrow]; - inptr1 = input_data[inrow+1]; - above_ptr = input_data[inrow-1]; - below_ptr = input_data[inrow+2]; + inptr1 = input_data[inrow + 1]; + above_ptr = input_data[inrow - 1]; + below_ptr = input_data[inrow + 2]; /* Special case for first column: pretend column -1 is same as column 0 */ membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + @@ -347,8 +347,8 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) + GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]); membersum = membersum * memberscale + neighsum * neighscale; - *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); - inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2; + *outptr++ = (JSAMPLE)((membersum + 32768) >> 16); + inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2; for (colctr = output_cols - 2; colctr > 0; colctr--) { /* sum of pixels directly mapped to this 
output element */ @@ -367,8 +367,8 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, /* form final output scaled up by 2^16 */ membersum = membersum * memberscale + neighsum * neighscale; /* round, descale and output it */ - *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); - inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2; + *outptr++ = (JSAMPLE)((membersum + 32768) >> 16); + inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2; } /* Special case for last column */ @@ -382,7 +382,7 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) + GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]); membersum = membersum * memberscale + neighsum * neighscale; - *outptr = (JSAMPLE) ((membersum + 32768) >> 16); + *outptr = (JSAMPLE)((membersum + 32768) >> 16); inrow += 2; } @@ -396,8 +396,8 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int outrow; JDIMENSION colctr; @@ -425,8 +425,8 @@ fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { outptr = output_data[outrow]; inptr = input_data[outrow]; - above_ptr = input_data[outrow-1]; - below_ptr = input_data[outrow+1]; + above_ptr = input_data[outrow - 1]; + below_ptr = input_data[outrow + 1]; /* Special case for first column */ colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) + @@ -436,25 +436,25 @@ fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, GETJSAMPLE(*inptr); neighsum = colsum + (colsum - membersum) + nextcolsum; membersum = membersum * memberscale + 
neighsum * neighscale; - *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); - lastcolsum = colsum; colsum = nextcolsum; + *outptr++ = (JSAMPLE)((membersum + 32768) >> 16); + lastcolsum = colsum; colsum = nextcolsum; for (colctr = output_cols - 2; colctr > 0; colctr--) { membersum = GETJSAMPLE(*inptr++); - above_ptr++; below_ptr++; + above_ptr++; below_ptr++; nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + GETJSAMPLE(*inptr); neighsum = lastcolsum + (colsum - membersum) + nextcolsum; membersum = membersum * memberscale + neighsum * neighscale; - *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); - lastcolsum = colsum; colsum = nextcolsum; + *outptr++ = (JSAMPLE)((membersum + 32768) >> 16); + lastcolsum = colsum; colsum = nextcolsum; } /* Special case for last column */ membersum = GETJSAMPLE(*inptr); neighsum = lastcolsum + (colsum - membersum) + colsum; membersum = membersum * memberscale + neighsum * neighscale; - *outptr = (JSAMPLE) ((membersum + 32768) >> 16); + *outptr = (JSAMPLE)((membersum + 32768) >> 16); } } @@ -468,7 +468,7 @@ fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jinit_downsampler (j_compress_ptr cinfo) +jinit_downsampler(j_compress_ptr cinfo) { my_downsample_ptr downsample; int ci; @@ -476,9 +476,9 @@ jinit_downsampler (j_compress_ptr cinfo) boolean smoothok = TRUE; downsample = (my_downsample_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_downsampler)); - cinfo->downsample = (struct jpeg_downsampler *) downsample; + cinfo->downsample = (struct jpeg_downsampler *)downsample; downsample->pub.start_pass = start_pass_downsample; downsample->pub.downsample = sep_downsample; downsample->pub.need_context_rows = FALSE; diff --git a/jcstest.c b/jcstest.c index 11883b5..8b1fe38 100644 --- a/jcstest.c +++ b/jcstest.c @@ -35,10 +35,10 @@ #include #ifndef JCS_EXTENSIONS -#define JCS_EXT_RGB 6 
+#define JCS_EXT_RGB 6 #endif #if !defined(JCS_EXTENSIONS) || !defined(JCS_ALPHA_EXTENSIONS) -#define JCS_EXT_RGBA 12 +#define JCS_EXT_RGBA 12 #endif static char lasterror[JMSG_LENGTH_MAX] = "No error"; @@ -51,13 +51,13 @@ typedef struct _error_mgr { static void my_error_exit(j_common_ptr cinfo) { error_mgr *myerr = (error_mgr *)cinfo->err; - (*cinfo->err->output_message)(cinfo); + (*cinfo->err->output_message) (cinfo); longjmp(myerr->jb, 1); } static void my_output_message(j_common_ptr cinfo) { - (*cinfo->err->format_message)(cinfo, lasterror); + (*cinfo->err->format_message) (cinfo, lasterror); } int main(void) @@ -67,11 +67,11 @@ int main(void) error_mgr jerr; printf("libjpeg-turbo colorspace extensions:\n"); - #if JCS_EXTENSIONS +#if JCS_EXTENSIONS printf(" Present at compile time\n"); - #else +#else printf(" Not present at compile time\n"); - #endif +#endif cinfo.err = jpeg_std_error(&jerr.pub); jerr.pub.error_exit = my_error_exit; @@ -90,7 +90,7 @@ int main(void) jpeg_default_colorspace(&cinfo); jcs_valid = 1; - done: +done: if (jcs_valid) printf(" Working properly\n"); else @@ -98,11 +98,11 @@ int main(void) lasterror); printf("libjpeg-turbo alpha colorspace extensions:\n"); - #if JCS_ALPHA_EXTENSIONS +#if JCS_ALPHA_EXTENSIONS printf(" Present at compile time\n"); - #else +#else printf(" Not present at compile time\n"); - #endif +#endif if (setjmp(jerr.jb)) { /* this will execute if libjpeg has an error */ @@ -114,7 +114,7 @@ int main(void) jpeg_default_colorspace(&cinfo); jcs_alpha_valid = 1; - done2: +done2: if (jcs_alpha_valid) printf(" Working properly\n"); else diff --git a/jctrans.c b/jctrans.c index 6f16b05..ce70a30 100644 --- a/jctrans.c +++ b/jctrans.c @@ -20,10 +20,10 @@ /* Forward declarations */ -LOCAL(void) transencode_master_selection - (j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays); -LOCAL(void) transencode_coef_controller - (j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays); +LOCAL(void) transencode_master_selection(j_compress_ptr 
cinfo, + jvirt_barray_ptr *coef_arrays); +LOCAL(void) transencode_coef_controller(j_compress_ptr cinfo, + jvirt_barray_ptr *coef_arrays); /* @@ -39,14 +39,14 @@ LOCAL(void) transencode_coef_controller */ GLOBAL(void) -jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays) +jpeg_write_coefficients(j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays) { if (cinfo->global_state != CSTATE_START) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); /* Mark all tables to be written */ jpeg_suppress_tables(cinfo, FALSE); /* (Re)initialize error mgr and destination modules */ - (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo); + (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo); (*cinfo->dest->init_destination) (cinfo); /* Perform master selection of active modules */ transencode_master_selection(cinfo, coef_arrays); @@ -64,8 +64,7 @@ jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays) */ GLOBAL(void) -jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, - j_compress_ptr dstinfo) +jpeg_copy_critical_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo) { JQUANT_TBL **qtblptr; jpeg_component_info *incomp, *outcomp; @@ -97,11 +96,10 @@ jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, /* Copy the source's quantization tables. 
*/ for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) { if (srcinfo->quant_tbl_ptrs[tblno] != NULL) { - qtblptr = & dstinfo->quant_tbl_ptrs[tblno]; + qtblptr = &dstinfo->quant_tbl_ptrs[tblno]; if (*qtblptr == NULL) - *qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo); - MEMCOPY((*qtblptr)->quantval, - srcinfo->quant_tbl_ptrs[tblno]->quantval, + *qtblptr = jpeg_alloc_quant_table((j_common_ptr)dstinfo); + MEMCOPY((*qtblptr)->quantval, srcinfo->quant_tbl_ptrs[tblno]->quantval, sizeof((*qtblptr)->quantval)); (*qtblptr)->sent_table = FALSE; } @@ -165,8 +163,8 @@ jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, */ LOCAL(void) -transencode_master_selection (j_compress_ptr cinfo, - jvirt_barray_ptr *coef_arrays) +transencode_master_selection(j_compress_ptr cinfo, + jvirt_barray_ptr *coef_arrays) { /* Although we don't actually use input_components for transcoding, * jcmaster.c's initial_setup will complain if input_components is 0. @@ -199,7 +197,7 @@ transencode_master_selection (j_compress_ptr cinfo, jinit_marker_writer(cinfo); /* We can now tell the memory manager to allocate virtual arrays. */ - (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo); + (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo); /* Write the datastream header (SOI, JFIF) immediately. * Frame and scan headers are postponed till later. @@ -238,10 +236,10 @@ typedef my_coef_controller *my_coef_ptr; LOCAL(void) -start_iMCU_row (j_compress_ptr cinfo) +start_iMCU_row(j_compress_ptr cinfo) /* Reset within-iMCU-row counters for a new row */ { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; /* In an interleaved scan, an MCU row is the same as an iMCU row. * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows. 
@@ -250,7 +248,7 @@ start_iMCU_row (j_compress_ptr cinfo) if (cinfo->comps_in_scan > 1) { coef->MCU_rows_per_iMCU_row = 1; } else { - if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1)) + if (coef->iMCU_row_num < (cinfo->total_iMCU_rows - 1)) coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor; else coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height; @@ -266,9 +264,9 @@ start_iMCU_row (j_compress_ptr cinfo) */ METHODDEF(void) -start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode) +start_pass_coef(j_compress_ptr cinfo, J_BUF_MODE pass_mode) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; if (pass_mode != JBUF_CRANK_DEST) ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); @@ -289,9 +287,9 @@ start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode) */ METHODDEF(boolean) -compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) +compress_output(j_compress_ptr cinfo, JSAMPIMAGE input_buf) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; JDIMENSION MCU_col_num; /* index of current MCU within row */ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; @@ -306,9 +304,9 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; buffer[ci] = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index], + ((j_common_ptr)cinfo, coef->whole_image[compptr->component_index], coef->iMCU_row_num * compptr->v_samp_factor, - (JDIMENSION) compptr->v_samp_factor, FALSE); + (JDIMENSION)compptr->v_samp_factor, FALSE); } /* Loop to process one whole iMCU row */ @@ -321,13 +319,13 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; start_col = MCU_col_num * compptr->MCU_width; - 
blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width - : compptr->last_col_width; + blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width : + compptr->last_col_width; for (yindex = 0; yindex < compptr->MCU_height; yindex++) { if (coef->iMCU_row_num < last_iMCU_row || - yindex+yoffset < compptr->last_row_height) { + yindex + yoffset < compptr->last_row_height) { /* Fill in pointers to real blocks in this row */ - buffer_ptr = buffer[ci][yindex+yoffset] + start_col; + buffer_ptr = buffer[ci][yindex + yoffset] + start_col; for (xindex = 0; xindex < blockcnt; xindex++) MCU_buffer[blkn++] = buffer_ptr++; } else { @@ -342,13 +340,13 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) */ for (; xindex < compptr->MCU_width; xindex++) { MCU_buffer[blkn] = coef->dummy_buffer[blkn]; - MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0]; + MCU_buffer[blkn][0][0] = MCU_buffer[blkn - 1][0][0]; blkn++; } } } /* Try to write the MCU. */ - if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) { + if (!(*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) { /* Suspension forced; update state counters and exit */ coef->MCU_vert_offset = yoffset; coef->mcu_ctr = MCU_col_num; @@ -374,17 +372,17 @@ compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf) */ LOCAL(void) -transencode_coef_controller (j_compress_ptr cinfo, - jvirt_barray_ptr *coef_arrays) +transencode_coef_controller(j_compress_ptr cinfo, + jvirt_barray_ptr *coef_arrays) { my_coef_ptr coef; JBLOCKROW buffer; int i; coef = (my_coef_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_coef_controller)); - cinfo->coef = (struct jpeg_c_coef_controller *) coef; + cinfo->coef = (struct jpeg_c_coef_controller *)coef; coef->pub.start_pass = start_pass_coef; coef->pub.compress_data = compress_output; @@ -393,9 +391,9 @@ transencode_coef_controller (j_compress_ptr cinfo, /* Allocate and pre-zero space for dummy DCT 
blocks. */ buffer = (JBLOCKROW) - (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE, C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); - jzero_far((void *) buffer, C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); + jzero_far((void *)buffer, C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) { coef->dummy_buffer[i] = buffer + i; } diff --git a/jdapimin.c b/jdapimin.c index f80a146..21a41d2 100644 --- a/jdapimin.c +++ b/jdapimin.c @@ -31,7 +31,7 @@ */ GLOBAL(void) -jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize) +jpeg_CreateDecompress(j_decompress_ptr cinfo, int version, size_t structsize) { int i; @@ -41,7 +41,7 @@ jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize) ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version); if (structsize != sizeof(struct jpeg_decompress_struct)) ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, - (int) sizeof(struct jpeg_decompress_struct), (int) structsize); + (int)sizeof(struct jpeg_decompress_struct), (int)structsize); /* For debugging purposes, we zero the whole master structure. * But the application has already set the err pointer, and may have set @@ -50,8 +50,8 @@ jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize) * complain here. 
*/ { - struct jpeg_error_mgr * err = cinfo->err; - void * client_data = cinfo->client_data; /* ignore Purify complaint here */ + struct jpeg_error_mgr *err = cinfo->err; + void *client_data = cinfo->client_data; /* ignore Purify complaint here */ MEMZERO(cinfo, sizeof(struct jpeg_decompress_struct)); cinfo->err = err; cinfo->client_data = client_data; @@ -59,7 +59,7 @@ jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize) cinfo->is_decompressor = TRUE; /* Initialize a memory manager instance for this object */ - jinit_memory_mgr((j_common_ptr) cinfo); + jinit_memory_mgr((j_common_ptr)cinfo); /* Zero out pointers to permanent structures. */ cinfo->progress = NULL; @@ -89,8 +89,8 @@ jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize) * here. */ cinfo->master = (struct jpeg_decomp_master *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - sizeof(my_decomp_master)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, + sizeof(my_decomp_master)); MEMZERO(cinfo->master, sizeof(my_decomp_master)); } @@ -100,9 +100,9 @@ jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize) */ GLOBAL(void) -jpeg_destroy_decompress (j_decompress_ptr cinfo) +jpeg_destroy_decompress(j_decompress_ptr cinfo) { - jpeg_destroy((j_common_ptr) cinfo); /* use common routine */ + jpeg_destroy((j_common_ptr)cinfo); /* use common routine */ } @@ -112,9 +112,9 @@ jpeg_destroy_decompress (j_decompress_ptr cinfo) */ GLOBAL(void) -jpeg_abort_decompress (j_decompress_ptr cinfo) +jpeg_abort_decompress(j_decompress_ptr cinfo) { - jpeg_abort((j_common_ptr) cinfo); /* use common routine */ + jpeg_abort((j_common_ptr)cinfo); /* use common routine */ } @@ -123,7 +123,7 @@ jpeg_abort_decompress (j_decompress_ptr cinfo) */ LOCAL(void) -default_decompress_parms (j_decompress_ptr cinfo) +default_decompress_parms(j_decompress_ptr cinfo) { /* Guess the input colorspace, and set output colorspace 
accordingly. */ /* (Wish JPEG committee had provided a real way to specify this...) */ @@ -250,7 +250,7 @@ default_decompress_parms (j_decompress_ptr cinfo) */ GLOBAL(int) -jpeg_read_header (j_decompress_ptr cinfo, boolean require_image) +jpeg_read_header(j_decompress_ptr cinfo, boolean require_image) { int retcode; @@ -271,7 +271,7 @@ jpeg_read_header (j_decompress_ptr cinfo, boolean require_image) * call jpeg_abort, but we can't change it now for compatibility reasons. * A side effect is to free any temporary memory (there shouldn't be any). */ - jpeg_abort((j_common_ptr) cinfo); /* sets state = DSTATE_START */ + jpeg_abort((j_common_ptr)cinfo); /* sets state = DSTATE_START */ retcode = JPEG_HEADER_TABLES_ONLY; break; case JPEG_SUSPENDED: @@ -296,7 +296,7 @@ jpeg_read_header (j_decompress_ptr cinfo, boolean require_image) */ GLOBAL(int) -jpeg_consume_input (j_decompress_ptr cinfo) +jpeg_consume_input(j_decompress_ptr cinfo) { int retcode = JPEG_SUSPENDED; @@ -343,7 +343,7 @@ jpeg_consume_input (j_decompress_ptr cinfo) */ GLOBAL(boolean) -jpeg_input_complete (j_decompress_ptr cinfo) +jpeg_input_complete(j_decompress_ptr cinfo) { /* Check for valid jpeg object */ if (cinfo->global_state < DSTATE_START || @@ -358,7 +358,7 @@ jpeg_input_complete (j_decompress_ptr cinfo) */ GLOBAL(boolean) -jpeg_has_multiple_scans (j_decompress_ptr cinfo) +jpeg_has_multiple_scans(j_decompress_ptr cinfo) { /* Only valid after jpeg_read_header completes */ if (cinfo->global_state < DSTATE_READY || @@ -378,10 +378,10 @@ jpeg_has_multiple_scans (j_decompress_ptr cinfo) */ GLOBAL(boolean) -jpeg_finish_decompress (j_decompress_ptr cinfo) +jpeg_finish_decompress(j_decompress_ptr cinfo) { if ((cinfo->global_state == DSTATE_SCANNING || - cinfo->global_state == DSTATE_RAW_OK) && ! 
cinfo->buffered_image) { + cinfo->global_state == DSTATE_RAW_OK) && !cinfo->buffered_image) { /* Terminate final pass of non-buffered mode */ if (cinfo->output_scanline < cinfo->output_height) ERREXIT(cinfo, JERR_TOO_LITTLE_DATA); @@ -395,13 +395,13 @@ jpeg_finish_decompress (j_decompress_ptr cinfo) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); } /* Read until EOI */ - while (! cinfo->inputctl->eoi_reached) { + while (!cinfo->inputctl->eoi_reached) { if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED) return FALSE; /* Suspend, come back later */ } /* Do final cleanup */ (*cinfo->src->term_source) (cinfo); /* We can use jpeg_abort to release memory and reset global_state */ - jpeg_abort((j_common_ptr) cinfo); + jpeg_abort((j_common_ptr)cinfo); return TRUE; } diff --git a/jdapistd.c b/jdapistd.c index 105121d..2c808fa 100644 --- a/jdapistd.c +++ b/jdapistd.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2010, 2015-2017, D. R. Commander. + * Copyright (C) 2010, 2015-2018, D. R. Commander. * Copyright (C) 2015, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. 
@@ -25,7 +25,7 @@ #include "jmemsys.h" /* Forward declarations */ -LOCAL(boolean) output_pass_setup (j_decompress_ptr cinfo); +LOCAL(boolean) output_pass_setup(j_decompress_ptr cinfo); /* @@ -40,7 +40,7 @@ LOCAL(boolean) output_pass_setup (j_decompress_ptr cinfo); */ GLOBAL(boolean) -jpeg_start_decompress (j_decompress_ptr cinfo) +jpeg_start_decompress(j_decompress_ptr cinfo) { if (cinfo->global_state == DSTATE_READY) { /* First call: initialize master control, select active modules */ @@ -60,7 +60,7 @@ jpeg_start_decompress (j_decompress_ptr cinfo) int retcode; /* Call progress monitor hook if present */ if (cinfo->progress != NULL) - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo); /* Absorb some more input */ retcode = (*cinfo->inputctl->consume_input) (cinfo); if (retcode == JPEG_SUSPENDED) @@ -72,7 +72,7 @@ jpeg_start_decompress (j_decompress_ptr cinfo) (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) { if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) { /* jdmaster underestimated number of scans; ratchet up one scan */ - cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows; + cinfo->progress->pass_limit += (long)cinfo->total_iMCU_rows; } } } @@ -97,7 +97,7 @@ jpeg_start_decompress (j_decompress_ptr cinfo) */ LOCAL(boolean) -output_pass_setup (j_decompress_ptr cinfo) +output_pass_setup(j_decompress_ptr cinfo) { if (cinfo->global_state != DSTATE_PRESCAN) { /* First call: do pass setup */ @@ -113,14 +113,14 @@ output_pass_setup (j_decompress_ptr cinfo) JDIMENSION last_scanline; /* Call progress monitor hook if present */ if (cinfo->progress != NULL) { - cinfo->progress->pass_counter = (long) cinfo->output_scanline; - cinfo->progress->pass_limit = (long) cinfo->output_height; - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + cinfo->progress->pass_counter = (long)cinfo->output_scanline; + cinfo->progress->pass_limit = 
(long)cinfo->output_height; + (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo); } /* Process some data */ last_scanline = cinfo->output_scanline; - (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL, - &cinfo->output_scanline, (JDIMENSION) 0); + (*cinfo->main->process_data) (cinfo, (JSAMPARRAY)NULL, + &cinfo->output_scanline, (JDIMENSION)0); if (cinfo->output_scanline == last_scanline) return FALSE; /* No progress made, must suspend */ } @@ -150,8 +150,8 @@ output_pass_setup (j_decompress_ptr cinfo) */ GLOBAL(void) -jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, - JDIMENSION *width) +jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width) { int ci, align, orig_downsampled_width; JDIMENSION input_xoffset; @@ -210,11 +210,10 @@ jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, /* Set the first and last iMCU columns that we must decompress. These values * will be used in single-scan decompressions. */ - cinfo->master->first_iMCU_col = - (JDIMENSION) (long) (*xoffset) / (long) align; + cinfo->master->first_iMCU_col = (JDIMENSION)(long)(*xoffset) / (long)align; cinfo->master->last_iMCU_col = - (JDIMENSION) jdiv_round_up((long) (*xoffset + cinfo->output_width), - (long) align) - 1; + (JDIMENSION)jdiv_round_up((long)(*xoffset + cinfo->output_width), + (long)align) - 1; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { @@ -224,9 +223,9 @@ jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, /* Set downsampled_width to the new output width. 
*/ orig_downsampled_width = compptr->downsampled_width; compptr->downsampled_width = - (JDIMENSION) jdiv_round_up((long) (cinfo->output_width * - compptr->h_samp_factor), - (long) cinfo->max_h_samp_factor); + (JDIMENSION)jdiv_round_up((long)(cinfo->output_width * + compptr->h_samp_factor), + (long)cinfo->max_h_samp_factor); if (compptr->downsampled_width < 2 && orig_downsampled_width >= 2) reinit_upsampler = TRUE; @@ -234,11 +233,10 @@ jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, * values will be used in multi-scan decompressions. */ cinfo->master->first_MCU_col[ci] = - (JDIMENSION) (long) (*xoffset * hsf) / (long) align; + (JDIMENSION)(long)(*xoffset * hsf) / (long)align; cinfo->master->last_MCU_col[ci] = - (JDIMENSION) jdiv_round_up((long) ((*xoffset + cinfo->output_width) * - hsf), - (long) align) - 1; + (JDIMENSION)jdiv_round_up((long)((*xoffset + cinfo->output_width) * hsf), + (long)align) - 1; } if (reinit_upsampler) { @@ -263,8 +261,8 @@ jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, */ GLOBAL(JDIMENSION) -jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines, - JDIMENSION max_lines) +jpeg_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION max_lines) { JDIMENSION row_ctr; @@ -277,9 +275,9 @@ jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines, /* Call progress monitor hook if present */ if (cinfo->progress != NULL) { - cinfo->progress->pass_counter = (long) cinfo->output_scanline; - cinfo->progress->pass_limit = (long) cinfo->output_height; - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + cinfo->progress->pass_counter = (long)cinfo->output_scanline; + cinfo->progress->pass_limit = (long)cinfo->output_height; + (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo); } /* Process some data */ @@ -292,16 +290,16 @@ jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines, /* Dummy color convert function used by jpeg_skip_scanlines() */ LOCAL(void) 
-noop_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, - JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) +noop_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { } /* Dummy quantize function used by jpeg_skip_scanlines() */ LOCAL(void) -noop_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) +noop_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPARRAY output_buf, int num_rows) { } @@ -315,17 +313,20 @@ noop_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf, */ LOCAL(void) -read_and_discard_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) +read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) { JDIMENSION n; void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, JSAMPARRAY output_buf, - int num_rows); + int num_rows) = NULL; void (*color_quantize) (j_decompress_ptr cinfo, JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) = NULL; - color_convert = cinfo->cconvert->color_convert; - cinfo->cconvert->color_convert = noop_convert; + if (cinfo->cconvert && cinfo->cconvert->color_convert) { + color_convert = cinfo->cconvert->color_convert; + cinfo->cconvert->color_convert = noop_convert; + } + if (cinfo->cquantize && cinfo->cquantize->color_quantize) { color_quantize = cinfo->cquantize->color_quantize; cinfo->cquantize->color_quantize = noop_quantize; @@ -334,7 +335,9 @@ read_and_discard_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) for (n = 0; n < num_lines; n++) jpeg_read_scanlines(cinfo, NULL, 1); - cinfo->cconvert->color_convert = color_convert; + if (color_convert) + cinfo->cconvert->color_convert = color_convert; + if (color_quantize) cinfo->cquantize->color_quantize = color_quantize; } @@ -346,10 +349,10 @@ read_and_discard_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) */ LOCAL(void) -increment_simple_rowgroup_ctr (j_decompress_ptr 
cinfo, JDIMENSION rows) +increment_simple_rowgroup_ctr(j_decompress_ptr cinfo, JDIMENSION rows) { JDIMENSION rows_left; - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; /* Increment the counter to the next row group after the skipped rows. */ main_ptr->rowgroup_ctr += rows / cinfo->max_v_samp_factor; @@ -375,11 +378,11 @@ increment_simple_rowgroup_ctr (j_decompress_ptr cinfo, JDIMENSION rows) */ GLOBAL(JDIMENSION) -jpeg_skip_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) +jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) { - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; JDIMENSION i, x; int y; JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row; @@ -481,7 +484,7 @@ jpeg_skip_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) if (cinfo->upsample->need_context_rows) { cinfo->output_scanline += lines_to_skip; cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row; - main_ptr->iMCU_row_ctr += lines_after_iMCU_row / lines_per_iMCU_row; + main_ptr->iMCU_row_ctr += lines_to_skip / lines_per_iMCU_row; /* It is complex to properly move to the middle of a context block, so * read the remaining lines instead of skipping them. 
*/ @@ -544,8 +547,8 @@ jpeg_skip_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) */ GLOBAL(JDIMENSION) -jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data, - JDIMENSION max_lines) +jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION max_lines) { JDIMENSION lines_per_iMCU_row; @@ -558,9 +561,9 @@ jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data, /* Call progress monitor hook if present */ if (cinfo->progress != NULL) { - cinfo->progress->pass_counter = (long) cinfo->output_scanline; - cinfo->progress->pass_limit = (long) cinfo->output_height; - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + cinfo->progress->pass_counter = (long)cinfo->output_scanline; + cinfo->progress->pass_limit = (long)cinfo->output_height; + (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo); } /* Verify that at least one iMCU row can be returned. */ @@ -569,7 +572,7 @@ jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data, ERREXIT(cinfo, JERR_BUFFER_SIZE); /* Decompress directly into user's buffer. */ - if (! (*cinfo->coef->decompress_data) (cinfo, data)) + if (!(*cinfo->coef->decompress_data) (cinfo, data)) return 0; /* suspension forced, can do nothing more */ /* OK, we processed one iMCU row. 
*/ @@ -587,7 +590,7 @@ jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data, */ GLOBAL(boolean) -jpeg_start_output (j_decompress_ptr cinfo, int scan_number) +jpeg_start_output(j_decompress_ptr cinfo, int scan_number) { if (cinfo->global_state != DSTATE_BUFIMAGE && cinfo->global_state != DSTATE_PRESCAN) @@ -595,8 +598,7 @@ jpeg_start_output (j_decompress_ptr cinfo, int scan_number) /* Limit scan number to valid range */ if (scan_number <= 0) scan_number = 1; - if (cinfo->inputctl->eoi_reached && - scan_number > cinfo->input_scan_number) + if (cinfo->inputctl->eoi_reached && scan_number > cinfo->input_scan_number) scan_number = cinfo->input_scan_number; cinfo->output_scan_number = scan_number; /* Perform any dummy output passes, and set up for the real pass */ @@ -612,7 +614,7 @@ jpeg_start_output (j_decompress_ptr cinfo, int scan_number) */ GLOBAL(boolean) -jpeg_finish_output (j_decompress_ptr cinfo) +jpeg_finish_output(j_decompress_ptr cinfo) { if ((cinfo->global_state == DSTATE_SCANNING || cinfo->global_state == DSTATE_RAW_OK) && cinfo->buffered_image) { @@ -626,7 +628,7 @@ jpeg_finish_output (j_decompress_ptr cinfo) } /* Read markers looking for SOS or EOI */ while (cinfo->input_scan_number <= cinfo->output_scan_number && - ! cinfo->inputctl->eoi_reached) { + !cinfo->inputctl->eoi_reached) { if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED) return FALSE; /* Suspend, come back later */ } diff --git a/jdarith.c b/jdarith.c index ce0f920..6002481 100644 --- a/jdarith.c +++ b/jdarith.c @@ -4,16 +4,19 @@ * This file was part of the Independent JPEG Group's software: * Developed 1997-2015 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2015-2016, D. R. Commander. + * Copyright (C) 2015-2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* - * This file contains portable arithmetic entropy decoding routines for JPEG - * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81). + * This file contains portable arithmetic entropy decoding routines for JPEG + * (implementing Recommendation ITU-T T.81 | ISO/IEC 10918-1). * * Both sequential and progressive modes are supported in this single module. * * Suspension is not currently supported in this module. + * + * NOTE: All referenced figures are from + * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994. */ #define JPEG_INTERNALS @@ -21,7 +24,7 @@ #include "jpeglib.h" -#define NEG_1 ((unsigned int)-1) +#define NEG_1 ((unsigned int)-1) /* Expanded entropy decoder object for arithmetic decoding. */ @@ -63,18 +66,18 @@ typedef arith_entropy_decoder *arith_entropy_ptr; * in the lower bits (mask 0x7F). */ -#define DC_STAT_BINS 64 -#define AC_STAT_BINS 256 +#define DC_STAT_BINS 64 +#define AC_STAT_BINS 256 LOCAL(int) -get_byte (j_decompress_ptr cinfo) +get_byte(j_decompress_ptr cinfo) /* Read next input byte; we do not support suspension in this module. */ { struct jpeg_source_mgr *src = cinfo->src; if (src->bytes_in_buffer == 0) - if (!
(*src->fill_input_buffer) (cinfo)) + if (!(*src->fill_input_buffer) (cinfo)) ERREXIT(cinfo, JERR_CANT_SUSPEND); src->bytes_in_buffer--; return GETJOCTET(*src->next_input_byte++); @@ -109,9 +112,9 @@ get_byte (j_decompress_ptr cinfo) */ LOCAL(int) -arith_decode (j_decompress_ptr cinfo, unsigned char *st) +arith_decode(j_decompress_ptr cinfo, unsigned char *st) { - register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; + register arith_entropy_ptr e = (arith_entropy_ptr)cinfo->entropy; register unsigned char nl, nm; register JLONG qe, temp; register int sv, data; @@ -156,8 +159,8 @@ arith_decode (j_decompress_ptr cinfo, unsigned char *st) */ sv = *st; qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ - nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ - nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ + nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ + nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ /* Decode & estimation procedures per sections D.2.4 & D.2.5 */ temp = e->a - qe; @@ -193,14 +196,14 @@ arith_decode (j_decompress_ptr cinfo, unsigned char *st) */ LOCAL(void) -process_restart (j_decompress_ptr cinfo) +process_restart(j_decompress_ptr cinfo) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; int ci; jpeg_component_info *compptr; /* Advance past the RSTn marker */ - if (! 
(*cinfo->marker->read_restart_marker) (cinfo)) + if (!(*cinfo->marker->read_restart_marker) (cinfo)) ERREXIT(cinfo, JERR_CANT_SUSPEND); /* Re-initialize statistics areas */ @@ -244,9 +247,9 @@ process_restart (j_decompress_ptr cinfo) */ METHODDEF(boolean) -decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; JBLOCKROW block; unsigned char *st; int blkn, ci, tbl, sign; @@ -280,7 +283,7 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Figure F.21: Decoding nonzero value v */ /* Figure F.22: Decoding the sign of v */ sign = arith_decode(cinfo, st + 1); - st += 2; st += sign; + st += 2; st += sign; /* Figure F.23: Decoding the magnitude category of v */ if ((m = arith_decode(cinfo, st)) != 0) { st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ @@ -294,9 +297,9 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } } /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ - if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) + if (m < (int)((1L << cinfo->arith_dc_L[tbl]) >> 1)) entropy->dc_context[ci] = 0; /* zero diff category */ - else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) + else if (m > (int)((1L << cinfo->arith_dc_U[tbl]) >> 1)) entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ else entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ @@ -305,12 +308,12 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) st += 14; while (m >>= 1) if (arith_decode(cinfo, st)) v |= m; - v += 1; if (sign) v = -v; - entropy->last_dc_val[ci] += v; + v += 1; if (sign) v = -v; + entropy->last_dc_val[ci] = (entropy->last_dc_val[ci] + v) & 0xffff; } /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */ - (*block)[0] = (JCOEF) 
LEFT_SHIFT(entropy->last_dc_val[ci], cinfo->Al); + (*block)[0] = (JCOEF)LEFT_SHIFT(entropy->last_dc_val[ci], cinfo->Al); } return TRUE; @@ -323,9 +326,9 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu_AC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; JBLOCKROW block; unsigned char *st; int tbl, sign, k; @@ -351,7 +354,7 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) st = entropy->ac_stats[tbl] + 3 * (k - 1); if (arith_decode(cinfo, st)) break; /* EOB flag */ while (arith_decode(cinfo, st + 1) == 0) { - st += 3; k++; + st += 3; k++; if (k > cinfo->Se) { WARNMS(cinfo, JWRN_ARITH_BAD_CODE); entropy->ct = -1; /* spectral overflow */ @@ -383,9 +386,9 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) st += 14; while (m >>= 1) if (arith_decode(cinfo, st)) v |= m; - v += 1; if (sign) v = -v; + v += 1; if (sign) v = -v; /* Scale and output coefficient in natural (dezigzagged) order */ - (*block)[jpeg_natural_order[k]] = (JCOEF) ((unsigned)v << cinfo->Al); + (*block)[jpeg_natural_order[k]] = (JCOEF)((unsigned)v << cinfo->Al); } return TRUE; @@ -397,9 +400,9 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; unsigned char *st; int p1, blkn; @@ -430,9 +433,9 @@ decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) 
{ - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; JBLOCKROW block; JCOEFPTR thiscoef; unsigned char *st; @@ -481,7 +484,7 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) *thiscoef = p1; break; } - st += 3; k++; + st += 3; k++; if (k > cinfo->Se) { WARNMS(cinfo, JWRN_ARITH_BAD_CODE); entropy->ct = -1; /* spectral overflow */ @@ -499,9 +502,9 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; jpeg_component_info *compptr; JBLOCKROW block; unsigned char *st; @@ -538,7 +541,7 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Figure F.21: Decoding nonzero value v */ /* Figure F.22: Decoding the sign of v */ sign = arith_decode(cinfo, st + 1); - st += 2; st += sign; + st += 2; st += sign; /* Figure F.23: Decoding the magnitude category of v */ if ((m = arith_decode(cinfo, st)) != 0) { st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ @@ -552,9 +555,9 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } } /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ - if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) + if (m < (int)((1L << cinfo->arith_dc_L[tbl]) >> 1)) entropy->dc_context[ci] = 0; /* zero diff category */ - else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) + else if (m > (int)((1L << cinfo->arith_dc_U[tbl]) >> 1)) entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ else entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ @@ -563,12 +566,12 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) st += 14; while (m >>= 1) if (arith_decode(cinfo, st)) v |= m; - v += 1; if (sign) v = 
-v; - entropy->last_dc_val[ci] += v; + v += 1; if (sign) v = -v; + entropy->last_dc_val[ci] = (entropy->last_dc_val[ci] + v) & 0xffff; } if (block) - (*block)[0] = (JCOEF) entropy->last_dc_val[ci]; + (*block)[0] = (JCOEF)entropy->last_dc_val[ci]; /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */ @@ -579,7 +582,7 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) st = entropy->ac_stats[tbl] + 3 * (k - 1); if (arith_decode(cinfo, st)) break; /* EOB flag */ while (arith_decode(cinfo, st + 1) == 0) { - st += 3; k++; + st += 3; k++; if (k > DCTSIZE2 - 1) { WARNMS(cinfo, JWRN_ARITH_BAD_CODE); entropy->ct = -1; /* spectral overflow */ @@ -611,9 +614,9 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) st += 14; while (m >>= 1) if (arith_decode(cinfo, st)) v |= m; - v += 1; if (sign) v = -v; + v += 1; if (sign) v = -v; if (block) - (*block)[jpeg_natural_order[k]] = (JCOEF) v; + (*block)[jpeg_natural_order[k]] = (JCOEF)v; } } @@ -626,9 +629,9 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(void) -start_pass (j_decompress_ptr cinfo) +start_pass(j_decompress_ptr cinfo) { - arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy; int ci, tbl; jpeg_component_info *compptr; @@ -647,11 +650,11 @@ start_pass (j_decompress_ptr cinfo) } if (cinfo->Ah != 0) { /* Successive approximation refinement scan: must have Al = Ah-1. 
*/ - if (cinfo->Ah-1 != cinfo->Al) + if (cinfo->Ah - 1 != cinfo->Al) goto bad; } if (cinfo->Al > 13) { /* need not check for < 0 */ - bad: +bad: ERREXIT4(cinfo, JERR_BAD_PROGRESSION, cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); } @@ -661,7 +664,7 @@ start_pass (j_decompress_ptr cinfo) */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { int coefi, cindex = cinfo->cur_comp_info[ci]->component_index; - int *coef_bit_ptr = & cinfo->coef_bits[cindex][0]; + int *coef_bit_ptr = &cinfo->coef_bits[cindex][0]; if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { @@ -702,8 +705,8 @@ start_pass (j_decompress_ptr cinfo) if (tbl < 0 || tbl >= NUM_ARITH_TBLS) ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->dc_stats[tbl] == NULL) - entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); + entropy->dc_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, JPOOL_IMAGE, DC_STAT_BINS); MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS); /* Initialize DC predictions to 0 */ entropy->last_dc_val[ci] = 0; @@ -714,8 +717,8 @@ start_pass (j_decompress_ptr cinfo) if (tbl < 0 || tbl >= NUM_ARITH_TBLS) ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); if (entropy->ac_stats[tbl] == NULL) - entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); + entropy->ac_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, JPOOL_IMAGE, AC_STAT_BINS); MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS); } } @@ -735,15 +738,15 @@ start_pass (j_decompress_ptr cinfo) */ GLOBAL(void) -jinit_arith_decoder (j_decompress_ptr cinfo) +jinit_arith_decoder(j_decompress_ptr cinfo) { arith_entropy_ptr entropy; int i; entropy = (arith_entropy_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + 
(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(arith_entropy_decoder)); - cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; + cinfo->entropy = (struct jpeg_entropy_decoder *)entropy; entropy->pub.start_pass = start_pass; /* Mark tables unallocated */ @@ -759,9 +762,10 @@ jinit_arith_decoder (j_decompress_ptr cinfo) /* Create progression status table */ int *coef_bit_ptr, ci; cinfo->coef_bits = (int (*)[DCTSIZE2]) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components*DCTSIZE2*sizeof(int)); - coef_bit_ptr = & cinfo->coef_bits[0][0]; + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + cinfo->num_components * DCTSIZE2 * + sizeof(int)); + coef_bit_ptr = &cinfo->coef_bits[0][0]; for (ci = 0; ci < cinfo->num_components; ci++) for (i = 0; i < DCTSIZE2; i++) *coef_bit_ptr++ = -1; diff --git a/jdatadst-tj.c b/jdatadst-tj.c index a2219df..0bd961b 100644 --- a/jdatadst-tj.c +++ b/jdatadst-tj.c @@ -24,8 +24,8 @@ #include "jerror.h" #ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ -extern void *malloc (size_t size); -extern void free (void *ptr); +extern void *malloc(size_t size); +extern void free(void *ptr); #endif @@ -54,7 +54,7 @@ typedef my_mem_destination_mgr *my_mem_dest_ptr; */ METHODDEF(void) -init_mem_destination (j_compress_ptr cinfo) +init_mem_destination(j_compress_ptr cinfo) { /* no work necessary here */ } @@ -84,17 +84,17 @@ init_mem_destination (j_compress_ptr cinfo) */ METHODDEF(boolean) -empty_mem_output_buffer (j_compress_ptr cinfo) +empty_mem_output_buffer(j_compress_ptr cinfo) { size_t nextsize; JOCTET *nextbuffer; - my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; + my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest; if (!dest->alloc) ERREXIT(cinfo, JERR_BUFFER_SIZE); /* Try to allocate new buffer with double size */ nextsize = dest->bufsize * 2; - nextbuffer = (JOCTET *) malloc(nextsize); + nextbuffer = (JOCTET *)malloc(nextsize); if (nextbuffer == NULL) 
ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); @@ -126,9 +126,9 @@ empty_mem_output_buffer (j_compress_ptr cinfo) */ METHODDEF(void) -term_mem_destination (j_compress_ptr cinfo) +term_mem_destination(j_compress_ptr cinfo) { - my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; + my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest; if (dest->alloc) *dest->outbuffer = dest->buffer; *dest->outsize = (unsigned long)(dest->bufsize - dest->pub.free_in_buffer); @@ -147,9 +147,8 @@ term_mem_destination (j_compress_ptr cinfo) */ GLOBAL(void) -jpeg_mem_dest_tj (j_compress_ptr cinfo, - unsigned char **outbuffer, unsigned long *outsize, - boolean alloc) +jpeg_mem_dest_tj(j_compress_ptr cinfo, unsigned char **outbuffer, + unsigned long *outsize, boolean alloc) { boolean reused = FALSE; my_mem_dest_ptr dest; @@ -162,9 +161,9 @@ jpeg_mem_dest_tj (j_compress_ptr cinfo, */ if (cinfo->dest == NULL) { /* first time for this JPEG object? */ cinfo->dest = (struct jpeg_destination_mgr *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(my_mem_destination_mgr)); - dest = (my_mem_dest_ptr) cinfo->dest; + dest = (my_mem_dest_ptr)cinfo->dest; dest->newbuffer = NULL; dest->buffer = NULL; } else if (cinfo->dest->init_destination != init_mem_destination) { @@ -174,7 +173,7 @@ jpeg_mem_dest_tj (j_compress_ptr cinfo, ERREXIT(cinfo, JERR_BUFFER_SIZE); } - dest = (my_mem_dest_ptr) cinfo->dest; + dest = (my_mem_dest_ptr)cinfo->dest; dest->pub.init_destination = init_mem_destination; dest->pub.empty_output_buffer = empty_mem_output_buffer; dest->pub.term_destination = term_mem_destination; @@ -187,12 +186,12 @@ jpeg_mem_dest_tj (j_compress_ptr cinfo, if (*outbuffer == NULL || *outsize == 0) { if (alloc) { /* Allocate initial buffer */ - dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE); + dest->newbuffer = *outbuffer = (unsigned char *)malloc(OUTPUT_BUF_SIZE); if (dest->newbuffer == 
NULL) ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); *outsize = OUTPUT_BUF_SIZE; - } - else ERREXIT(cinfo, JERR_BUFFER_SIZE); + } else + ERREXIT(cinfo, JERR_BUFFER_SIZE); } dest->pub.next_output_byte = dest->buffer = *outbuffer; diff --git a/jdatadst.c b/jdatadst.c index dcaf6f0..3168b96 100644 --- a/jdatadst.c +++ b/jdatadst.c @@ -24,8 +24,8 @@ #include "jerror.h" #ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ -extern void *malloc (size_t size); -extern void free (void *ptr); +extern void *malloc(size_t size); +extern void free(void *ptr); #endif @@ -66,14 +66,14 @@ typedef my_mem_destination_mgr *my_mem_dest_ptr; */ METHODDEF(void) -init_destination (j_compress_ptr cinfo) +init_destination(j_compress_ptr cinfo) { - my_dest_ptr dest = (my_dest_ptr) cinfo->dest; + my_dest_ptr dest = (my_dest_ptr)cinfo->dest; /* Allocate the output buffer --- it will be released when done with image */ dest->buffer = (JOCTET *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - OUTPUT_BUF_SIZE * sizeof(JOCTET)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + OUTPUT_BUF_SIZE * sizeof(JOCTET)); dest->pub.next_output_byte = dest->buffer; dest->pub.free_in_buffer = OUTPUT_BUF_SIZE; @@ -81,7 +81,7 @@ init_destination (j_compress_ptr cinfo) #if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) METHODDEF(void) -init_mem_destination (j_compress_ptr cinfo) +init_mem_destination(j_compress_ptr cinfo) { /* no work necessary here */ } @@ -112,12 +112,12 @@ init_mem_destination (j_compress_ptr cinfo) */ METHODDEF(boolean) -empty_output_buffer (j_compress_ptr cinfo) +empty_output_buffer(j_compress_ptr cinfo) { - my_dest_ptr dest = (my_dest_ptr) cinfo->dest; + my_dest_ptr dest = (my_dest_ptr)cinfo->dest; if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) != - (size_t) OUTPUT_BUF_SIZE) + (size_t)OUTPUT_BUF_SIZE) ERREXIT(cinfo, JERR_FILE_WRITE); dest->pub.next_output_byte = dest->buffer; @@ -128,15 +128,15 @@ empty_output_buffer (j_compress_ptr 
cinfo) #if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) METHODDEF(boolean) -empty_mem_output_buffer (j_compress_ptr cinfo) +empty_mem_output_buffer(j_compress_ptr cinfo) { size_t nextsize; JOCTET *nextbuffer; - my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; + my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest; /* Try to allocate new buffer with double size */ nextsize = dest->bufsize * 2; - nextbuffer = (JOCTET *) malloc(nextsize); + nextbuffer = (JOCTET *)malloc(nextsize); if (nextbuffer == NULL) ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); @@ -169,9 +169,9 @@ empty_mem_output_buffer (j_compress_ptr cinfo) */ METHODDEF(void) -term_destination (j_compress_ptr cinfo) +term_destination(j_compress_ptr cinfo) { - my_dest_ptr dest = (my_dest_ptr) cinfo->dest; + my_dest_ptr dest = (my_dest_ptr)cinfo->dest; size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer; /* Write any data remaining in the buffer */ @@ -187,9 +187,9 @@ term_destination (j_compress_ptr cinfo) #if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) METHODDEF(void) -term_mem_destination (j_compress_ptr cinfo) +term_mem_destination(j_compress_ptr cinfo) { - my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; + my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest; *dest->outbuffer = dest->buffer; *dest->outsize = (unsigned long)(dest->bufsize - dest->pub.free_in_buffer); @@ -204,7 +204,7 @@ term_mem_destination (j_compress_ptr cinfo) */ GLOBAL(void) -jpeg_stdio_dest (j_compress_ptr cinfo, FILE *outfile) +jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile) { my_dest_ptr dest; @@ -213,7 +213,7 @@ jpeg_stdio_dest (j_compress_ptr cinfo, FILE *outfile) */ if (cinfo->dest == NULL) { /* first time for this JPEG object? 
*/ cinfo->dest = (struct jpeg_destination_mgr *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(my_destination_mgr)); } else if (cinfo->dest->init_destination != init_destination) { /* It is unsafe to reuse the existing destination manager unless it was @@ -225,7 +225,7 @@ jpeg_stdio_dest (j_compress_ptr cinfo, FILE *outfile) ERREXIT(cinfo, JERR_BUFFER_SIZE); } - dest = (my_dest_ptr) cinfo->dest; + dest = (my_dest_ptr)cinfo->dest; dest->pub.init_destination = init_destination; dest->pub.empty_output_buffer = empty_output_buffer; dest->pub.term_destination = term_destination; @@ -249,8 +249,8 @@ jpeg_stdio_dest (j_compress_ptr cinfo, FILE *outfile) */ GLOBAL(void) -jpeg_mem_dest (j_compress_ptr cinfo, - unsigned char **outbuffer, unsigned long *outsize) +jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer, + unsigned long *outsize) { my_mem_dest_ptr dest; @@ -262,7 +262,7 @@ jpeg_mem_dest (j_compress_ptr cinfo, */ if (cinfo->dest == NULL) { /* first time for this JPEG object? 
*/ cinfo->dest = (struct jpeg_destination_mgr *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(my_mem_destination_mgr)); } else if (cinfo->dest->init_destination != init_mem_destination) { /* It is unsafe to reuse the existing destination manager unless it was @@ -271,7 +271,7 @@ jpeg_mem_dest (j_compress_ptr cinfo, ERREXIT(cinfo, JERR_BUFFER_SIZE); } - dest = (my_mem_dest_ptr) cinfo->dest; + dest = (my_mem_dest_ptr)cinfo->dest; dest->pub.init_destination = init_mem_destination; dest->pub.empty_output_buffer = empty_mem_output_buffer; dest->pub.term_destination = term_mem_destination; @@ -281,7 +281,7 @@ jpeg_mem_dest (j_compress_ptr cinfo, if (*outbuffer == NULL || *outsize == 0) { /* Allocate initial buffer */ - dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE); + dest->newbuffer = *outbuffer = (unsigned char *)malloc(OUTPUT_BUF_SIZE); if (dest->newbuffer == NULL) ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); *outsize = OUTPUT_BUF_SIZE; diff --git a/jdatasrc-tj.c b/jdatasrc-tj.c index 05456c8..1c71307 100644 --- a/jdatasrc-tj.c +++ b/jdatasrc-tj.c @@ -30,7 +30,7 @@ */ METHODDEF(void) -init_mem_source (j_decompress_ptr cinfo) +init_mem_source(j_decompress_ptr cinfo) { /* no work necessary here */ } @@ -70,10 +70,10 @@ init_mem_source (j_decompress_ptr cinfo) */ METHODDEF(boolean) -fill_mem_input_buffer (j_decompress_ptr cinfo) +fill_mem_input_buffer(j_decompress_ptr cinfo) { static const JOCTET mybuffer[4] = { - (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0 + (JOCTET)0xFF, (JOCTET)JPEG_EOI, 0, 0 }; /* The whole JPEG data is expected to reside in the supplied memory @@ -104,7 +104,7 @@ fill_mem_input_buffer (j_decompress_ptr cinfo) */ METHODDEF(void) -skip_input_data (j_decompress_ptr cinfo, long num_bytes) +skip_input_data(j_decompress_ptr cinfo, long num_bytes) { struct jpeg_source_mgr *src = cinfo->src; @@ -113,15 +113,15 @@ skip_input_data 
(j_decompress_ptr cinfo, long num_bytes) * any trouble anyway --- large skips are infrequent. */ if (num_bytes > 0) { - while (num_bytes > (long) src->bytes_in_buffer) { - num_bytes -= (long) src->bytes_in_buffer; - (void) (*src->fill_input_buffer) (cinfo); + while (num_bytes > (long)src->bytes_in_buffer) { + num_bytes -= (long)src->bytes_in_buffer; + (void)(*src->fill_input_buffer) (cinfo); /* note we assume that fill_input_buffer will never return FALSE, * so suspension need not be handled. */ } - src->next_input_byte += (size_t) num_bytes; - src->bytes_in_buffer -= (size_t) num_bytes; + src->next_input_byte += (size_t)num_bytes; + src->bytes_in_buffer -= (size_t)num_bytes; } } @@ -145,7 +145,7 @@ skip_input_data (j_decompress_ptr cinfo, long num_bytes) */ METHODDEF(void) -term_source (j_decompress_ptr cinfo) +term_source(j_decompress_ptr cinfo) { /* no work necessary here */ } @@ -157,8 +157,8 @@ term_source (j_decompress_ptr cinfo) */ GLOBAL(void) -jpeg_mem_src_tj (j_decompress_ptr cinfo, - const unsigned char *inbuffer, unsigned long insize) +jpeg_mem_src_tj(j_decompress_ptr cinfo, const unsigned char *inbuffer, + unsigned long insize) { struct jpeg_source_mgr *src; @@ -171,7 +171,7 @@ jpeg_mem_src_tj (j_decompress_ptr cinfo, */ if (cinfo->src == NULL) { /* first time for this JPEG object? 
*/ cinfo->src = (struct jpeg_source_mgr *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)); } else if (cinfo->src->init_source != init_mem_source) { /* It is unsafe to reuse the existing source manager unless it was created @@ -186,6 +186,6 @@ jpeg_mem_src_tj (j_decompress_ptr cinfo, src->skip_input_data = skip_input_data; src->resync_to_restart = jpeg_resync_to_restart; /* use default method */ src->term_source = term_source; - src->bytes_in_buffer = (size_t) insize; - src->next_input_byte = (const JOCTET *) inbuffer; + src->bytes_in_buffer = (size_t)insize; + src->next_input_byte = (const JOCTET *)inbuffer; } diff --git a/jdatasrc.c b/jdatasrc.c index c83183f..eadb4a2 100644 --- a/jdatasrc.c +++ b/jdatasrc.c @@ -45,9 +45,9 @@ typedef my_source_mgr *my_src_ptr; */ METHODDEF(void) -init_source (j_decompress_ptr cinfo) +init_source(j_decompress_ptr cinfo) { - my_src_ptr src = (my_src_ptr) cinfo->src; + my_src_ptr src = (my_src_ptr)cinfo->src; /* We reset the empty-input-file flag for each image, * but we don't clear the input buffer. 
@@ -58,7 +58,7 @@ init_source (j_decompress_ptr cinfo) #if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) METHODDEF(void) -init_mem_source (j_decompress_ptr cinfo) +init_mem_source(j_decompress_ptr cinfo) { /* no work necessary here */ } @@ -99,9 +99,9 @@ init_mem_source (j_decompress_ptr cinfo) */ METHODDEF(boolean) -fill_input_buffer (j_decompress_ptr cinfo) +fill_input_buffer(j_decompress_ptr cinfo) { - my_src_ptr src = (my_src_ptr) cinfo->src; + my_src_ptr src = (my_src_ptr)cinfo->src; size_t nbytes; nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE); @@ -111,8 +111,8 @@ fill_input_buffer (j_decompress_ptr cinfo) ERREXIT(cinfo, JERR_INPUT_EMPTY); WARNMS(cinfo, JWRN_JPEG_EOF); /* Insert a fake EOI marker */ - src->buffer[0] = (JOCTET) 0xFF; - src->buffer[1] = (JOCTET) JPEG_EOI; + src->buffer[0] = (JOCTET)0xFF; + src->buffer[1] = (JOCTET)JPEG_EOI; nbytes = 2; } @@ -125,10 +125,10 @@ fill_input_buffer (j_decompress_ptr cinfo) #if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) METHODDEF(boolean) -fill_mem_input_buffer (j_decompress_ptr cinfo) +fill_mem_input_buffer(j_decompress_ptr cinfo) { static const JOCTET mybuffer[4] = { - (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0 + (JOCTET)0xFF, (JOCTET)JPEG_EOI, 0, 0 }; /* The whole JPEG data is expected to reside in the supplied memory @@ -160,7 +160,7 @@ fill_mem_input_buffer (j_decompress_ptr cinfo) */ METHODDEF(void) -skip_input_data (j_decompress_ptr cinfo, long num_bytes) +skip_input_data(j_decompress_ptr cinfo, long num_bytes) { struct jpeg_source_mgr *src = cinfo->src; @@ -169,15 +169,15 @@ skip_input_data (j_decompress_ptr cinfo, long num_bytes) * any trouble anyway --- large skips are infrequent. 
*/ if (num_bytes > 0) { - while (num_bytes > (long) src->bytes_in_buffer) { - num_bytes -= (long) src->bytes_in_buffer; - (void) (*src->fill_input_buffer) (cinfo); + while (num_bytes > (long)src->bytes_in_buffer) { + num_bytes -= (long)src->bytes_in_buffer; + (void)(*src->fill_input_buffer) (cinfo); /* note we assume that fill_input_buffer will never return FALSE, * so suspension need not be handled. */ } - src->next_input_byte += (size_t) num_bytes; - src->bytes_in_buffer -= (size_t) num_bytes; + src->next_input_byte += (size_t)num_bytes; + src->bytes_in_buffer -= (size_t)num_bytes; } } @@ -201,7 +201,7 @@ skip_input_data (j_decompress_ptr cinfo, long num_bytes) */ METHODDEF(void) -term_source (j_decompress_ptr cinfo) +term_source(j_decompress_ptr cinfo) { /* no work necessary here */ } @@ -214,7 +214,7 @@ term_source (j_decompress_ptr cinfo) */ GLOBAL(void) -jpeg_stdio_src (j_decompress_ptr cinfo, FILE *infile) +jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile) { my_src_ptr src; @@ -225,11 +225,11 @@ jpeg_stdio_src (j_decompress_ptr cinfo, FILE *infile) */ if (cinfo->src == NULL) { /* first time for this JPEG object? 
*/ cinfo->src = (struct jpeg_source_mgr *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(my_source_mgr)); - src = (my_src_ptr) cinfo->src; + src = (my_src_ptr)cinfo->src; src->buffer = (JOCTET *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, INPUT_BUF_SIZE * sizeof(JOCTET)); } else if (cinfo->src->init_source != init_source) { /* It is unsafe to reuse the existing source manager unless it was created @@ -241,7 +241,7 @@ jpeg_stdio_src (j_decompress_ptr cinfo, FILE *infile) ERREXIT(cinfo, JERR_BUFFER_SIZE); } - src = (my_src_ptr) cinfo->src; + src = (my_src_ptr)cinfo->src; src->pub.init_source = init_source; src->pub.fill_input_buffer = fill_input_buffer; src->pub.skip_input_data = skip_input_data; @@ -260,8 +260,8 @@ jpeg_stdio_src (j_decompress_ptr cinfo, FILE *infile) */ GLOBAL(void) -jpeg_mem_src (j_decompress_ptr cinfo, - const unsigned char *inbuffer, unsigned long insize) +jpeg_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer, + unsigned long insize) { struct jpeg_source_mgr *src; @@ -274,7 +274,7 @@ jpeg_mem_src (j_decompress_ptr cinfo, */ if (cinfo->src == NULL) { /* first time for this JPEG object? 
*/ cinfo->src = (struct jpeg_source_mgr *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(struct jpeg_source_mgr)); } else if (cinfo->src->init_source != init_mem_source) { /* It is unsafe to reuse the existing source manager unless it was created @@ -289,7 +289,7 @@ jpeg_mem_src (j_decompress_ptr cinfo, src->skip_input_data = skip_input_data; src->resync_to_restart = jpeg_resync_to_restart; /* use default method */ src->term_source = term_source; - src->bytes_in_buffer = (size_t) insize; - src->next_input_byte = (const JOCTET *) inbuffer; + src->bytes_in_buffer = (size_t)insize; + src->next_input_byte = (const JOCTET *)inbuffer; } #endif diff --git a/jdcoefct.c b/jdcoefct.c index 1a48969..723a9ac 100644 --- a/jdcoefct.c +++ b/jdcoefct.c @@ -25,16 +25,15 @@ /* Forward declarations */ -METHODDEF(int) decompress_onepass - (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); +METHODDEF(int) decompress_onepass(j_decompress_ptr cinfo, + JSAMPIMAGE output_buf); #ifdef D_MULTISCAN_FILES_SUPPORTED -METHODDEF(int) decompress_data - (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); +METHODDEF(int) decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf); #endif #ifdef BLOCK_SMOOTHING_SUPPORTED -LOCAL(boolean) smoothing_ok (j_decompress_ptr cinfo); -METHODDEF(int) decompress_smooth_data - (j_decompress_ptr cinfo, JSAMPIMAGE output_buf); +LOCAL(boolean) smoothing_ok(j_decompress_ptr cinfo); +METHODDEF(int) decompress_smooth_data(j_decompress_ptr cinfo, + JSAMPIMAGE output_buf); #endif @@ -43,7 +42,7 @@ METHODDEF(int) decompress_smooth_data */ METHODDEF(void) -start_input_pass (j_decompress_ptr cinfo) +start_input_pass(j_decompress_ptr cinfo) { cinfo->input_iMCU_row = 0; start_iMCU_row(cinfo); @@ -55,10 +54,10 @@ start_input_pass (j_decompress_ptr cinfo) */ METHODDEF(void) -start_output_pass (j_decompress_ptr cinfo) +start_output_pass(j_decompress_ptr cinfo) { #ifdef 
BLOCK_SMOOTHING_SUPPORTED - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; /* If multipass, check to see whether to use block smoothing on this pass */ if (coef->pub.coef_arrays != NULL) { @@ -83,9 +82,9 @@ start_output_pass (j_decompress_ptr cinfo) */ METHODDEF(int) -decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) +decompress_onepass(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; JDIMENSION MCU_col_num; /* index of current MCU within row */ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; @@ -101,9 +100,9 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col; MCU_col_num++) { /* Try to fetch an MCU. Entropy decoder expects buffer to be zeroed. */ - jzero_far((void *) coef->MCU_buffer[0], - (size_t) (cinfo->blocks_in_MCU * sizeof(JBLOCK))); - if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { + jzero_far((void *)coef->MCU_buffer[0], + (size_t)(cinfo->blocks_in_MCU * sizeof(JBLOCK))); + if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { /* Suspension forced; update state counters and exit */ coef->MCU_vert_offset = yoffset; coef->MCU_ctr = MCU_col_num; @@ -120,28 +119,28 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) * incremented past them!). Note the inner loop relies on having * allocated the MCU_buffer[] blocks sequentially. */ - blkn = 0; /* index of current DCT block within MCU */ + blkn = 0; /* index of current DCT block within MCU */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; /* Don't bother to IDCT an uninteresting component. */ - if (! 
compptr->component_needed) { + if (!compptr->component_needed) { blkn += compptr->MCU_blocks; continue; } inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index]; - useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width - : compptr->last_col_width; + useful_width = (MCU_col_num < last_MCU_col) ? + compptr->MCU_width : compptr->last_col_width; output_ptr = output_buf[compptr->component_index] + - yoffset * compptr->_DCT_scaled_size; + yoffset * compptr->_DCT_scaled_size; start_col = (MCU_col_num - cinfo->master->first_iMCU_col) * - compptr->MCU_sample_width; + compptr->MCU_sample_width; for (yindex = 0; yindex < compptr->MCU_height; yindex++) { if (cinfo->input_iMCU_row < last_iMCU_row || - yoffset+yindex < compptr->last_row_height) { + yoffset + yindex < compptr->last_row_height) { output_col = start_col; for (xindex = 0; xindex < useful_width; xindex++) { (*inverse_DCT) (cinfo, compptr, - (JCOEFPTR) coef->MCU_buffer[blkn+xindex], + (JCOEFPTR)coef->MCU_buffer[blkn + xindex], output_ptr, output_col); output_col += compptr->_DCT_scaled_size; } @@ -172,7 +171,7 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) */ METHODDEF(int) -dummy_consume_data (j_decompress_ptr cinfo) +dummy_consume_data(j_decompress_ptr cinfo) { return JPEG_SUSPENDED; /* Always indicate nothing was done */ } @@ -188,9 +187,9 @@ dummy_consume_data (j_decompress_ptr cinfo) */ METHODDEF(int) -consume_data (j_decompress_ptr cinfo) +consume_data(j_decompress_ptr cinfo) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; JDIMENSION MCU_col_num; /* index of current MCU within row */ int blkn, ci, xindex, yindex, yoffset; JDIMENSION start_col; @@ -202,9 +201,9 @@ consume_data (j_decompress_ptr cinfo) for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; buffer[ci] = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index], + ((j_common_ptr)cinfo, 
coef->whole_image[compptr->component_index], cinfo->input_iMCU_row * compptr->v_samp_factor, - (JDIMENSION) compptr->v_samp_factor, TRUE); + (JDIMENSION)compptr->v_samp_factor, TRUE); /* Note: entropy decoder expects buffer to be zeroed, * but this is handled automatically by the memory manager * because we requested a pre-zeroed array. @@ -222,14 +221,14 @@ consume_data (j_decompress_ptr cinfo) compptr = cinfo->cur_comp_info[ci]; start_col = MCU_col_num * compptr->MCU_width; for (yindex = 0; yindex < compptr->MCU_height; yindex++) { - buffer_ptr = buffer[ci][yindex+yoffset] + start_col; + buffer_ptr = buffer[ci][yindex + yoffset] + start_col; for (xindex = 0; xindex < compptr->MCU_width; xindex++) { coef->MCU_buffer[blkn++] = buffer_ptr++; } } } /* Try to fetch the MCU. */ - if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { + if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { /* Suspension forced; update state counters and exit */ coef->MCU_vert_offset = yoffset; coef->MCU_ctr = MCU_col_num; @@ -259,9 +258,9 @@ consume_data (j_decompress_ptr cinfo) */ METHODDEF(int) -decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) +decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; JDIMENSION block_num; int ci, block_row, block_rows; @@ -276,7 +275,7 @@ decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) while (cinfo->input_scan_number < cinfo->output_scan_number || (cinfo->input_scan_number == cinfo->output_scan_number && cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) { - if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED) + if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED) return JPEG_SUSPENDED; } @@ -284,19 +283,19 @@ decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) for (ci = 0, compptr = cinfo->comp_info; ci < 
cinfo->num_components; ci++, compptr++) { /* Don't bother to IDCT an uninteresting component. */ - if (! compptr->component_needed) + if (!compptr->component_needed) continue; /* Align the virtual buffer for this component. */ buffer = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[ci], + ((j_common_ptr)cinfo, coef->whole_image[ci], cinfo->output_iMCU_row * compptr->v_samp_factor, - (JDIMENSION) compptr->v_samp_factor, FALSE); + (JDIMENSION)compptr->v_samp_factor, FALSE); /* Count non-dummy DCT block rows in this iMCU row. */ if (cinfo->output_iMCU_row < last_iMCU_row) block_rows = compptr->v_samp_factor; else { /* NB: can't use last_row_height here; it is input-side-dependent! */ - block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor); + block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor); if (block_rows == 0) block_rows = compptr->v_samp_factor; } inverse_DCT = cinfo->idct->inverse_DCT[ci]; @@ -307,8 +306,8 @@ decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) output_col = 0; for (block_num = cinfo->master->first_MCU_col[ci]; block_num <= cinfo->master->last_MCU_col[ci]; block_num++) { - (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr, - output_ptr, output_col); + (*inverse_DCT) (cinfo, compptr, (JCOEFPTR)buffer_ptr, output_ptr, + output_col); buffer_ptr++; output_col += compptr->_DCT_scaled_size; } @@ -350,9 +349,9 @@ decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) */ LOCAL(boolean) -smoothing_ok (j_decompress_ptr cinfo) +smoothing_ok(j_decompress_ptr cinfo) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; boolean smoothing_useful = FALSE; int ci, coefi; jpeg_component_info *compptr; @@ -360,13 +359,13 @@ smoothing_ok (j_decompress_ptr cinfo) int *coef_bits; int *coef_bits_latch; - if (! 
cinfo->progressive_mode || cinfo->coef_bits == NULL) + if (!cinfo->progressive_mode || cinfo->coef_bits == NULL) return FALSE; /* Allocate latch area if not already done */ if (coef->coef_bits_latch == NULL) coef->coef_bits_latch = (int *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, cinfo->num_components * (SAVED_COEFS * sizeof(int))); coef_bits_latch = coef->coef_bits_latch; @@ -406,9 +405,9 @@ smoothing_ok (j_decompress_ptr cinfo) */ METHODDEF(int) -decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) +decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1; JDIMENSION block_num, last_block_column; int ci, block_row, block_rows, access_rows; @@ -422,8 +421,8 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) JCOEF *workspace; int *coef_bits; JQUANT_TBL *quanttbl; - JLONG Q00,Q01,Q02,Q10,Q11,Q20, num; - int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9; + JLONG Q00, Q01, Q02, Q10, Q11, Q20, num; + int DC1, DC2, DC3, DC4, DC5, DC6, DC7, DC8, DC9; int Al, pred; /* Keep a local variable to avoid looking it up more than once */ @@ -431,7 +430,7 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) /* Force some input to be done if we are getting ahead of the input. */ while (cinfo->input_scan_number <= cinfo->output_scan_number && - ! cinfo->inputctl->eoi_reached) { + !cinfo->inputctl->eoi_reached) { if (cinfo->input_scan_number == cinfo->output_scan_number) { /* If input is working on current scan, we ordinarily want it to * have completed the current row. But if input scan is DC, @@ -439,10 +438,10 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) * values are up to date. */ JDIMENSION delta = (cinfo->Ss == 0) ? 
1 : 0; - if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta) + if (cinfo->input_iMCU_row > cinfo->output_iMCU_row + delta) break; } - if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED) + if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED) return JPEG_SUSPENDED; } @@ -450,7 +449,7 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { /* Don't bother to IDCT an uninteresting component. */ - if (! compptr->component_needed) + if (!compptr->component_needed) continue; /* Count non-dummy DCT block rows in this iMCU row. */ if (cinfo->output_iMCU_row < last_iMCU_row) { @@ -459,7 +458,7 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) last_row = FALSE; } else { /* NB: can't use last_row_height here; it is input-side-dependent! */ - block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor); + block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor); if (block_rows == 0) block_rows = compptr->v_samp_factor; access_rows = block_rows; /* this iMCU row only */ last_row = TRUE; @@ -468,15 +467,15 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) if (cinfo->output_iMCU_row > 0) { access_rows += compptr->v_samp_factor; /* prior iMCU row too */ buffer = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[ci], + ((j_common_ptr)cinfo, coef->whole_image[ci], (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor, - (JDIMENSION) access_rows, FALSE); + (JDIMENSION)access_rows, FALSE); buffer += compptr->v_samp_factor; /* point to current iMCU row */ first_row = FALSE; } else { buffer = (*cinfo->mem->access_virt_barray) - ((j_common_ptr) cinfo, coef->whole_image[ci], - (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE); + ((j_common_ptr)cinfo, coef->whole_image[ci], + (JDIMENSION)0, (JDIMENSION)access_rows, FALSE); first_row = TRUE; } /* Fetch 
component-dependent info */ @@ -496,115 +495,115 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) if (first_row && block_row == 0) prev_block_row = buffer_ptr; else - prev_block_row = buffer[block_row-1]; - if (last_row && block_row == block_rows-1) + prev_block_row = buffer[block_row - 1]; + if (last_row && block_row == block_rows - 1) next_block_row = buffer_ptr; else - next_block_row = buffer[block_row+1]; + next_block_row = buffer[block_row + 1]; /* We fetch the surrounding DC values using a sliding-register approach. * Initialize all nine here so as to do the right thing on narrow pics. */ - DC1 = DC2 = DC3 = (int) prev_block_row[0][0]; - DC4 = DC5 = DC6 = (int) buffer_ptr[0][0]; - DC7 = DC8 = DC9 = (int) next_block_row[0][0]; + DC1 = DC2 = DC3 = (int)prev_block_row[0][0]; + DC4 = DC5 = DC6 = (int)buffer_ptr[0][0]; + DC7 = DC8 = DC9 = (int)next_block_row[0][0]; output_col = 0; last_block_column = compptr->width_in_blocks - 1; for (block_num = cinfo->master->first_MCU_col[ci]; block_num <= cinfo->master->last_MCU_col[ci]; block_num++) { /* Fetch current DCT block into workspace so we can modify it. */ - jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1); + jcopy_block_row(buffer_ptr, (JBLOCKROW)workspace, (JDIMENSION)1); /* Update DC values */ if (block_num < last_block_column) { - DC3 = (int) prev_block_row[1][0]; - DC6 = (int) buffer_ptr[1][0]; - DC9 = (int) next_block_row[1][0]; + DC3 = (int)prev_block_row[1][0]; + DC6 = (int)buffer_ptr[1][0]; + DC9 = (int)next_block_row[1][0]; } /* Compute coefficient estimates per K.8. * An estimate is applied only if coefficient is still zero, * and is not known to be fully accurate. 
*/ /* AC01 */ - if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) { + if ((Al = coef_bits[1]) != 0 && workspace[1] == 0) { num = 36 * Q00 * (DC4 - DC6); if (num >= 0) { - pred = (int) (((Q01<<7) + num) / (Q01<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; } else { - pred = (int) (((Q01<<7) - num) / (Q01<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; pred = -pred; } - workspace[1] = (JCOEF) pred; + workspace[1] = (JCOEF)pred; } /* AC10 */ - if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) { + if ((Al = coef_bits[2]) != 0 && workspace[8] == 0) { num = 36 * Q00 * (DC2 - DC8); if (num >= 0) { - pred = (int) (((Q10<<7) + num) / (Q10<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; } else { - pred = (int) (((Q10<<7) - num) / (Q10<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; pred = -pred; } - workspace[8] = (JCOEF) pred; + workspace[8] = (JCOEF)pred; } /* AC20 */ - if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) { - num = 9 * Q00 * (DC2 + DC8 - 2*DC5); + if ((Al = coef_bits[3]) != 0 && workspace[16] == 0) { + num = 9 * Q00 * (DC2 + DC8 - 2 * DC5); if (num >= 0) { - pred = (int) (((Q20<<7) + num) / (Q20<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; } else { - pred = (int) (((Q20<<7) - num) / (Q20<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; pred = -pred; } - workspace[16] = (JCOEF) pred; + workspace[16] = (JCOEF)pred; } /* AC11 */ - if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) { + if ((Al = coef_bits[4]) != 0 && workspace[9] == 0) { num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9); if (num >= 0) { - pred = (int) (((Q11<<7) + num) / (Q11<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; } else { - pred = (int) (((Q11<<7) - num) / (Q11<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; pred = -pred; } - 
workspace[9] = (JCOEF) pred; + workspace[9] = (JCOEF)pred; } /* AC02 */ - if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) { - num = 9 * Q00 * (DC4 + DC6 - 2*DC5); + if ((Al = coef_bits[5]) != 0 && workspace[2] == 0) { + num = 9 * Q00 * (DC4 + DC6 - 2 * DC5); if (num >= 0) { - pred = (int) (((Q02<<7) + num) / (Q02<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; } else { - pred = (int) (((Q02<<7) - num) / (Q02<<8)); - if (Al > 0 && pred >= (1< 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; pred = -pred; } - workspace[2] = (JCOEF) pred; + workspace[2] = (JCOEF)pred; } /* OK, do the IDCT */ - (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace, - output_ptr, output_col); + (*inverse_DCT) (cinfo, compptr, (JCOEFPTR)workspace, output_ptr, + output_col); /* Advance for next column */ - DC1 = DC2; DC2 = DC3; - DC4 = DC5; DC5 = DC6; - DC7 = DC8; DC8 = DC9; + DC1 = DC2; DC2 = DC3; + DC4 = DC5; DC5 = DC6; + DC7 = DC8; DC8 = DC9; buffer_ptr++, prev_block_row++, next_block_row++; output_col += compptr->_DCT_scaled_size; } @@ -625,14 +624,14 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) */ GLOBAL(void) -jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer) +jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer) { my_coef_ptr coef; coef = (my_coef_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_coef_controller)); - cinfo->coef = (struct jpeg_d_coef_controller *) coef; + cinfo->coef = (struct jpeg_d_coef_controller *)coef; coef->pub.start_input_pass = start_input_pass; coef->pub.start_output_pass = start_output_pass; #ifdef BLOCK_SMOOTHING_SUPPORTED @@ -657,12 +656,12 @@ jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer) access_rows *= 3; #endif coef->whole_image[ci] = (*cinfo->mem->request_virt_barray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE, - 
(JDIMENSION) jround_up((long) compptr->width_in_blocks, - (long) compptr->h_samp_factor), - (JDIMENSION) jround_up((long) compptr->height_in_blocks, - (long) compptr->v_samp_factor), - (JDIMENSION) access_rows); + ((j_common_ptr)cinfo, JPOOL_IMAGE, TRUE, + (JDIMENSION)jround_up((long)compptr->width_in_blocks, + (long)compptr->h_samp_factor), + (JDIMENSION)jround_up((long)compptr->height_in_blocks, + (long)compptr->v_samp_factor), + (JDIMENSION)access_rows); } coef->pub.consume_data = consume_data; coef->pub.decompress_data = decompress_data; @@ -676,7 +675,7 @@ jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer) int i; buffer = (JBLOCKROW) - (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE, D_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK)); for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) { coef->MCU_buffer[i] = buffer + i; @@ -688,6 +687,6 @@ jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer) /* Allocate the workspace buffer */ coef->workspace = (JCOEF *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(JCOEF) * DCTSIZE2); } diff --git a/jdcoefct.h b/jdcoefct.h index bf6beb2..c4d1943 100644 --- a/jdcoefct.h +++ b/jdcoefct.h @@ -59,10 +59,10 @@ typedef my_coef_controller *my_coef_ptr; LOCAL(void) -start_iMCU_row (j_decompress_ptr cinfo) +start_iMCU_row(j_decompress_ptr cinfo) /* Reset within-iMCU-row counters for a new row (input side) */ { - my_coef_ptr coef = (my_coef_ptr) cinfo->coef; + my_coef_ptr coef = (my_coef_ptr)cinfo->coef; /* In an interleaved scan, an MCU row is the same as an iMCU row. * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows. 
@@ -71,7 +71,7 @@ start_iMCU_row (j_decompress_ptr cinfo) if (cinfo->comps_in_scan > 1) { coef->MCU_rows_per_iMCU_row = 1; } else { - if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1)) + if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows - 1)) coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor; else coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height; diff --git a/jdcol565.c b/jdcol565.c index 349fce4..40068ef 100644 --- a/jdcol565.c +++ b/jdcol565.c @@ -17,22 +17,22 @@ INLINE LOCAL(void) -ycc_rgb565_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; register int y, cb, cr; register JSAMPROW outptr; register JSAMPROW inptr0, inptr1, inptr2; register JDIMENSION col; JDIMENSION num_cols = cinfo->output_width; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - register int * Crrtab = cconvert->Cr_r_tab; - register int * Cbbtab = cconvert->Cb_b_tab; - register JLONG * Crgtab = cconvert->Cr_g_tab; - register JLONG * Cbgtab = cconvert->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + register int *Crrtab = cconvert->Cr_r_tab; + register int *Cbbtab = cconvert->Cb_b_tab; + register JLONG *Crgtab = cconvert->Cr_g_tab; + register JLONG *Cbgtab = cconvert->Cb_g_tab; SHIFT_TEMPS while (--num_rows >= 0) { @@ -53,7 +53,7 @@ ycc_rgb565_convert_internal (j_decompress_ptr cinfo, SCALEBITS))]; b = range_limit[y + Cbbtab[cb]]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; outptr += 2; num_cols--; } @@ -88,7 +88,7 @@ ycc_rgb565_convert_internal (j_decompress_ptr 
cinfo, SCALEBITS))]; b = range_limit[y + Cbbtab[cb]]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; } } } @@ -96,22 +96,22 @@ ycc_rgb565_convert_internal (j_decompress_ptr cinfo, INLINE LOCAL(void) -ycc_rgb565D_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; register int y, cb, cr; register JSAMPROW outptr; register JSAMPROW inptr0, inptr1, inptr2; register JDIMENSION col; JDIMENSION num_cols = cinfo->output_width; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - register int * Crrtab = cconvert->Cr_r_tab; - register int * Cbbtab = cconvert->Cb_b_tab; - register JLONG * Crgtab = cconvert->Cr_g_tab; - register JLONG * Cbgtab = cconvert->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + register int *Crrtab = cconvert->Cr_r_tab; + register int *Cbbtab = cconvert->Cb_b_tab; + register JLONG *Crgtab = cconvert->Cr_g_tab; + register JLONG *Cbgtab = cconvert->Cb_g_tab; JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; SHIFT_TEMPS @@ -134,7 +134,7 @@ ycc_rgb565D_convert_internal (j_decompress_ptr cinfo, SCALEBITS)), d0)]; b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; outptr += 2; num_cols--; } @@ -174,7 +174,7 @@ ycc_rgb565D_convert_internal (j_decompress_ptr cinfo, SCALEBITS)), d0)]; b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; } } } @@ -182,9 +182,9 @@ 
ycc_rgb565D_convert_internal (j_decompress_ptr cinfo, INLINE LOCAL(void) -rgb_rgb565_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +rgb_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { register JSAMPROW outptr; register JSAMPROW inptr0, inptr1, inptr2; @@ -206,7 +206,7 @@ rgb_rgb565_convert_internal (j_decompress_ptr cinfo, g = GETJSAMPLE(*inptr1++); b = GETJSAMPLE(*inptr2++); rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; outptr += 2; num_cols--; } @@ -229,7 +229,7 @@ rgb_rgb565_convert_internal (j_decompress_ptr cinfo, g = GETJSAMPLE(*inptr1); b = GETJSAMPLE(*inptr2); rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; } } } @@ -237,14 +237,14 @@ rgb_rgb565_convert_internal (j_decompress_ptr cinfo, INLINE LOCAL(void) -rgb_rgb565D_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { register JSAMPROW outptr; register JSAMPROW inptr0, inptr1, inptr2; register JDIMENSION col; - register JSAMPLE * range_limit = cinfo->sample_range_limit; + register JSAMPLE *range_limit = cinfo->sample_range_limit; JDIMENSION num_cols = cinfo->output_width; JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; SHIFT_TEMPS @@ -263,7 +263,7 @@ rgb_rgb565D_convert_internal (j_decompress_ptr cinfo, g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; outptr += 2; num_cols--; } @@ -288,7 +288,7 @@ rgb_rgb565D_convert_internal (j_decompress_ptr cinfo, g = 
range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)]; b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; } } } @@ -296,9 +296,9 @@ rgb_rgb565D_convert_internal (j_decompress_ptr cinfo, INLINE LOCAL(void) -gray_rgb565_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +gray_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { register JSAMPROW inptr, outptr; register JDIMENSION col; @@ -313,7 +313,7 @@ gray_rgb565_convert_internal (j_decompress_ptr cinfo, if (PACK_NEED_ALIGNMENT(outptr)) { g = *inptr++; rgb = PACK_SHORT_565(g, g, g); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; outptr += 2; num_cols--; } @@ -328,7 +328,7 @@ gray_rgb565_convert_internal (j_decompress_ptr cinfo, if (num_cols & 1) { g = *inptr; rgb = PACK_SHORT_565(g, g, g); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; } } } @@ -336,13 +336,13 @@ gray_rgb565_convert_internal (j_decompress_ptr cinfo, INLINE LOCAL(void) -gray_rgb565D_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +gray_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { register JSAMPROW inptr, outptr; register JDIMENSION col; - register JSAMPLE * range_limit = cinfo->sample_range_limit; + register JSAMPLE *range_limit = cinfo->sample_range_limit; JDIMENSION num_cols = cinfo->output_width; JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; @@ -356,7 +356,7 @@ gray_rgb565D_convert_internal (j_decompress_ptr cinfo, g = *inptr++; g = range_limit[DITHER_565_R(g, d0)]; rgb = PACK_SHORT_565(g, g, g); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; outptr += 2; 
num_cols--; } @@ -378,7 +378,7 @@ gray_rgb565D_convert_internal (j_decompress_ptr cinfo, g = *inptr; g = range_limit[DITHER_565_R(g, d0)]; rgb = PACK_SHORT_565(g, g, g); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; } } } diff --git a/jdcolext.c b/jdcolext.c index 59b676c..72a5301 100644 --- a/jdcolext.c +++ b/jdcolext.c @@ -28,22 +28,22 @@ INLINE LOCAL(void) -ycc_rgb_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; register int y, cb, cr; register JSAMPROW outptr; register JSAMPROW inptr0, inptr1, inptr2; register JDIMENSION col; JDIMENSION num_cols = cinfo->output_width; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - register int * Crrtab = cconvert->Cr_r_tab; - register int * Cbbtab = cconvert->Cb_b_tab; - register JLONG * Crgtab = cconvert->Cr_g_tab; - register JLONG * Cbgtab = cconvert->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + register int *Crrtab = cconvert->Cr_r_tab; + register int *Cbbtab = cconvert->Cb_b_tab; + register JLONG *Crgtab = cconvert->Cr_g_tab; + register JLONG *Cbgtab = cconvert->Cb_g_tab; SHIFT_TEMPS while (--num_rows >= 0) { @@ -59,8 +59,8 @@ ycc_rgb_convert_internal (j_decompress_ptr cinfo, /* Range-limiting is essential due to noise introduced by DCT losses. 
*/ outptr[RGB_RED] = range_limit[y + Crrtab[cr]]; outptr[RGB_GREEN] = range_limit[y + - ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], - SCALEBITS))]; + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + SCALEBITS))]; outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]]; /* Set unused byte to 0xFF so it can be interpreted as an opaque */ /* alpha channel value */ @@ -81,9 +81,9 @@ ycc_rgb_convert_internal (j_decompress_ptr cinfo, INLINE LOCAL(void) -gray_rgb_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { register JSAMPROW inptr, outptr; register JDIMENSION col; @@ -112,9 +112,9 @@ gray_rgb_convert_internal (j_decompress_ptr cinfo, INLINE LOCAL(void) -rgb_rgb_convert_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { register JSAMPROW inptr0, inptr1, inptr2; register JSAMPROW outptr; diff --git a/jdcolor.c b/jdcolor.c index 05cbf4d..dc0e3b6 100644 --- a/jdcolor.c +++ b/jdcolor.c @@ -74,8 +74,8 @@ typedef my_color_deconverter *my_cconvert_ptr; */ #define SCALEBITS 16 /* speediest right-shift on some machines */ -#define ONE_HALF ((JLONG) 1 << (SCALEBITS-1)) -#define FIX(x) ((JLONG) ((x) * (1L<Y conversion and divide it up into * three parts, instead of doing three alloc_small requests. This lets us @@ -85,9 +85,9 @@ typedef my_color_deconverter *my_cconvert_ptr; */ #define R_Y_OFF 0 /* offset to R => Y section */ -#define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */ -#define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. 
*/ -#define TABLE_SIZE (3*(MAXJSAMPLE+1)) +#define G_Y_OFF (1 * (MAXJSAMPLE + 1)) /* offset to G => Y section */ +#define B_Y_OFF (2 * (MAXJSAMPLE + 1)) /* etc. */ +#define TABLE_SIZE (3 * (MAXJSAMPLE + 1)) /* Include inline routines for colorspace extensions */ @@ -98,13 +98,13 @@ typedef my_color_deconverter *my_cconvert_ptr; #undef RGB_BLUE #undef RGB_PIXELSIZE -#define RGB_RED EXT_RGB_RED -#define RGB_GREEN EXT_RGB_GREEN -#define RGB_BLUE EXT_RGB_BLUE -#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -#define ycc_rgb_convert_internal ycc_extrgb_convert_internal -#define gray_rgb_convert_internal gray_extrgb_convert_internal -#define rgb_rgb_convert_internal rgb_extrgb_convert_internal +#define RGB_RED EXT_RGB_RED +#define RGB_GREEN EXT_RGB_GREEN +#define RGB_BLUE EXT_RGB_BLUE +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define ycc_rgb_convert_internal ycc_extrgb_convert_internal +#define gray_rgb_convert_internal gray_extrgb_convert_internal +#define rgb_rgb_convert_internal rgb_extrgb_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -114,14 +114,14 @@ typedef my_color_deconverter *my_cconvert_ptr; #undef gray_rgb_convert_internal #undef rgb_rgb_convert_internal -#define RGB_RED EXT_RGBX_RED -#define RGB_GREEN EXT_RGBX_GREEN -#define RGB_BLUE EXT_RGBX_BLUE -#define RGB_ALPHA 3 -#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -#define ycc_rgb_convert_internal ycc_extrgbx_convert_internal -#define gray_rgb_convert_internal gray_extrgbx_convert_internal -#define rgb_rgb_convert_internal rgb_extrgbx_convert_internal +#define RGB_RED EXT_RGBX_RED +#define RGB_GREEN EXT_RGBX_GREEN +#define RGB_BLUE EXT_RGBX_BLUE +#define RGB_ALPHA 3 +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define ycc_rgb_convert_internal ycc_extrgbx_convert_internal +#define gray_rgb_convert_internal gray_extrgbx_convert_internal +#define rgb_rgb_convert_internal rgb_extrgbx_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -132,13 +132,13 @@ typedef 
my_color_deconverter *my_cconvert_ptr; #undef gray_rgb_convert_internal #undef rgb_rgb_convert_internal -#define RGB_RED EXT_BGR_RED -#define RGB_GREEN EXT_BGR_GREEN -#define RGB_BLUE EXT_BGR_BLUE -#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -#define ycc_rgb_convert_internal ycc_extbgr_convert_internal -#define gray_rgb_convert_internal gray_extbgr_convert_internal -#define rgb_rgb_convert_internal rgb_extbgr_convert_internal +#define RGB_RED EXT_BGR_RED +#define RGB_GREEN EXT_BGR_GREEN +#define RGB_BLUE EXT_BGR_BLUE +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define ycc_rgb_convert_internal ycc_extbgr_convert_internal +#define gray_rgb_convert_internal gray_extbgr_convert_internal +#define rgb_rgb_convert_internal rgb_extbgr_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -148,14 +148,14 @@ typedef my_color_deconverter *my_cconvert_ptr; #undef gray_rgb_convert_internal #undef rgb_rgb_convert_internal -#define RGB_RED EXT_BGRX_RED -#define RGB_GREEN EXT_BGRX_GREEN -#define RGB_BLUE EXT_BGRX_BLUE -#define RGB_ALPHA 3 -#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -#define ycc_rgb_convert_internal ycc_extbgrx_convert_internal -#define gray_rgb_convert_internal gray_extbgrx_convert_internal -#define rgb_rgb_convert_internal rgb_extbgrx_convert_internal +#define RGB_RED EXT_BGRX_RED +#define RGB_GREEN EXT_BGRX_GREEN +#define RGB_BLUE EXT_BGRX_BLUE +#define RGB_ALPHA 3 +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define ycc_rgb_convert_internal ycc_extbgrx_convert_internal +#define gray_rgb_convert_internal gray_extbgrx_convert_internal +#define rgb_rgb_convert_internal rgb_extbgrx_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -166,14 +166,14 @@ typedef my_color_deconverter *my_cconvert_ptr; #undef gray_rgb_convert_internal #undef rgb_rgb_convert_internal -#define RGB_RED EXT_XBGR_RED -#define RGB_GREEN EXT_XBGR_GREEN -#define RGB_BLUE EXT_XBGR_BLUE -#define RGB_ALPHA 0 -#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -#define 
ycc_rgb_convert_internal ycc_extxbgr_convert_internal -#define gray_rgb_convert_internal gray_extxbgr_convert_internal -#define rgb_rgb_convert_internal rgb_extxbgr_convert_internal +#define RGB_RED EXT_XBGR_RED +#define RGB_GREEN EXT_XBGR_GREEN +#define RGB_BLUE EXT_XBGR_BLUE +#define RGB_ALPHA 0 +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define ycc_rgb_convert_internal ycc_extxbgr_convert_internal +#define gray_rgb_convert_internal gray_extxbgr_convert_internal +#define rgb_rgb_convert_internal rgb_extxbgr_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -184,14 +184,14 @@ typedef my_color_deconverter *my_cconvert_ptr; #undef gray_rgb_convert_internal #undef rgb_rgb_convert_internal -#define RGB_RED EXT_XRGB_RED -#define RGB_GREEN EXT_XRGB_GREEN -#define RGB_BLUE EXT_XRGB_BLUE -#define RGB_ALPHA 0 -#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -#define ycc_rgb_convert_internal ycc_extxrgb_convert_internal -#define gray_rgb_convert_internal gray_extxrgb_convert_internal -#define rgb_rgb_convert_internal rgb_extxrgb_convert_internal +#define RGB_RED EXT_XRGB_RED +#define RGB_GREEN EXT_XRGB_GREEN +#define RGB_BLUE EXT_XRGB_BLUE +#define RGB_ALPHA 0 +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define ycc_rgb_convert_internal ycc_extxrgb_convert_internal +#define gray_rgb_convert_internal gray_extxrgb_convert_internal +#define rgb_rgb_convert_internal rgb_extxrgb_convert_internal #include "jdcolext.c" #undef RGB_RED #undef RGB_GREEN @@ -208,25 +208,25 @@ typedef my_color_deconverter *my_cconvert_ptr; */ LOCAL(void) -build_ycc_rgb_table (j_decompress_ptr cinfo) +build_ycc_rgb_table(j_decompress_ptr cinfo) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; int i; JLONG x; SHIFT_TEMPS cconvert->Cr_r_tab = (int *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * sizeof(int)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + 
(MAXJSAMPLE + 1) * sizeof(int)); cconvert->Cb_b_tab = (int *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * sizeof(int)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (MAXJSAMPLE + 1) * sizeof(int)); cconvert->Cr_g_tab = (JLONG *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * sizeof(JLONG)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (MAXJSAMPLE + 1) * sizeof(JLONG)); cconvert->Cb_g_tab = (JLONG *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * sizeof(JLONG)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (MAXJSAMPLE + 1) * sizeof(JLONG)); for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) { /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */ @@ -238,10 +238,10 @@ build_ycc_rgb_table (j_decompress_ptr cinfo) cconvert->Cb_b_tab[i] = (int) RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); /* Cr=>G value is scaled-up -0.71414 * x */ - cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x; + cconvert->Cr_g_tab[i] = (-FIX(0.71414)) * x; /* Cb=>G value is scaled-up -0.34414 * x */ /* We also add in ONE_HALF so that need not do it in inner loop */ - cconvert->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF; + cconvert->Cb_g_tab[i] = (-FIX(0.34414)) * x + ONE_HALF; } } @@ -251,43 +251,42 @@ build_ycc_rgb_table (j_decompress_ptr cinfo) */ METHODDEF(void) -ycc_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { switch (cinfo->out_color_space) { - case JCS_EXT_RGB: - ycc_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - ycc_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - 
case JCS_EXT_BGR: - ycc_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - ycc_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - ycc_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - ycc_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - default: - ycc_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; + case JCS_EXT_RGB: + ycc_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + ycc_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_BGR: + ycc_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + ycc_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + ycc_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + ycc_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + default: + ycc_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; } } @@ -300,21 +299,21 @@ ycc_rgb_convert (j_decompress_ptr cinfo, */ LOCAL(void) -build_rgb_y_table (j_decompress_ptr cinfo) +build_rgb_y_table(j_decompress_ptr cinfo) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; JLONG *rgb_y_tab; JLONG i; /* Allocate and fill in the conversion tables. 
*/ cconvert->rgb_y_tab = rgb_y_tab = (JLONG *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, (TABLE_SIZE * sizeof(JLONG))); for (i = 0; i <= MAXJSAMPLE; i++) { - rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i; - rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i; - rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF; + rgb_y_tab[i + R_Y_OFF] = FIX(0.29900) * i; + rgb_y_tab[i + G_Y_OFF] = FIX(0.58700) * i; + rgb_y_tab[i + B_Y_OFF] = FIX(0.11400) * i + ONE_HALF; } } @@ -324,11 +323,10 @@ build_rgb_y_table (j_decompress_ptr cinfo) */ METHODDEF(void) -rgb_gray_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; register int r, g, b; register JLONG *ctab = cconvert->rgb_y_tab; register JSAMPROW outptr; @@ -347,9 +345,8 @@ rgb_gray_convert (j_decompress_ptr cinfo, g = GETJSAMPLE(inptr1[col]); b = GETJSAMPLE(inptr2[col]); /* Y */ - outptr[col] = (JSAMPLE) - ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) - >> SCALEBITS); + outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] + + ctab[b + B_Y_OFF]) >> SCALEBITS); } } } @@ -361,9 +358,8 @@ rgb_gray_convert (j_decompress_ptr cinfo, */ METHODDEF(void) -null_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +null_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { register JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr; register JDIMENSION col; @@ -423,12 +419,11 @@ null_convert (j_decompress_ptr cinfo, */ METHODDEF(void) -grayscale_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, 
JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +grayscale_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { - jcopy_sample_rows(input_buf[0], (int) input_row, output_buf, 0, - num_rows, cinfo->output_width); + jcopy_sample_rows(input_buf[0], (int)input_row, output_buf, 0, num_rows, + cinfo->output_width); } @@ -437,43 +432,42 @@ grayscale_convert (j_decompress_ptr cinfo, */ METHODDEF(void) -gray_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +gray_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { switch (cinfo->out_color_space) { - case JCS_EXT_RGB: - gray_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - gray_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_BGR: - gray_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - gray_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - gray_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - gray_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - default: - gray_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; + case JCS_EXT_RGB: + gray_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + gray_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_BGR: + gray_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + 
break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + gray_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + gray_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + gray_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + default: + gray_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; } } @@ -483,43 +477,42 @@ gray_rgb_convert (j_decompress_ptr cinfo, */ METHODDEF(void) -rgb_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +rgb_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { switch (cinfo->out_color_space) { - case JCS_EXT_RGB: - rgb_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - rgb_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_BGR: - rgb_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - rgb_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - rgb_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - rgb_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; - default: - rgb_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, - num_rows); - break; + case JCS_EXT_RGB: + rgb_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + rgb_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, + 
num_rows); + break; + case JCS_EXT_BGR: + rgb_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + rgb_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + rgb_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + rgb_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; + default: + rgb_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, + num_rows); + break; } } @@ -532,11 +525,10 @@ rgb_rgb_convert (j_decompress_ptr cinfo, */ METHODDEF(void) -ycck_cmyk_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { - my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; + my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert; register int y, cb, cr; register JSAMPROW outptr; register JSAMPROW inptr0, inptr1, inptr2, inptr3; @@ -564,7 +556,7 @@ ycck_cmyk_convert (j_decompress_ptr cinfo, /* Range-limiting is essential due to noise introduced by DCT losses. 
*/ outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */ outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */ - ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS)))]; outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */ /* K passes through unchanged */ @@ -579,16 +571,16 @@ ycck_cmyk_convert (j_decompress_ptr cinfo, * RGB565 conversion */ -#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ - (((g) << 3) & 0x7E0) | ((b) >> 3)) -#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ - (((g) << 11) & 0xE000) | \ - (((b) << 5) & 0x1F00)) +#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ + (((g) << 3) & 0x7E0) | ((b) >> 3)) +#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ + (((g) << 11) & 0xE000) | \ + (((b) << 5) & 0x1F00)) -#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) -#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) +#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) +#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) -#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) +#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) #define WRITE_TWO_ALIGNED_PIXELS(addr, pixels) ((*(int *)(addr)) = pixels) @@ -600,7 +592,7 @@ ycck_cmyk_convert (j_decompress_ptr cinfo, /* Declarations for ordered dithering * * We use a 4x4 ordered dither array packed into 32 bits. This array is - * sufficent for dithering RGB888 to RGB565. + * sufficient for dithering RGB888 to RGB565. 
*/ #define DITHER_MASK 0x3 @@ -624,14 +616,14 @@ static INLINE boolean is_big_endian(void) /* Include inline routines for RGB565 conversion */ -#define PACK_SHORT_565 PACK_SHORT_565_LE -#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE -#define ycc_rgb565_convert_internal ycc_rgb565_convert_le -#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_le -#define rgb_rgb565_convert_internal rgb_rgb565_convert_le -#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_le -#define gray_rgb565_convert_internal gray_rgb565_convert_le -#define gray_rgb565D_convert_internal gray_rgb565D_convert_le +#define PACK_SHORT_565 PACK_SHORT_565_LE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE +#define ycc_rgb565_convert_internal ycc_rgb565_convert_le +#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_le +#define rgb_rgb565_convert_internal rgb_rgb565_convert_le +#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_le +#define gray_rgb565_convert_internal gray_rgb565_convert_le +#define gray_rgb565D_convert_internal gray_rgb565D_convert_le #include "jdcol565.c" #undef PACK_SHORT_565 #undef PACK_TWO_PIXELS @@ -642,14 +634,14 @@ static INLINE boolean is_big_endian(void) #undef gray_rgb565_convert_internal #undef gray_rgb565D_convert_internal -#define PACK_SHORT_565 PACK_SHORT_565_BE -#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE -#define ycc_rgb565_convert_internal ycc_rgb565_convert_be -#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_be -#define rgb_rgb565_convert_internal rgb_rgb565_convert_be -#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_be -#define gray_rgb565_convert_internal gray_rgb565_convert_be -#define gray_rgb565D_convert_internal gray_rgb565D_convert_be +#define PACK_SHORT_565 PACK_SHORT_565_BE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE +#define ycc_rgb565_convert_internal ycc_rgb565_convert_be +#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_be +#define rgb_rgb565_convert_internal rgb_rgb565_convert_be +#define rgb_rgb565D_convert_internal 
rgb_rgb565D_convert_be +#define gray_rgb565_convert_internal gray_rgb565_convert_be +#define gray_rgb565D_convert_internal gray_rgb565D_convert_be #include "jdcol565.c" #undef PACK_SHORT_565 #undef PACK_TWO_PIXELS @@ -662,9 +654,8 @@ static INLINE boolean is_big_endian(void) METHODDEF(void) -ycc_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { if (is_big_endian()) ycc_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); @@ -674,9 +665,8 @@ ycc_rgb565_convert (j_decompress_ptr cinfo, METHODDEF(void) -ycc_rgb565D_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +ycc_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { if (is_big_endian()) ycc_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); @@ -686,9 +676,8 @@ ycc_rgb565D_convert (j_decompress_ptr cinfo, METHODDEF(void) -rgb_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +rgb_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { if (is_big_endian()) rgb_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); @@ -698,9 +687,8 @@ rgb_rgb565_convert (j_decompress_ptr cinfo, METHODDEF(void) -rgb_rgb565D_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +rgb_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { if (is_big_endian()) rgb_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); @@ -710,9 +698,8 @@ rgb_rgb565D_convert 
(j_decompress_ptr cinfo, METHODDEF(void) -gray_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +gray_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { if (is_big_endian()) gray_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); @@ -722,9 +709,8 @@ gray_rgb565_convert (j_decompress_ptr cinfo, METHODDEF(void) -gray_rgb565D_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +gray_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) { if (is_big_endian()) gray_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows); @@ -738,7 +724,7 @@ gray_rgb565D_convert (j_decompress_ptr cinfo, */ METHODDEF(void) -start_pass_dcolor (j_decompress_ptr cinfo) +start_pass_dcolor(j_decompress_ptr cinfo) { /* no work needed */ } @@ -749,15 +735,15 @@ start_pass_dcolor (j_decompress_ptr cinfo) */ GLOBAL(void) -jinit_color_deconverter (j_decompress_ptr cinfo) +jinit_color_deconverter(j_decompress_ptr cinfo) { my_cconvert_ptr cconvert; int ci; cconvert = (my_cconvert_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_color_deconverter)); - cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert; + cinfo->cconvert = (struct jpeg_color_deconverter *)cconvert; cconvert->pub.start_pass = start_pass_dcolor; /* Make sure num_components agrees with jpeg_color_space */ @@ -843,11 +829,11 @@ jinit_color_deconverter (j_decompress_ptr cinfo) cinfo->out_color_components = 3; if (cinfo->dither_mode == JDITHER_NONE) { if (cinfo->jpeg_color_space == JCS_YCbCr) { - if (jsimd_can_ycc_rgb565()) - cconvert->pub.color_convert = jsimd_ycc_rgb565_convert; - else { - cconvert->pub.color_convert = 
ycc_rgb565_convert; - build_ycc_rgb_table(cinfo); + if (jsimd_can_ycc_rgb565()) + cconvert->pub.color_convert = jsimd_ycc_rgb565_convert; + else { + cconvert->pub.color_convert = ycc_rgb565_convert; + build_ycc_rgb_table(cinfo); } } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { cconvert->pub.color_convert = gray_rgb565_convert; diff --git a/jdct.h b/jdct.h index faf8e1c..66d1718 100644 --- a/jdct.h +++ b/jdct.h @@ -36,7 +36,7 @@ typedef int DCTELEM; /* 16 or 32 bits is fine */ typedef unsigned int UDCTELEM; typedef unsigned long long UDCTELEM2; #else -typedef short DCTELEM; /* prefer 16 bit with SIMD for parellelism */ +typedef short DCTELEM; /* prefer 16 bit with SIMD for parellelism */ typedef unsigned short UDCTELEM; typedef unsigned int UDCTELEM2; #endif @@ -63,15 +63,15 @@ typedef unsigned long long UDCTELEM2; * Each IDCT routine has its own ideas about the best dct_table element type. */ -typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */ +typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */ #if BITS_IN_JSAMPLE == 8 -typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */ -#define IFAST_SCALE_BITS 2 /* fractional bits in scale factors */ +typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */ +#define IFAST_SCALE_BITS 2 /* fractional bits in scale factors */ #else -typedef JLONG IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */ -#define IFAST_SCALE_BITS 13 /* fractional bits in scale factors */ +typedef JLONG IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */ +#define IFAST_SCALE_BITS 13 /* fractional bits in scale factors */ #endif -typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */ +typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */ /* @@ -90,64 +90,64 @@ typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */ /* Extern declarations for the forward and inverse DCT routines. 
*/ -EXTERN(void) jpeg_fdct_islow (DCTELEM *data); -EXTERN(void) jpeg_fdct_ifast (DCTELEM *data); -EXTERN(void) jpeg_fdct_float (FAST_FLOAT *data); - -EXTERN(void) jpeg_idct_islow - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_ifast - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_float - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_7x7 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_6x6 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_5x5 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_4x4 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_3x3 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_2x2 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_1x1 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_9x9 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_10x10 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, 
JDIMENSION output_col); -EXTERN(void) jpeg_idct_11x11 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_12x12 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_13x13 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_14x14 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_15x15 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); -EXTERN(void) jpeg_idct_16x16 - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_fdct_islow(DCTELEM *data); +EXTERN(void) jpeg_fdct_ifast(DCTELEM *data); +EXTERN(void) jpeg_fdct_float(FAST_FLOAT *data); + +EXTERN(void) jpeg_idct_islow(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_ifast(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_float(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_7x7(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_6x6(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_5x5(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + 
JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_4x4(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_3x3(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_2x2(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_1x1(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_9x9(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_10x10(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_11x11(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_12x12(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_13x13(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_14x14(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_15x15(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jpeg_idct_16x16(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); /* @@ -160,22 +160,22 @@ EXTERN(void) jpeg_idct_16x16 * and may differ from one module to 
the next. */ -#define ONE ((JLONG) 1) -#define CONST_SCALE (ONE << CONST_BITS) +#define ONE ((JLONG)1) +#define CONST_SCALE (ONE << CONST_BITS) /* Convert a positive real constant to an integer scaled by CONST_SCALE. * Caution: some C compilers fail to reduce "FIX(constant)" at compile time, * thus causing a lot of useless floating-point operations at run time. */ -#define FIX(x) ((JLONG) ((x) * CONST_SCALE + 0.5)) +#define FIX(x) ((JLONG)((x) * CONST_SCALE + 0.5)) /* Descale and correctly round a JLONG value that's scaled by N bits. * We assume RIGHT_SHIFT rounds towards minus infinity, so adding * the fudge factor is correct for either sign of X. */ -#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) +#define DESCALE(x, n) RIGHT_SHIFT((x) + (ONE << ((n) - 1)), n) /* Multiply a JLONG variable by a JLONG constant to yield a JLONG result. * This macro is used only when the two inputs will actually be no more than @@ -187,22 +187,22 @@ EXTERN(void) jpeg_idct_16x16 */ #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ -#define MULTIPLY16C16(var,const) (((INT16) (var)) * ((INT16) (const))) +#define MULTIPLY16C16(var, const) (((INT16)(var)) * ((INT16)(const))) #endif #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ -#define MULTIPLY16C16(var,const) (((INT16) (var)) * ((JLONG) (const))) +#define MULTIPLY16C16(var, const) (((INT16)(var)) * ((JLONG)(const))) #endif #ifndef MULTIPLY16C16 /* default definition */ -#define MULTIPLY16C16(var,const) ((var) * (const)) +#define MULTIPLY16C16(var, const) ((var) * (const)) #endif /* Same except both inputs are variables. 
*/ #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ -#define MULTIPLY16V16(var1,var2) (((INT16) (var1)) * ((INT16) (var2))) +#define MULTIPLY16V16(var1, var2) (((INT16)(var1)) * ((INT16)(var2))) #endif #ifndef MULTIPLY16V16 /* default definition */ -#define MULTIPLY16V16(var1,var2) ((var1) * (var2)) +#define MULTIPLY16V16(var1, var2) ((var1) * (var2)) #endif diff --git a/jddctmgr.c b/jddctmgr.c index 3a5ba7e..266f446 100644 --- a/jddctmgr.c +++ b/jddctmgr.c @@ -94,9 +94,9 @@ typedef union { */ METHODDEF(void) -start_pass (j_decompress_ptr cinfo) +start_pass(j_decompress_ptr cinfo) { - my_idct_ptr idct = (my_idct_ptr) cinfo->idct; + my_idct_ptr idct = (my_idct_ptr)cinfo->idct; int ci, i; jpeg_component_info *compptr; int method = 0; @@ -233,7 +233,7 @@ start_pass (j_decompress_ptr cinfo) * multiplier table all-zero; we'll be reading zeroes from the * coefficient controller's buffer anyway. */ - if (! compptr->component_needed || idct->cur_method[ci] == method) + if (!compptr->component_needed || idct->cur_method[ci] == method) continue; qtbl = compptr->quant_table; if (qtbl == NULL) /* happens if no data yet for component */ @@ -246,9 +246,9 @@ start_pass (j_decompress_ptr cinfo) /* For LL&M IDCT method, multipliers are equal to raw quantization * coefficients, but are stored as ints to ensure access efficiency. */ - ISLOW_MULT_TYPE *ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table; + ISLOW_MULT_TYPE *ismtbl = (ISLOW_MULT_TYPE *)compptr->dct_table; for (i = 0; i < DCTSIZE2; i++) { - ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i]; + ismtbl[i] = (ISLOW_MULT_TYPE)qtbl->quantval[i]; } } break; @@ -263,8 +263,8 @@ start_pass (j_decompress_ptr cinfo) * For integer operation, the multiplier table is to be scaled by * IFAST_SCALE_BITS. 
*/ - IFAST_MULT_TYPE *ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table; -#define CONST_BITS 14 + IFAST_MULT_TYPE *ifmtbl = (IFAST_MULT_TYPE *)compptr->dct_table; +#define CONST_BITS 14 static const INT16 aanscales[DCTSIZE2] = { /* precomputed values scaled up by 14 bits */ 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, @@ -280,9 +280,9 @@ start_pass (j_decompress_ptr cinfo) for (i = 0; i < DCTSIZE2; i++) { ifmtbl[i] = (IFAST_MULT_TYPE) - DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i], - (JLONG) aanscales[i]), - CONST_BITS-IFAST_SCALE_BITS); + DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i], + (JLONG)aanscales[i]), + CONST_BITS - IFAST_SCALE_BITS); } } break; @@ -295,7 +295,7 @@ start_pass (j_decompress_ptr cinfo) * scalefactor[0] = 1 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 */ - FLOAT_MULT_TYPE *fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table; + FLOAT_MULT_TYPE *fmtbl = (FLOAT_MULT_TYPE *)compptr->dct_table; int row, col; static const double aanscalefactor[DCTSIZE] = { 1.0, 1.387039845, 1.306562965, 1.175875602, @@ -306,7 +306,7 @@ start_pass (j_decompress_ptr cinfo) for (row = 0; row < DCTSIZE; row++) { for (col = 0; col < DCTSIZE; col++) { fmtbl[i] = (FLOAT_MULT_TYPE) - ((double) qtbl->quantval[i] * + ((double)qtbl->quantval[i] * aanscalefactor[row] * aanscalefactor[col]); i++; } @@ -327,23 +327,23 @@ start_pass (j_decompress_ptr cinfo) */ GLOBAL(void) -jinit_inverse_dct (j_decompress_ptr cinfo) +jinit_inverse_dct(j_decompress_ptr cinfo) { my_idct_ptr idct; int ci; jpeg_component_info *compptr; idct = (my_idct_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_idct_controller)); - cinfo->idct = (struct jpeg_inverse_dct *) idct; + cinfo->idct = (struct jpeg_inverse_dct *)idct; idct->pub.start_pass = start_pass; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { /* Allocate and pre-zero a multiplier table for each 
component */ compptr->dct_table = - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(multiplier_table)); MEMZERO(compptr->dct_table, sizeof(multiplier_table)); /* Mark multiplier table not yet set up for any method */ diff --git a/jdhuff.c b/jdhuff.c index bb2b848..95f38e5 100644 --- a/jdhuff.c +++ b/jdhuff.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2009-2011, 2016, D. R. Commander. + * Copyright (C) 2009-2011, 2016, 2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -15,6 +15,9 @@ * up to the start of the current MCU. To do this, we copy state variables * into local working storage, and update them back to the permanent * storage only upon successful completion of an MCU. + * + * NOTE: All referenced figures are from + * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994. 
*/ #define JPEG_INTERNALS @@ -42,14 +45,14 @@ typedef struct { */ #ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest,src) ((dest) = (src)) +#define ASSIGN_STATE(dest, src) ((dest) = (src)) #else #if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest,src) \ - ((dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) +#define ASSIGN_STATE(dest, src) \ + ((dest).last_dc_val[0] = (src).last_dc_val[0], \ + (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) #endif #endif @@ -88,9 +91,9 @@ typedef huff_entropy_decoder *huff_entropy_ptr; */ METHODDEF(void) -start_pass_huff_decoder (j_decompress_ptr cinfo) +start_pass_huff_decoder(j_decompress_ptr cinfo) { - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; int ci, blkn, dctbl, actbl; d_derived_tbl **pdtbl; jpeg_component_info *compptr; @@ -99,7 +102,7 @@ start_pass_huff_decoder (j_decompress_ptr cinfo) * This ought to be an error condition, but we make it a warning because * there are some baseline files out there with all zeroes in these bytes. */ - if (cinfo->Ss != 0 || cinfo->Se != DCTSIZE2-1 || + if (cinfo->Ss != 0 || cinfo->Se != DCTSIZE2 - 1 || cinfo->Ah != 0 || cinfo->Al != 0) WARNMS(cinfo, JWRN_NOT_SEQUENTIAL); @@ -152,8 +155,8 @@ start_pass_huff_decoder (j_decompress_ptr cinfo) */ GLOBAL(void) -jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, - d_derived_tbl **pdtbl) +jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno, + d_derived_tbl **pdtbl) { JHUFF_TBL *htbl; d_derived_tbl *dtbl; @@ -178,7 +181,7 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, /* Allocate a workspace if we haven't already done so. 
*/ if (*pdtbl == NULL) *pdtbl = (d_derived_tbl *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(d_derived_tbl)); dtbl = *pdtbl; dtbl->pub = htbl; /* fill in back link */ @@ -187,11 +190,11 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, p = 0; for (l = 1; l <= 16; l++) { - i = (int) htbl->bits[l]; + i = (int)htbl->bits[l]; if (i < 0 || p + i > 256) /* protect against table overrun */ ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); while (i--) - huffsize[p++] = (char) l; + huffsize[p++] = (char)l; } huffsize[p] = 0; numsymbols = p; @@ -203,14 +206,14 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, si = huffsize[0]; p = 0; while (huffsize[p]) { - while (((int) huffsize[p]) == si) { + while (((int)huffsize[p]) == si) { huffcode[p++] = code; code++; } /* code is now 1 more than the last code used for codelength si; but * it must still fit in si bits, since no code is allowed to be all ones. */ - if (((JLONG) code) >= (((JLONG) 1) << si)) + if (((JLONG)code) >= (((JLONG)1) << si)) ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); code <<= 1; si++; @@ -224,9 +227,9 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, /* valoffset[l] = huffval[] index of 1st symbol of code length l, * minus the minimum code of length l */ - dtbl->valoffset[l] = (JLONG) p - (JLONG) huffcode[p]; + dtbl->valoffset[l] = (JLONG)p - (JLONG)huffcode[p]; p += htbl->bits[l]; - dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */ + dtbl->maxcode[l] = huffcode[p - 1]; /* maximum code of length l */ } else { dtbl->maxcode[l] = -1; /* -1 if no codes of this length */ } @@ -241,16 +244,16 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, * with that code. 
*/ - for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++) - dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD; + for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++) + dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD; p = 0; for (l = 1; l <= HUFF_LOOKAHEAD; l++) { - for (i = 1; i <= (int) htbl->bits[l]; i++, p++) { + for (i = 1; i <= (int)htbl->bits[l]; i++, p++) { /* l = current code's length, p = its index in huffcode[] & huffval[]. */ /* Generate left-justified code followed by all possible bit sequences */ - lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l); - for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) { + lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l); + for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) { dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p]; lookbits++; } @@ -291,14 +294,14 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, #ifdef SLOW_SHIFT_32 #define MIN_GET_BITS 15 /* minimum allowable value */ #else -#define MIN_GET_BITS (BIT_BUF_SIZE-7) +#define MIN_GET_BITS (BIT_BUF_SIZE - 7) #endif GLOBAL(boolean) -jpeg_fill_bit_buffer (bitread_working_state *state, - register bit_buf_type get_buffer, register int bits_left, - int nbits) +jpeg_fill_bit_buffer(bitread_working_state *state, + register bit_buf_type get_buffer, register int bits_left, + int nbits) /* Load up the bit buffer to a depth of at least nbits */ { /* Copy heavily used state fields into locals (hopefully registers) */ @@ -316,7 +319,7 @@ jpeg_fill_bit_buffer (bitread_working_state *state, /* Attempt to read a byte */ if (bytes_in_buffer == 0) { - if (! (*cinfo->src->fill_input_buffer) (cinfo)) + if (!(*cinfo->src->fill_input_buffer) (cinfo)) return FALSE; next_input_byte = cinfo->src->next_input_byte; bytes_in_buffer = cinfo->src->bytes_in_buffer; @@ -333,7 +336,7 @@ jpeg_fill_bit_buffer (bitread_working_state *state, */ do { if (bytes_in_buffer == 0) { - if (! 
(*cinfo->src->fill_input_buffer) (cinfo)) + if (!(*cinfo->src->fill_input_buffer) (cinfo)) return FALSE; next_input_byte = cinfo->src->next_input_byte; bytes_in_buffer = cinfo->src->bytes_in_buffer; @@ -365,7 +368,7 @@ jpeg_fill_bit_buffer (bitread_working_state *state, bits_left += 8; } /* end while */ } else { - no_more_bytes: +no_more_bytes: /* We get here if we've read the marker that terminates the compressed * data segment. There should be enough bits in the buffer register * to satisfy the request; if so, no problem. @@ -376,7 +379,7 @@ jpeg_fill_bit_buffer (bitread_working_state *state, * We use a nonvolatile flag to ensure that only one warning message * appears per data segment. */ - if (! cinfo->entropy->insufficient_data) { + if (!cinfo->entropy->insufficient_data) { WARNMS(cinfo, JWRN_HIT_MARKER); cinfo->entropy->insufficient_data = TRUE; } @@ -400,8 +403,7 @@ jpeg_fill_bit_buffer (bitread_working_state *state, handle markers. We have to hand off any blocks with markers to the slower routines. */ -#define GET_BYTE \ -{ \ +#define GET_BYTE { \ register int c0, c1; \ c0 = GETJOCTET(*buffer++); \ c1 = GETJOCTET(*buffer); \ @@ -421,7 +423,7 @@ jpeg_fill_bit_buffer (bitread_working_state *state, } \ } -#if SIZEOF_SIZE_T==8 || defined(_WIN64) +#if SIZEOF_SIZE_T == 8 || defined(_WIN64) /* Pre-fetch 48 bytes, because the holding register is 64-bit */ #define FILL_BIT_BUFFER_FAST \ @@ -446,9 +448,9 @@ jpeg_fill_bit_buffer (bitread_working_state *state, */ GLOBAL(int) -jpeg_huff_decode (bitread_working_state *state, - register bit_buf_type get_buffer, register int bits_left, - d_derived_tbl *htbl, int min_bits) +jpeg_huff_decode(bitread_working_state *state, + register bit_buf_type get_buffer, register int bits_left, + d_derived_tbl *htbl, int min_bits) { register int l = min_bits; register JLONG code; @@ -460,7 +462,7 @@ jpeg_huff_decode (bitread_working_state *state, code = GET_BITS(l); /* Collect the rest of the Huffman code one bit at a time. 
*/ - /* This is per Figure F.16 in the JPEG spec. */ + /* This is per Figure F.16. */ while (code > htbl->maxcode[l]) { code <<= 1; @@ -480,7 +482,7 @@ jpeg_huff_decode (bitread_working_state *state, return 0; /* fake a zero as the safest result */ } - return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ]; + return htbl->pub->huffval[(int)(code + htbl->valoffset[l])]; } @@ -492,22 +494,26 @@ jpeg_huff_decode (bitread_working_state *state, #define AVOID_TABLES #ifdef AVOID_TABLES -#define NEG_1 ((unsigned int)-1) -#define HUFF_EXTEND(x,s) ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((NEG_1)<<(s)) + 1))) +#define NEG_1 ((unsigned int)-1) +#define HUFF_EXTEND(x, s) \ + ((x) + ((((x) - (1 << ((s) - 1))) >> 31) & (((NEG_1) << (s)) + 1))) #else -#define HUFF_EXTEND(x,s) ((x) < extend_test[s] ? (x) + extend_offset[s] : (x)) +#define HUFF_EXTEND(x, s) \ + ((x) < extend_test[s] ? (x) + extend_offset[s] : (x)) -static const int extend_test[16] = /* entry n is 2**(n-1) */ - { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, - 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; +static const int extend_test[16] = { /* entry n is 2**(n-1) */ + 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, + 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 +}; -static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */ - { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, - ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, - ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, - ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 }; +static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */ + 0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1, + ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1, + ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1, + ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1 +}; #endif /* AVOID_TABLES */ @@ 
-518,9 +524,9 @@ static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */ */ LOCAL(boolean) -process_restart (j_decompress_ptr cinfo) +process_restart(j_decompress_ptr cinfo) { - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; int ci; /* Throw away any unused bits remaining in bit buffer; */ @@ -529,7 +535,7 @@ process_restart (j_decompress_ptr cinfo) entropy->bitstate.bits_left = 0; /* Advance past the RSTn marker */ - if (! (*cinfo->marker->read_restart_marker) (cinfo)) + if (!(*cinfo->marker->read_restart_marker) (cinfo)) return FALSE; /* Re-initialize DC predictions to 0 */ @@ -552,16 +558,16 @@ process_restart (j_decompress_ptr cinfo) LOCAL(boolean) -decode_mcu_slow (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; BITREAD_STATE_VARS; int blkn; savable_state state; /* Outer loop handles each block in the MCU */ /* Load up working state */ - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + BITREAD_LOAD_STATE(cinfo, entropy->bitstate); ASSIGN_STATE(state, entropy->saved); for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { @@ -587,7 +593,7 @@ decode_mcu_slow (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) state.last_dc_val[ci] = s; if (block) { /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */ - (*block)[0] = (JCOEF) s; + (*block)[0] = (JCOEF)s; } } @@ -610,7 +616,7 @@ decode_mcu_slow (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) * Note: the extra entries in jpeg_natural_order[] will save us * if k >= DCTSIZE2, which could happen if the data is corrupted. 
*/ - (*block)[jpeg_natural_order[k]] = (JCOEF) s; + (*block)[jpeg_natural_order[k]] = (JCOEF)s; } else { if (r != 15) break; @@ -642,16 +648,16 @@ decode_mcu_slow (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } /* Completed MCU, so update state */ - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + BITREAD_SAVE_STATE(cinfo, entropy->bitstate); ASSIGN_STATE(entropy->saved, state); return TRUE; } LOCAL(boolean) -decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; BITREAD_STATE_VARS; JOCTET *buffer; int blkn; @@ -659,8 +665,8 @@ decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Outer loop handles each block in the MCU */ /* Load up working state */ - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); - buffer = (JOCTET *) br_state.next_input_byte; + BITREAD_LOAD_STATE(cinfo, entropy->bitstate); + buffer = (JOCTET *)br_state.next_input_byte; ASSIGN_STATE(state, entropy->saved); for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { @@ -681,7 +687,7 @@ decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; if (block) - (*block)[0] = (JCOEF) s; + (*block)[0] = (JCOEF)s; } if (entropy->ac_needed[blkn] && block) { @@ -696,7 +702,7 @@ decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) FILL_BIT_BUFFER_FAST r = GET_BITS(s); s = HUFF_EXTEND(r, s); - (*block)[jpeg_natural_order[k]] = (JCOEF) s; + (*block)[jpeg_natural_order[k]] = (JCOEF)s; } else { if (r != 15) break; k += 15; @@ -729,7 +735,7 @@ decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte); br_state.next_input_byte = buffer; - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + BITREAD_SAVE_STATE(cinfo, entropy->bitstate); ASSIGN_STATE(entropy->saved, state); return TRUE; } @@ 
-750,36 +756,35 @@ decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) * this module, since we'll just re-assign them on the next call.) */ -#define BUFSIZE (DCTSIZE2 * 8) +#define BUFSIZE (DCTSIZE2 * 8) METHODDEF(boolean) -decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; int usefast = 1; /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) - if (! process_restart(cinfo)) + if (!process_restart(cinfo)) return FALSE; usefast = 0; } - if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU - || cinfo->unread_marker != 0) + if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU || + cinfo->unread_marker != 0) usefast = 0; /* If we've run out of data, just leave the MCU set to zeroes. * This way, we return uniform gray for the remainder of the segment. */ - if (! entropy->pub.insufficient_data) { + if (!entropy->pub.insufficient_data) { if (usefast) { if (!decode_mcu_fast(cinfo, MCU_data)) goto use_slow; - } - else { - use_slow: + } else { +use_slow: if (!decode_mcu_slow(cinfo, MCU_data)) return FALSE; } @@ -797,7 +802,7 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) */ GLOBAL(void) -jinit_huff_decoder (j_decompress_ptr cinfo) +jinit_huff_decoder(j_decompress_ptr cinfo) { huff_entropy_ptr entropy; int i; @@ -806,12 +811,12 @@ jinit_huff_decoder (j_decompress_ptr cinfo) are the default tables. Thus, if the tables are not set by the time the Huffman decoder is initialized (usually within the body of jpeg_start_decompress()), we set them to default values. 
*/ - std_huff_tables((j_common_ptr) cinfo); + std_huff_tables((j_common_ptr)cinfo); entropy = (huff_entropy_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(huff_entropy_decoder)); - cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; + cinfo->entropy = (struct jpeg_entropy_decoder *)entropy; entropy->pub.start_pass = start_pass_huff_decoder; entropy->pub.decode_mcu = decode_mcu; diff --git a/jdhuff.h b/jdhuff.h index 87d4465..6a8d90f 100644 --- a/jdhuff.h +++ b/jdhuff.h @@ -43,13 +43,12 @@ typedef struct { * if too long. The next 8 bits of each entry contain the * symbol. */ - int lookup[1<src->next_input_byte; \ - br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \ - get_buffer = permstate.get_buffer; \ - bits_left = permstate.bits_left; - -#define BITREAD_SAVE_STATE(cinfop,permstate) \ - cinfop->src->next_input_byte = br_state.next_input_byte; \ - cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \ - permstate.get_buffer = get_buffer; \ - permstate.bits_left = bits_left +#define BITREAD_STATE_VARS \ + register bit_buf_type get_buffer; \ + register int bits_left; \ + bitread_working_state br_state + +#define BITREAD_LOAD_STATE(cinfop, permstate) \ + br_state.cinfo = cinfop; \ + br_state.next_input_byte = cinfop->src->next_input_byte; \ + br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \ + get_buffer = permstate.get_buffer; \ + bits_left = permstate.bits_left; + +#define BITREAD_SAVE_STATE(cinfop, permstate) \ + cinfop->src->next_input_byte = br_state.next_input_byte; \ + cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \ + permstate.get_buffer = get_buffer; \ + permstate.bits_left = bits_left /* * These macros provide the in-line portion of bit fetching. @@ -137,7 +136,7 @@ typedef struct { /* Bitreading working state within an MCU */ * before using GET_BITS, PEEK_BITS, or DROP_BITS. 
* The variables get_buffer and bits_left are assumed to be locals, * but the state struct might not be (jpeg_huff_decode needs this). - * CHECK_BIT_BUFFER(state,n,action); + * CHECK_BIT_BUFFER(state, n, action); * Ensure there are N bits in get_buffer; if suspend, take action. * val = GET_BITS(n); * Fetch next N bits. @@ -149,25 +148,27 @@ typedef struct { /* Bitreading working state within an MCU */ * is evaluated multiple times. */ -#define CHECK_BIT_BUFFER(state,nbits,action) \ - { if (bits_left < (nbits)) { \ - if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits)) \ - { action; } \ - get_buffer = (state).get_buffer; bits_left = (state).bits_left; } } +#define CHECK_BIT_BUFFER(state, nbits, action) { \ + if (bits_left < (nbits)) { \ + if (!jpeg_fill_bit_buffer(&(state), get_buffer, bits_left, nbits)) \ + { action; } \ + get_buffer = (state).get_buffer; bits_left = (state).bits_left; \ + } \ +} #define GET_BITS(nbits) \ - (((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1)) + (((int)(get_buffer >> (bits_left -= (nbits)))) & ((1 << (nbits)) - 1)) #define PEEK_BITS(nbits) \ - (((int) (get_buffer >> (bits_left - (nbits)))) & ((1<<(nbits))-1)) + (((int)(get_buffer >> (bits_left - (nbits)))) & ((1 << (nbits)) - 1)) #define DROP_BITS(nbits) \ - (bits_left -= (nbits)) + (bits_left -= (nbits)) /* Load up the bit buffer to a depth of at least nbits */ -EXTERN(boolean) jpeg_fill_bit_buffer - (bitread_working_state *state, register bit_buf_type get_buffer, - register int bits_left, int nbits); +EXTERN(boolean) jpeg_fill_bit_buffer(bitread_working_state *state, + register bit_buf_type get_buffer, + register int bits_left, int nbits); /* @@ -187,13 +188,14 @@ EXTERN(boolean) jpeg_fill_bit_buffer * 3. jpeg_huff_decode returns -1 if forced to suspend. 
*/ -#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \ -{ register int nb, look; \ +#define HUFF_DECODE(result, state, htbl, failaction, slowlabel) { \ + register int nb, look; \ if (bits_left < HUFF_LOOKAHEAD) { \ - if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \ - get_buffer = state.get_buffer; bits_left = state.bits_left; \ + if (!jpeg_fill_bit_buffer(&state, get_buffer, bits_left, 0)) \ + { failaction; } \ + get_buffer = state.get_buffer; bits_left = state.bits_left; \ if (bits_left < HUFF_LOOKAHEAD) { \ - nb = 1; goto slowlabel; \ + nb = 1; goto slowlabel; \ } \ } \ look = PEEK_BITS(HUFF_LOOKAHEAD); \ @@ -202,13 +204,14 @@ EXTERN(boolean) jpeg_fill_bit_buffer result = htbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1); \ } else { \ slowlabel: \ - if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \ - { failaction; } \ - get_buffer = state.get_buffer; bits_left = state.bits_left; \ + if ((result = \ + jpeg_huff_decode(&state, get_buffer, bits_left, htbl, nb)) < 0) \ + { failaction; } \ + get_buffer = state.get_buffer; bits_left = state.bits_left; \ } \ } -#define HUFF_DECODE_FAST(s,nb,htbl) \ +#define HUFF_DECODE_FAST(s, nb, htbl) \ FILL_BIT_BUFFER_FAST; \ s = PEEK_BITS(HUFF_LOOKAHEAD); \ s = htbl->lookup[s]; \ @@ -225,10 +228,11 @@ slowlabel: \ s |= GET_BITS(1); \ nb++; \ } \ - s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) & 0xFF ]; \ + s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \ } /* Out-of-line case for Huffman code fetching */ -EXTERN(int) jpeg_huff_decode - (bitread_working_state *state, register bit_buf_type get_buffer, - register int bits_left, d_derived_tbl *htbl, int min_bits); +EXTERN(int) jpeg_huff_decode(bitread_working_state *state, + register bit_buf_type get_buffer, + register int bits_left, d_derived_tbl *htbl, + int min_bits); diff --git a/jdicc.c b/jdicc.c new file mode 100644 index 0000000..7224695 --- /dev/null +++ b/jdicc.c @@ -0,0 +1,171 @@ +/* + * 
jdicc.c + * + * Copyright (C) 1997-1998, Thomas G. Lane, Todd Newman. + * Copyright (C) 2017, D. R. Commander. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file provides code to read International Color Consortium (ICC) device + * profiles embedded in JFIF JPEG image files. The ICC has defined a standard + * for including such data in JPEG "APP2" markers. The code given here does + * not know anything about the internal structure of the ICC profile data; it + * just knows how to get the profile data from a JPEG file while reading it. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" +#include "jerror.h" + +#ifndef HAVE_STDLIB_H /* should declare malloc() */ +extern void *malloc(size_t size); +#endif + + +#define ICC_MARKER (JPEG_APP0 + 2) /* JPEG marker code for ICC */ +#define ICC_OVERHEAD_LEN 14 /* size of non-profile data in APP2 */ + + +/* + * Handy subroutine to test whether a saved marker is an ICC profile marker. + */ + +LOCAL(boolean) +marker_is_icc(jpeg_saved_marker_ptr marker) +{ + return + marker->marker == ICC_MARKER && + marker->data_length >= ICC_OVERHEAD_LEN && + /* verify the identifying string */ + GETJOCTET(marker->data[0]) == 0x49 && + GETJOCTET(marker->data[1]) == 0x43 && + GETJOCTET(marker->data[2]) == 0x43 && + GETJOCTET(marker->data[3]) == 0x5F && + GETJOCTET(marker->data[4]) == 0x50 && + GETJOCTET(marker->data[5]) == 0x52 && + GETJOCTET(marker->data[6]) == 0x4F && + GETJOCTET(marker->data[7]) == 0x46 && + GETJOCTET(marker->data[8]) == 0x49 && + GETJOCTET(marker->data[9]) == 0x4C && + GETJOCTET(marker->data[10]) == 0x45 && + GETJOCTET(marker->data[11]) == 0x0; +} + + +/* + * See if there was an ICC profile in the JPEG file being read; if so, + * reassemble and return the profile data. + * + * TRUE is returned if an ICC profile was found, FALSE if not. 
If TRUE is + * returned, *icc_data_ptr is set to point to the returned data, and + * *icc_data_len is set to its length. + * + * IMPORTANT: the data at *icc_data_ptr is allocated with malloc() and must be + * freed by the caller with free() when the caller no longer needs it. + * (Alternatively, we could write this routine to use the IJG library's memory + * allocator, so that the data would be freed implicitly when + * jpeg_finish_decompress() is called. But it seems likely that many + * applications will prefer to have the data stick around after decompression + * finishes.) + */ + +GLOBAL(boolean) +jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr, + unsigned int *icc_data_len) +{ + jpeg_saved_marker_ptr marker; + int num_markers = 0; + int seq_no; + JOCTET *icc_data; + unsigned int total_length; +#define MAX_SEQ_NO 255 /* sufficient since marker numbers are bytes */ + char marker_present[MAX_SEQ_NO + 1]; /* 1 if marker found */ + unsigned int data_length[MAX_SEQ_NO + 1]; /* size of profile data in marker */ + unsigned int data_offset[MAX_SEQ_NO + 1]; /* offset for data in marker */ + + if (icc_data_ptr == NULL || icc_data_len == NULL) + ERREXIT(cinfo, JERR_BUFFER_SIZE); + if (cinfo->global_state < DSTATE_READY) + ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); + + *icc_data_ptr = NULL; /* avoid confusion if FALSE return */ + *icc_data_len = 0; + + /* This first pass over the saved markers discovers whether there are + * any ICC markers and verifies the consistency of the marker numbering. 
+ */ + + for (seq_no = 1; seq_no <= MAX_SEQ_NO; seq_no++) + marker_present[seq_no] = 0; + + for (marker = cinfo->marker_list; marker != NULL; marker = marker->next) { + if (marker_is_icc(marker)) { + if (num_markers == 0) + num_markers = GETJOCTET(marker->data[13]); + else if (num_markers != GETJOCTET(marker->data[13])) { + WARNMS(cinfo, JWRN_BOGUS_ICC); /* inconsistent num_markers fields */ + return FALSE; + } + seq_no = GETJOCTET(marker->data[12]); + if (seq_no <= 0 || seq_no > num_markers) { + WARNMS(cinfo, JWRN_BOGUS_ICC); /* bogus sequence number */ + return FALSE; + } + if (marker_present[seq_no]) { + WARNMS(cinfo, JWRN_BOGUS_ICC); /* duplicate sequence numbers */ + return FALSE; + } + marker_present[seq_no] = 1; + data_length[seq_no] = marker->data_length - ICC_OVERHEAD_LEN; + } + } + + if (num_markers == 0) + return FALSE; + + /* Check for missing markers, count total space needed, + * compute offset of each marker's part of the data. + */ + + total_length = 0; + for (seq_no = 1; seq_no <= num_markers; seq_no++) { + if (marker_present[seq_no] == 0) { + WARNMS(cinfo, JWRN_BOGUS_ICC); /* missing sequence number */ + return FALSE; + } + data_offset[seq_no] = total_length; + total_length += data_length[seq_no]; + } + + if (total_length == 0) { + WARNMS(cinfo, JWRN_BOGUS_ICC); /* found only empty markers? 
*/ + return FALSE; + } + + /* Allocate space for assembled data */ + icc_data = (JOCTET *)malloc(total_length * sizeof(JOCTET)); + if (icc_data == NULL) + ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 11); /* oops, out of memory */ + + /* and fill it in */ + for (marker = cinfo->marker_list; marker != NULL; marker = marker->next) { + if (marker_is_icc(marker)) { + JOCTET FAR *src_ptr; + JOCTET *dst_ptr; + unsigned int length; + seq_no = GETJOCTET(marker->data[12]); + dst_ptr = icc_data + data_offset[seq_no]; + src_ptr = marker->data + ICC_OVERHEAD_LEN; + length = data_length[seq_no]; + while (length--) { + *dst_ptr++ = *src_ptr++; + } + } + } + + *icc_data_ptr = icc_data; + *icc_data_len = total_length; + + return TRUE; +} diff --git a/jdinput.c b/jdinput.c index 32a6b42..deec618 100644 --- a/jdinput.c +++ b/jdinput.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2010, 2016, D. R. Commander. + * Copyright (C) 2010, 2016, 2018, D. R. Commander. * Copyright (C) 2015, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. 
@@ -33,7 +33,7 @@ typedef my_input_controller *my_inputctl_ptr; /* Forward declarations */ -METHODDEF(int) consume_markers (j_decompress_ptr cinfo); +METHODDEF(int) consume_markers(j_decompress_ptr cinfo); /* @@ -41,16 +41,16 @@ METHODDEF(int) consume_markers (j_decompress_ptr cinfo); */ LOCAL(void) -initial_setup (j_decompress_ptr cinfo) +initial_setup(j_decompress_ptr cinfo) /* Called once, when first SOS marker is reached */ { int ci; jpeg_component_info *compptr; /* Make sure image isn't bigger than I can handle */ - if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION || - (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION) - ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION); + if ((long)cinfo->image_height > (long)JPEG_MAX_DIMENSION || + (long)cinfo->image_width > (long)JPEG_MAX_DIMENSION) + ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int)JPEG_MAX_DIMENSION); /* For now, precision must match compiled-in value... */ if (cinfo->data_precision != BITS_IN_JSAMPLE) @@ -66,8 +66,10 @@ initial_setup (j_decompress_ptr cinfo) cinfo->max_v_samp_factor = 1; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR || - compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) + if (compptr->h_samp_factor <= 0 || + compptr->h_samp_factor > MAX_SAMP_FACTOR || + compptr->v_samp_factor <= 0 || + compptr->v_samp_factor > MAX_SAMP_FACTOR) ERREXIT(cinfo, JERR_BAD_SAMPLING); cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor, compptr->h_samp_factor); @@ -75,10 +77,10 @@ initial_setup (j_decompress_ptr cinfo) compptr->v_samp_factor); } -#if JPEG_LIB_VERSION >=80 - cinfo->block_size = DCTSIZE; - cinfo->natural_order = jpeg_natural_order; - cinfo->lim_Se = DCTSIZE2-1; +#if JPEG_LIB_VERSION >= 80 + cinfo->block_size = DCTSIZE; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2 - 1; #endif /* We initialize 
DCT_scaled_size and min_DCT_scaled_size to DCTSIZE. @@ -101,11 +103,11 @@ initial_setup (j_decompress_ptr cinfo) #endif /* Size in DCT blocks */ compptr->width_in_blocks = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + jdiv_round_up((long)cinfo->image_width * (long)compptr->h_samp_factor, + (long)(cinfo->max_h_samp_factor * DCTSIZE)); compptr->height_in_blocks = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + jdiv_round_up((long)cinfo->image_height * (long)compptr->v_samp_factor, + (long)(cinfo->max_v_samp_factor * DCTSIZE)); /* Set the first and last MCU columns to decompress from multi-scan images. * By default, decompress all of the MCU columns. */ @@ -117,11 +119,11 @@ initial_setup (j_decompress_ptr cinfo) */ /* Size in samples */ compptr->downsampled_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, - (long) cinfo->max_h_samp_factor); + jdiv_round_up((long)cinfo->image_width * (long)compptr->h_samp_factor, + (long)cinfo->max_h_samp_factor); compptr->downsampled_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, - (long) cinfo->max_v_samp_factor); + jdiv_round_up((long)cinfo->image_height * (long)compptr->v_samp_factor, + (long)cinfo->max_v_samp_factor); /* Mark component needed, until color conversion says otherwise */ compptr->component_needed = TRUE; /* Mark no quantization table yet saved for component */ @@ -130,8 +132,8 @@ initial_setup (j_decompress_ptr cinfo) /* Compute number of fully interleaved MCU rows. 
*/ cinfo->total_iMCU_rows = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); + jdiv_round_up((long)cinfo->image_height, + (long)(cinfo->max_v_samp_factor * DCTSIZE)); /* Decide whether file contains multiple scans */ if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode) @@ -142,7 +144,7 @@ initial_setup (j_decompress_ptr cinfo) LOCAL(void) -per_scan_setup (j_decompress_ptr cinfo) +per_scan_setup(j_decompress_ptr cinfo) /* Do computations that are needed before processing a JPEG scan */ /* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */ { @@ -167,7 +169,7 @@ per_scan_setup (j_decompress_ptr cinfo) /* For noninterleaved scans, it is convenient to define last_row_height * as the number of block rows present in the last iMCU row. */ - tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor); + tmp = (int)(compptr->height_in_blocks % compptr->v_samp_factor); if (tmp == 0) tmp = compptr->v_samp_factor; compptr->last_row_height = tmp; @@ -184,11 +186,11 @@ per_scan_setup (j_decompress_ptr cinfo) /* Overall image size in MCUs */ cinfo->MCUs_per_row = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, - (long) (cinfo->max_h_samp_factor*DCTSIZE)); + jdiv_round_up((long)cinfo->image_width, + (long)(cinfo->max_h_samp_factor * DCTSIZE)); cinfo->MCU_rows_in_scan = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); + jdiv_round_up((long)cinfo->image_height, + (long)(cinfo->max_v_samp_factor * DCTSIZE)); cinfo->blocks_in_MCU = 0; @@ -198,12 +200,13 @@ per_scan_setup (j_decompress_ptr cinfo) compptr->MCU_width = compptr->h_samp_factor; compptr->MCU_height = compptr->v_samp_factor; compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height; - compptr->MCU_sample_width = compptr->MCU_width * compptr->_DCT_scaled_size; + compptr->MCU_sample_width = compptr->MCU_width * + compptr->_DCT_scaled_size; /* Figure number 
of non-dummy blocks in last MCU column & row */ - tmp = (int) (compptr->width_in_blocks % compptr->MCU_width); + tmp = (int)(compptr->width_in_blocks % compptr->MCU_width); if (tmp == 0) tmp = compptr->MCU_width; compptr->last_col_width = tmp; - tmp = (int) (compptr->height_in_blocks % compptr->MCU_height); + tmp = (int)(compptr->height_in_blocks % compptr->MCU_height); if (tmp == 0) tmp = compptr->MCU_height; compptr->last_row_height = tmp; /* Prepare array describing MCU composition */ @@ -231,17 +234,17 @@ per_scan_setup (j_decompress_ptr cinfo) * means that we have to save away the table actually used for each component. * We do this by copying the table at the start of the first scan containing * the component. - * The JPEG spec prohibits the encoder from changing the contents of a Q-table - * slot between scans of a component using that slot. If the encoder does so - * anyway, this decoder will simply use the Q-table values that were current - * at the start of the first scan for the component. + * Rec. ITU-T T.81 | ISO/IEC 10918-1 prohibits the encoder from changing the + * contents of a Q-table slot between scans of a component using that slot. If + * the encoder does so anyway, this decoder will simply use the Q-table values + * that were current at the start of the first scan for the component. * * The decompressor output side looks only at the saved quant tables, * not at the current Q-table slots. 
*/ LOCAL(void) -latch_quant_tables (j_decompress_ptr cinfo) +latch_quant_tables(j_decompress_ptr cinfo) { int ci, qtblno; jpeg_component_info *compptr; @@ -259,7 +262,7 @@ latch_quant_tables (j_decompress_ptr cinfo) ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); /* OK, save away the quantization table */ qtbl = (JQUANT_TBL *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(JQUANT_TBL)); MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], sizeof(JQUANT_TBL)); compptr->quant_table = qtbl; @@ -275,7 +278,7 @@ latch_quant_tables (j_decompress_ptr cinfo) */ METHODDEF(void) -start_input_pass (j_decompress_ptr cinfo) +start_input_pass(j_decompress_ptr cinfo) { per_scan_setup(cinfo); latch_quant_tables(cinfo); @@ -292,7 +295,7 @@ start_input_pass (j_decompress_ptr cinfo) */ METHODDEF(void) -finish_input_pass (j_decompress_ptr cinfo) +finish_input_pass(j_decompress_ptr cinfo) { cinfo->inputctl->consume_input = consume_markers; } @@ -309,9 +312,9 @@ finish_input_pass (j_decompress_ptr cinfo) */ METHODDEF(int) -consume_markers (j_decompress_ptr cinfo) +consume_markers(j_decompress_ptr cinfo) { - my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl; + my_inputctl_ptr inputctl = (my_inputctl_ptr)cinfo->inputctl; int val; if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */ @@ -329,7 +332,7 @@ consume_markers (j_decompress_ptr cinfo) * responsible for enforcing this sequencing. */ } else { /* 2nd or later SOS marker */ - if (! inputctl->pub.has_multiple_scans) + if (!inputctl->pub.has_multiple_scans) ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! 
*/ start_input_pass(cinfo); } @@ -360,16 +363,16 @@ consume_markers (j_decompress_ptr cinfo) */ METHODDEF(void) -reset_input_controller (j_decompress_ptr cinfo) +reset_input_controller(j_decompress_ptr cinfo) { - my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl; + my_inputctl_ptr inputctl = (my_inputctl_ptr)cinfo->inputctl; inputctl->pub.consume_input = consume_markers; inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */ inputctl->pub.eoi_reached = FALSE; inputctl->inheaders = TRUE; /* Reset other modules */ - (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo); + (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo); (*cinfo->marker->reset_marker_reader) (cinfo); /* Reset progression state -- would be cleaner if entropy decoder did this */ cinfo->coef_bits = NULL; @@ -382,15 +385,15 @@ reset_input_controller (j_decompress_ptr cinfo) */ GLOBAL(void) -jinit_input_controller (j_decompress_ptr cinfo) +jinit_input_controller(j_decompress_ptr cinfo) { my_inputctl_ptr inputctl; /* Create subobject in permanent pool */ inputctl = (my_inputctl_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(my_input_controller)); - cinfo->inputctl = (struct jpeg_input_controller *) inputctl; + cinfo->inputctl = (struct jpeg_input_controller *)inputctl; /* Initialize method pointers */ inputctl->pub.consume_input = consume_markers; inputctl->pub.reset_input_controller = reset_input_controller; diff --git a/jdmainct.c b/jdmainct.c index ebb069b..50301d6 100644 --- a/jdmainct.c +++ b/jdmainct.c @@ -112,26 +112,29 @@ /* Forward declarations */ -METHODDEF(void) process_data_simple_main - (j_decompress_ptr cinfo, JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); -METHODDEF(void) process_data_context_main - (j_decompress_ptr cinfo, JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); +METHODDEF(void) 
process_data_simple_main(j_decompress_ptr cinfo, + JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); +METHODDEF(void) process_data_context_main(j_decompress_ptr cinfo, + JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); #ifdef QUANT_2PASS_SUPPORTED -METHODDEF(void) process_data_crank_post - (j_decompress_ptr cinfo, JSAMPARRAY output_buf, - JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail); +METHODDEF(void) process_data_crank_post(j_decompress_ptr cinfo, + JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); #endif LOCAL(void) -alloc_funny_pointers (j_decompress_ptr cinfo) +alloc_funny_pointers(j_decompress_ptr cinfo) /* Allocate space for the funny pointer lists. * This is done only once, not once per pass. */ { - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; int ci, rgroup; int M = cinfo->_min_DCT_scaled_size; jpeg_component_info *compptr; @@ -141,7 +144,7 @@ alloc_funny_pointers (j_decompress_ptr cinfo) * We alloc both arrays with one call to save a few cycles. */ main_ptr->xbuffer[0] = (JSAMPIMAGE) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, cinfo->num_components * 2 * sizeof(JSAMPARRAY)); main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components; @@ -153,7 +156,7 @@ alloc_funny_pointers (j_decompress_ptr cinfo) * We alloc both pointer lists with one call to save a few cycles. 
*/ xbuf = (JSAMPARRAY) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, 2 * (rgroup * (M + 4)) * sizeof(JSAMPROW)); xbuf += rgroup; /* want one row group at negative offsets */ main_ptr->xbuffer[0][ci] = xbuf; @@ -164,7 +167,7 @@ alloc_funny_pointers (j_decompress_ptr cinfo) LOCAL(void) -make_funny_pointers (j_decompress_ptr cinfo) +make_funny_pointers(j_decompress_ptr cinfo) /* Create the funny pointer lists discussed in the comments above. * The actual workspace is already allocated (in main_ptr->buffer), * and the space for the pointer lists is allocated too. @@ -172,7 +175,7 @@ make_funny_pointers (j_decompress_ptr cinfo) * This will be repeated at the beginning of each pass. */ { - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; int ci, i, rgroup; int M = cinfo->_min_DCT_scaled_size; jpeg_component_info *compptr; @@ -191,8 +194,8 @@ make_funny_pointers (j_decompress_ptr cinfo) } /* In the second list, put the last four row groups in swapped order */ for (i = 0; i < rgroup * 2; i++) { - xbuf1[rgroup*(M-2) + i] = buf[rgroup*M + i]; - xbuf1[rgroup*M + i] = buf[rgroup*(M-2) + i]; + xbuf1[rgroup * (M - 2) + i] = buf[rgroup * M + i]; + xbuf1[rgroup * M + i] = buf[rgroup * (M - 2) + i]; } /* The wraparound pointers at top and bottom will be filled later * (see set_wraparound_pointers, below). Initially we want the "above" @@ -207,13 +210,13 @@ make_funny_pointers (j_decompress_ptr cinfo) LOCAL(void) -set_bottom_pointers (j_decompress_ptr cinfo) +set_bottom_pointers(j_decompress_ptr cinfo) /* Change the pointer lists to duplicate the last sample row at the bottom * of the image. whichptr indicates which xbuffer holds the final iMCU row. * Also sets rowgroups_avail to indicate number of nondummy row groups in row. 
*/ { - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; int ci, i, rgroup, iMCUheight, rows_left; jpeg_component_info *compptr; JSAMPARRAY xbuf; @@ -224,20 +227,20 @@ set_bottom_pointers (j_decompress_ptr cinfo) iMCUheight = compptr->v_samp_factor * compptr->_DCT_scaled_size; rgroup = iMCUheight / cinfo->_min_DCT_scaled_size; /* Count nondummy sample rows remaining for this component */ - rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight); + rows_left = (int)(compptr->downsampled_height % (JDIMENSION)iMCUheight); if (rows_left == 0) rows_left = iMCUheight; /* Count nondummy row groups. Should get same answer for each component, * so we need only do it once. */ if (ci == 0) { - main_ptr->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1); + main_ptr->rowgroups_avail = (JDIMENSION)((rows_left - 1) / rgroup + 1); } /* Duplicate the last real sample row rgroup*2 times; this pads out the * last partial rowgroup and ensures at least one full rowgroup of context. 
*/ xbuf = main_ptr->xbuffer[main_ptr->whichptr][ci]; for (i = 0; i < rgroup * 2; i++) { - xbuf[rows_left + i] = xbuf[rows_left-1]; + xbuf[rows_left + i] = xbuf[rows_left - 1]; } } } @@ -248,9 +251,9 @@ set_bottom_pointers (j_decompress_ptr cinfo) */ METHODDEF(void) -start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode) +start_pass_main(j_decompress_ptr cinfo, J_BUF_MODE pass_mode) { - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; switch (pass_mode) { case JBUF_PASS_THRU: @@ -286,22 +289,21 @@ start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode) */ METHODDEF(void) -process_data_simple_main (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) +process_data_simple_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) { - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; JDIMENSION rowgroups_avail; /* Read input data if we haven't filled the main buffer yet */ - if (! main_ptr->buffer_full) { - if (! (*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer)) + if (!main_ptr->buffer_full) { + if (!(*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer)) return; /* suspension forced, can do nothing more */ main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ } /* There are always min_DCT_scaled_size row groups in an iMCU row. */ - rowgroups_avail = (JDIMENSION) cinfo->_min_DCT_scaled_size; + rowgroups_avail = (JDIMENSION)cinfo->_min_DCT_scaled_size; /* Note: at the bottom of the image, we may pass extra garbage row groups * to the postprocessor. The postprocessor has to check for bottom * of image anyway (at row resolution), so no point in us doing it too. 
@@ -326,16 +328,15 @@ process_data_simple_main (j_decompress_ptr cinfo, */ METHODDEF(void) -process_data_context_main (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) +process_data_context_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) { - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; /* Read input data if we haven't filled the main buffer yet */ - if (! main_ptr->buffer_full) { - if (! (*cinfo->coef->decompress_data) (cinfo, - main_ptr->xbuffer[main_ptr->whichptr])) + if (!main_ptr->buffer_full) { + if (!(*cinfo->coef->decompress_data) (cinfo, + main_ptr->xbuffer[main_ptr->whichptr])) return; /* suspension forced, can do nothing more */ main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ main_ptr->iMCU_row_ctr++; /* count rows received */ @@ -349,9 +350,11 @@ process_data_context_main (j_decompress_ptr cinfo, switch (main_ptr->context_state) { case CTX_POSTPONED_ROW: /* Call postprocessor using previously set pointers for postponed row */ - (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr], - &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); + (*cinfo->post->post_process_data) (cinfo, + main_ptr->xbuffer[main_ptr->whichptr], + &main_ptr->rowgroup_ctr, + main_ptr->rowgroups_avail, output_buf, + out_row_ctr, out_rows_avail); if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail) return; /* Need to suspend */ main_ptr->context_state = CTX_PREPARE_FOR_IMCU; @@ -361,7 +364,7 @@ process_data_context_main (j_decompress_ptr cinfo, case CTX_PREPARE_FOR_IMCU: /* Prepare to process first M-1 row groups of this iMCU row */ main_ptr->rowgroup_ctr = 0; - main_ptr->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size - 1); + main_ptr->rowgroups_avail = (JDIMENSION)(cinfo->_min_DCT_scaled_size - 1); /* Check 
for bottom of image: if so, tweak pointers to "duplicate" * the last sample row, and adjust rowgroups_avail to ignore padding rows. */ @@ -371,9 +374,11 @@ process_data_context_main (j_decompress_ptr cinfo, /*FALLTHROUGH*/ case CTX_PROCESS_IMCU: /* Call postprocessor using previously set pointers */ - (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr], - &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); + (*cinfo->post->post_process_data) (cinfo, + main_ptr->xbuffer[main_ptr->whichptr], + &main_ptr->rowgroup_ctr, + main_ptr->rowgroups_avail, output_buf, + out_row_ctr, out_rows_avail); if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail) return; /* Need to suspend */ /* After the first iMCU, change wraparound pointers to normal state */ @@ -384,8 +389,8 @@ process_data_context_main (j_decompress_ptr cinfo, main_ptr->buffer_full = FALSE; /* Still need to process last row group of this iMCU row, */ /* which is saved at index M+1 of the other xbuffer */ - main_ptr->rowgroup_ctr = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 1); - main_ptr->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 2); + main_ptr->rowgroup_ctr = (JDIMENSION)(cinfo->_min_DCT_scaled_size + 1); + main_ptr->rowgroups_avail = (JDIMENSION)(cinfo->_min_DCT_scaled_size + 2); main_ptr->context_state = CTX_POSTPONED_ROW; } } @@ -400,12 +405,11 @@ process_data_context_main (j_decompress_ptr cinfo, #ifdef QUANT_2PASS_SUPPORTED METHODDEF(void) -process_data_crank_post (j_decompress_ptr cinfo, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) +process_data_crank_post(j_decompress_ptr cinfo, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) { - (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL, - (JDIMENSION *) NULL, (JDIMENSION) 0, + (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE)NULL, + (JDIMENSION *)NULL, (JDIMENSION)0, output_buf, out_row_ctr, 
out_rows_avail); } @@ -417,16 +421,16 @@ process_data_crank_post (j_decompress_ptr cinfo, */ GLOBAL(void) -jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer) +jinit_d_main_controller(j_decompress_ptr cinfo, boolean need_full_buffer) { my_main_ptr main_ptr; int ci, rgroup, ngroups; jpeg_component_info *compptr; main_ptr = (my_main_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_main_controller)); - cinfo->main = (struct jpeg_d_main_controller *) main_ptr; + cinfo->main = (struct jpeg_d_main_controller *)main_ptr; main_ptr->pub.start_pass = start_pass_main; if (need_full_buffer) /* shouldn't happen */ @@ -449,8 +453,8 @@ jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer) rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / cinfo->_min_DCT_scaled_size; /* height of a row group of component */ main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, + ((j_common_ptr)cinfo, JPOOL_IMAGE, compptr->width_in_blocks * compptr->_DCT_scaled_size, - (JDIMENSION) (rgroup * ngroups)); + (JDIMENSION)(rgroup * ngroups)); } } diff --git a/jdmainct.h b/jdmainct.h index 3090301..37b201c 100644 --- a/jdmainct.h +++ b/jdmainct.h @@ -44,12 +44,12 @@ typedef my_main_controller *my_main_ptr; LOCAL(void) -set_wraparound_pointers (j_decompress_ptr cinfo) +set_wraparound_pointers(j_decompress_ptr cinfo) /* Set up the "wraparound" pointers at top and bottom of the pointer lists. * This changes the pointer list state from top-of-image to the normal state. 
*/ { - my_main_ptr main_ptr = (my_main_ptr) cinfo->main; + my_main_ptr main_ptr = (my_main_ptr)cinfo->main; int ci, i, rgroup; int M = cinfo->_min_DCT_scaled_size; jpeg_component_info *compptr; @@ -62,10 +62,10 @@ set_wraparound_pointers (j_decompress_ptr cinfo) xbuf0 = main_ptr->xbuffer[0][ci]; xbuf1 = main_ptr->xbuffer[1][ci]; for (i = 0; i < rgroup; i++) { - xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i]; - xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i]; - xbuf0[rgroup*(M+2) + i] = xbuf0[i]; - xbuf1[rgroup*(M+2) + i] = xbuf1[i]; + xbuf0[i - rgroup] = xbuf0[rgroup * (M + 1) + i]; + xbuf1[i - rgroup] = xbuf1[rgroup * (M + 1) + i]; + xbuf0[rgroup * (M + 2) + i] = xbuf0[i]; + xbuf1[rgroup * (M + 2) + i] = xbuf1[i]; } } } diff --git a/jdmarker.c b/jdmarker.c index e3b612c..c9c7ef6 100644 --- a/jdmarker.c +++ b/jdmarker.c @@ -119,50 +119,50 @@ typedef my_marker_reader *my_marker_ptr; */ /* Declare and initialize local copies of input pointer/count */ -#define INPUT_VARS(cinfo) \ - struct jpeg_source_mgr *datasrc = (cinfo)->src; \ - const JOCTET *next_input_byte = datasrc->next_input_byte; \ - size_t bytes_in_buffer = datasrc->bytes_in_buffer +#define INPUT_VARS(cinfo) \ + struct jpeg_source_mgr *datasrc = (cinfo)->src; \ + const JOCTET *next_input_byte = datasrc->next_input_byte; \ + size_t bytes_in_buffer = datasrc->bytes_in_buffer /* Unload the local copies --- do this only at a restart boundary */ -#define INPUT_SYNC(cinfo) \ - ( datasrc->next_input_byte = next_input_byte, \ - datasrc->bytes_in_buffer = bytes_in_buffer ) +#define INPUT_SYNC(cinfo) \ + ( datasrc->next_input_byte = next_input_byte, \ + datasrc->bytes_in_buffer = bytes_in_buffer ) /* Reload the local copies --- used only in MAKE_BYTE_AVAIL */ -#define INPUT_RELOAD(cinfo) \ - ( next_input_byte = datasrc->next_input_byte, \ - bytes_in_buffer = datasrc->bytes_in_buffer ) +#define INPUT_RELOAD(cinfo) \ + ( next_input_byte = datasrc->next_input_byte, \ + bytes_in_buffer = datasrc->bytes_in_buffer ) /* Internal 
macro for INPUT_BYTE and INPUT_2BYTES: make a byte available. * Note we do *not* do INPUT_SYNC before calling fill_input_buffer, * but we must reload the local copies after a successful fill. */ -#define MAKE_BYTE_AVAIL(cinfo,action) \ - if (bytes_in_buffer == 0) { \ - if (! (*datasrc->fill_input_buffer) (cinfo)) \ - { action; } \ - INPUT_RELOAD(cinfo); \ - } +#define MAKE_BYTE_AVAIL(cinfo, action) \ + if (bytes_in_buffer == 0) { \ + if (!(*datasrc->fill_input_buffer) (cinfo)) \ + { action; } \ + INPUT_RELOAD(cinfo); \ + } /* Read a byte into variable V. * If must suspend, take the specified action (typically "return FALSE"). */ -#define INPUT_BYTE(cinfo,V,action) \ - MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \ - bytes_in_buffer--; \ - V = GETJOCTET(*next_input_byte++); ) +#define INPUT_BYTE(cinfo, V, action) \ + MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \ + bytes_in_buffer--; \ + V = GETJOCTET(*next_input_byte++); ) /* As above, but read two bytes interpreted as an unsigned 16-bit integer. * V should be declared unsigned int or perhaps JLONG. 
*/ -#define INPUT_2BYTES(cinfo,V,action) \ - MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \ - bytes_in_buffer--; \ - V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \ - MAKE_BYTE_AVAIL(cinfo,action); \ - bytes_in_buffer--; \ - V += GETJOCTET(*next_input_byte++); ) +#define INPUT_2BYTES(cinfo, V, action) \ + MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \ + bytes_in_buffer--; \ + V = ((unsigned int)GETJOCTET(*next_input_byte++)) << 8; \ + MAKE_BYTE_AVAIL(cinfo, action); \ + bytes_in_buffer--; \ + V += GETJOCTET(*next_input_byte++); ) /* @@ -197,7 +197,7 @@ typedef my_marker_reader *my_marker_ptr; LOCAL(boolean) -get_soi (j_decompress_ptr cinfo) +get_soi(j_decompress_ptr cinfo) /* Process an SOI marker */ { int i; @@ -237,7 +237,7 @@ get_soi (j_decompress_ptr cinfo) LOCAL(boolean) -get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith) +get_sof(j_decompress_ptr cinfo, boolean is_prog, boolean is_arith) /* Process a SOFn marker */ { JLONG length; @@ -258,7 +258,7 @@ get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith) length -= 8; TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker, - (int) cinfo->image_width, (int) cinfo->image_height, + (int)cinfo->image_width, (int)cinfo->image_height, cinfo->num_components); if (cinfo->marker->saw_SOF) @@ -267,16 +267,16 @@ get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith) /* We don't support files in which the image height is initially specified */ /* as 0 and is later redefined by DNL. As long as we have to check that, */ /* might as well have a general sanity check. 
*/ - if (cinfo->image_height <= 0 || cinfo->image_width <= 0 - || cinfo->num_components <= 0) + if (cinfo->image_height <= 0 || cinfo->image_width <= 0 || + cinfo->num_components <= 0) ERREXIT(cinfo, JERR_EMPTY_IMAGE); if (length != (cinfo->num_components * 3)) ERREXIT(cinfo, JERR_BAD_LENGTH); if (cinfo->comp_info == NULL) /* do only once, even if suspend */ - cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->comp_info = (jpeg_component_info *)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, JPOOL_IMAGE, cinfo->num_components * sizeof(jpeg_component_info)); for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; @@ -301,7 +301,7 @@ get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith) LOCAL(boolean) -get_sos (j_decompress_ptr cinfo) +get_sos(j_decompress_ptr cinfo) /* Process a SOS marker */ { JLONG length; @@ -309,7 +309,7 @@ get_sos (j_decompress_ptr cinfo) jpeg_component_info *compptr; INPUT_VARS(cinfo); - if (! 
cinfo->marker->saw_SOF) + if (!cinfo->marker->saw_SOF) ERREXIT(cinfo, JERR_SOS_NO_SOF); INPUT_2BYTES(cinfo, length, return FALSE); @@ -341,7 +341,7 @@ get_sos (j_decompress_ptr cinfo) ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc); - id_found: +id_found: cinfo->cur_comp_info[i] = compptr; compptr->dc_tbl_no = (c >> 4) & 15; @@ -384,7 +384,7 @@ get_sos (j_decompress_ptr cinfo) #ifdef D_ARITH_CODING_SUPPORTED LOCAL(boolean) -get_dac (j_decompress_ptr cinfo) +get_dac(j_decompress_ptr cinfo) /* Process a DAC marker */ { JLONG length; @@ -402,14 +402,14 @@ get_dac (j_decompress_ptr cinfo) TRACEMS2(cinfo, 1, JTRC_DAC, index, val); - if (index < 0 || index >= (2*NUM_ARITH_TBLS)) + if (index < 0 || index >= (2 * NUM_ARITH_TBLS)) ERREXIT1(cinfo, JERR_DAC_INDEX, index); if (index >= NUM_ARITH_TBLS) { /* define AC table */ - cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val; + cinfo->arith_ac_K[index - NUM_ARITH_TBLS] = (UINT8)val; } else { /* define DC table */ - cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F); - cinfo->arith_dc_U[index] = (UINT8) (val >> 4); + cinfo->arith_dc_L[index] = (UINT8)(val & 0x0F); + cinfo->arith_dc_U[index] = (UINT8)(val >> 4); if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index]) ERREXIT1(cinfo, JERR_DAC_VALUE, val); } @@ -422,7 +422,7 @@ get_dac (j_decompress_ptr cinfo) return TRUE; } -#else /* ! D_ARITH_CODING_SUPPORTED */ +#else /* !D_ARITH_CODING_SUPPORTED */ #define get_dac(cinfo) skip_variable(cinfo) @@ -430,7 +430,7 @@ get_dac (j_decompress_ptr cinfo) LOCAL(boolean) -get_dht (j_decompress_ptr cinfo) +get_dht(j_decompress_ptr cinfo) /* Process a DHT marker */ { JLONG length; @@ -467,7 +467,7 @@ get_dht (j_decompress_ptr cinfo) /* Here we just do minimal validation of the counts to avoid walking * off the end of our table space. jdhuff.c will check more carefully. 
*/ - if (count > 256 || ((JLONG) count) > length) + if (count > 256 || ((JLONG)count) > length) ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); for (i = 0; i < count; i++) @@ -489,7 +489,7 @@ get_dht (j_decompress_ptr cinfo) } if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); + *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo); MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits)); MEMCOPY((*htblptr)->huffval, huffval, sizeof((*htblptr)->huffval)); @@ -504,7 +504,7 @@ get_dht (j_decompress_ptr cinfo) LOCAL(boolean) -get_dqt (j_decompress_ptr cinfo) +get_dqt(j_decompress_ptr cinfo) /* Process a DQT marker */ { JLONG length; @@ -527,7 +527,7 @@ get_dqt (j_decompress_ptr cinfo) ERREXIT1(cinfo, JERR_DQT_INDEX, n); if (cinfo->quant_tbl_ptrs[n] == NULL) - cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo); + cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr)cinfo); quant_ptr = cinfo->quant_tbl_ptrs[n]; for (i = 0; i < DCTSIZE2; i++) { @@ -536,20 +536,20 @@ get_dqt (j_decompress_ptr cinfo) else INPUT_BYTE(cinfo, tmp, return FALSE); /* We convert the zigzag-order table to natural array order. 
*/ - quant_ptr->quantval[jpeg_natural_order[i]] = (UINT16) tmp; + quant_ptr->quantval[jpeg_natural_order[i]] = (UINT16)tmp; } if (cinfo->err->trace_level >= 2) { for (i = 0; i < DCTSIZE2; i += 8) { TRACEMS8(cinfo, 2, JTRC_QUANTVALS, - quant_ptr->quantval[i], quant_ptr->quantval[i+1], - quant_ptr->quantval[i+2], quant_ptr->quantval[i+3], - quant_ptr->quantval[i+4], quant_ptr->quantval[i+5], - quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]); + quant_ptr->quantval[i], quant_ptr->quantval[i + 1], + quant_ptr->quantval[i + 2], quant_ptr->quantval[i + 3], + quant_ptr->quantval[i + 4], quant_ptr->quantval[i + 5], + quant_ptr->quantval[i + 6], quant_ptr->quantval[i + 7]); } } - length -= DCTSIZE2+1; + length -= DCTSIZE2 + 1; if (prec) length -= DCTSIZE2; } @@ -562,7 +562,7 @@ get_dqt (j_decompress_ptr cinfo) LOCAL(boolean) -get_dri (j_decompress_ptr cinfo) +get_dri(j_decompress_ptr cinfo) /* Process a DRI marker */ { JLONG length; @@ -598,14 +598,14 @@ get_dri (j_decompress_ptr cinfo) LOCAL(void) -examine_app0 (j_decompress_ptr cinfo, JOCTET *data, - unsigned int datalen, JLONG remaining) +examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen, + JLONG remaining) /* Examine first few bytes from an APP0. * Take appropriate action if it is a JFIF marker. * datalen is # of bytes at data[], remaining is length of rest of marker data. 
*/ { - JLONG totallen = (JLONG) datalen + remaining; + JLONG totallen = (JLONG)datalen + remaining; if (datalen >= APP0_DATA_LEN && GETJOCTET(data[0]) == 0x4A && @@ -639,43 +639,43 @@ examine_app0 (j_decompress_ptr cinfo, JOCTET *data, GETJOCTET(data[12]), GETJOCTET(data[13])); totallen -= APP0_DATA_LEN; if (totallen != - ((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG) 3)) - TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen); + ((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG)3)) + TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int)totallen); } else if (datalen >= 6 && - GETJOCTET(data[0]) == 0x4A && - GETJOCTET(data[1]) == 0x46 && - GETJOCTET(data[2]) == 0x58 && - GETJOCTET(data[3]) == 0x58 && - GETJOCTET(data[4]) == 0) { + GETJOCTET(data[0]) == 0x4A && + GETJOCTET(data[1]) == 0x46 && + GETJOCTET(data[2]) == 0x58 && + GETJOCTET(data[3]) == 0x58 && + GETJOCTET(data[4]) == 0) { /* Found JFIF "JFXX" extension APP0 marker */ /* The library doesn't actually do anything with these, * but we try to produce a helpful trace message. 
*/ switch (GETJOCTET(data[5])) { case 0x10: - TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int) totallen); + TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int)totallen); break; case 0x11: - TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int) totallen); + TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int)totallen); break; case 0x13: - TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int) totallen); + TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int)totallen); break; default: TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, - GETJOCTET(data[5]), (int) totallen); + GETJOCTET(data[5]), (int)totallen); break; } } else { /* Start of APP0 does not match "JFIF" or "JFXX", or too short */ - TRACEMS1(cinfo, 1, JTRC_APP0, (int) totallen); + TRACEMS1(cinfo, 1, JTRC_APP0, (int)totallen); } } LOCAL(void) -examine_app14 (j_decompress_ptr cinfo, JOCTET *data, - unsigned int datalen, JLONG remaining) +examine_app14(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen, + JLONG remaining) /* Examine first few bytes from an APP14. * Take appropriate action if it is an Adobe marker. * datalen is # of bytes at data[], remaining is length of rest of marker data. 
@@ -696,16 +696,16 @@ examine_app14 (j_decompress_ptr cinfo, JOCTET *data, transform = GETJOCTET(data[11]); TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform); cinfo->saw_Adobe_marker = TRUE; - cinfo->Adobe_transform = (UINT8) transform; + cinfo->Adobe_transform = (UINT8)transform; } else { /* Start of APP14 does not match "Adobe", or too short */ - TRACEMS1(cinfo, 1, JTRC_APP14, (int) (datalen + remaining)); + TRACEMS1(cinfo, 1, JTRC_APP14, (int)(datalen + remaining)); } } METHODDEF(boolean) -get_interesting_appn (j_decompress_ptr cinfo) +get_interesting_appn(j_decompress_ptr cinfo) /* Process an APP0 or APP14 marker without saving it */ { JLONG length; @@ -720,7 +720,7 @@ get_interesting_appn (j_decompress_ptr cinfo) if (length >= APPN_DATA_LEN) numtoread = APPN_DATA_LEN; else if (length > 0) - numtoread = (unsigned int) length; + numtoread = (unsigned int)length; else numtoread = 0; for (i = 0; i < numtoread; i++) @@ -730,10 +730,10 @@ get_interesting_appn (j_decompress_ptr cinfo) /* process it */ switch (cinfo->unread_marker) { case M_APP0: - examine_app0(cinfo, (JOCTET *) b, numtoread, length); + examine_app0(cinfo, (JOCTET *)b, numtoread, length); break; case M_APP14: - examine_app14(cinfo, (JOCTET *) b, numtoread, length); + examine_app14(cinfo, (JOCTET *)b, numtoread, length); break; default: /* can't get here unless jpeg_save_markers chooses wrong processor */ @@ -744,7 +744,7 @@ get_interesting_appn (j_decompress_ptr cinfo) /* skip any remaining data -- could be lots */ INPUT_SYNC(cinfo); if (length > 0) - (*cinfo->src->skip_input_data) (cinfo, (long) length); + (*cinfo->src->skip_input_data) (cinfo, (long)length); return TRUE; } @@ -753,10 +753,10 @@ get_interesting_appn (j_decompress_ptr cinfo) #ifdef SAVE_MARKERS_SUPPORTED METHODDEF(boolean) -save_marker (j_decompress_ptr cinfo) +save_marker(j_decompress_ptr cinfo) /* Save an APPn or COM marker into the marker list */ { - my_marker_ptr marker = (my_marker_ptr) cinfo->marker; + 
my_marker_ptr marker = (my_marker_ptr)cinfo->marker; jpeg_saved_marker_ptr cur_marker = marker->cur_marker; unsigned int bytes_read, data_length; JOCTET *data; @@ -770,22 +770,22 @@ save_marker (j_decompress_ptr cinfo) if (length >= 0) { /* watch out for bogus length word */ /* figure out how much we want to save */ unsigned int limit; - if (cinfo->unread_marker == (int) M_COM) + if (cinfo->unread_marker == (int)M_COM) limit = marker->length_limit_COM; else - limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0]; - if ((unsigned int) length < limit) - limit = (unsigned int) length; + limit = marker->length_limit_APPn[cinfo->unread_marker - (int)M_APP0]; + if ((unsigned int)length < limit) + limit = (unsigned int)length; /* allocate and initialize the marker item */ cur_marker = (jpeg_saved_marker_ptr) - (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(struct jpeg_marker_struct) + limit); cur_marker->next = NULL; - cur_marker->marker = (UINT8) cinfo->unread_marker; - cur_marker->original_length = (unsigned int) length; + cur_marker->marker = (UINT8)cinfo->unread_marker; + cur_marker->original_length = (unsigned int)length; cur_marker->data_length = limit; /* data area is just beyond the jpeg_marker_struct */ - data = cur_marker->data = (JOCTET *) (cur_marker + 1); + data = cur_marker->data = (JOCTET *)(cur_marker + 1); marker->cur_marker = cur_marker; marker->bytes_read = 0; bytes_read = 0; @@ -843,14 +843,14 @@ save_marker (j_decompress_ptr cinfo) break; default: TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, - (int) (data_length + length)); + (int)(data_length + length)); break; } /* skip any remaining data -- could be lots */ INPUT_SYNC(cinfo); /* do before skip_input_data */ if (length > 0) - (*cinfo->src->skip_input_data) (cinfo, (long) length); + (*cinfo->src->skip_input_data) (cinfo, (long)length); return TRUE; } @@ -859,7 +859,7 @@ save_marker 
(j_decompress_ptr cinfo) METHODDEF(boolean) -skip_variable (j_decompress_ptr cinfo) +skip_variable(j_decompress_ptr cinfo) /* Skip over an unknown or uninteresting variable-length marker */ { JLONG length; @@ -868,11 +868,11 @@ skip_variable (j_decompress_ptr cinfo) INPUT_2BYTES(cinfo, length, return FALSE); length -= 2; - TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length); + TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int)length); INPUT_SYNC(cinfo); /* do before skip_input_data */ if (length > 0) - (*cinfo->src->skip_input_data) (cinfo, (long) length); + (*cinfo->src->skip_input_data) (cinfo, (long)length); return TRUE; } @@ -888,7 +888,7 @@ skip_variable (j_decompress_ptr cinfo) */ LOCAL(boolean) -next_marker (j_decompress_ptr cinfo) +next_marker(j_decompress_ptr cinfo) { int c; INPUT_VARS(cinfo); @@ -935,7 +935,7 @@ next_marker (j_decompress_ptr cinfo) LOCAL(boolean) -first_marker (j_decompress_ptr cinfo) +first_marker(j_decompress_ptr cinfo) /* Like next_marker, but used to obtain the initial SOI marker. */ /* For this marker, we do not allow preceding garbage or fill; otherwise, * we might well scan an entire input file before realizing it ain't JPEG. @@ -948,7 +948,7 @@ first_marker (j_decompress_ptr cinfo) INPUT_BYTE(cinfo, c, return FALSE); INPUT_BYTE(cinfo, c2, return FALSE); - if (c != 0xFF || c2 != (int) M_SOI) + if (c != 0xFF || c2 != (int)M_SOI) ERREXIT2(cinfo, JERR_NO_SOI, c, c2); cinfo->unread_marker = c2; @@ -966,18 +966,18 @@ first_marker (j_decompress_ptr cinfo) */ METHODDEF(int) -read_markers (j_decompress_ptr cinfo) +read_markers(j_decompress_ptr cinfo) { /* Outer loop repeats once for each marker. */ for (;;) { /* Collect the marker proper, unless we already did. */ /* NB: first_marker() enforces the requirement that SOI appear first. */ if (cinfo->unread_marker == 0) { - if (! cinfo->marker->saw_SOI) { - if (! 
first_marker(cinfo)) + if (!cinfo->marker->saw_SOI) { + if (!first_marker(cinfo)) return JPEG_SUSPENDED; } else { - if (! next_marker(cinfo)) + if (!next_marker(cinfo)) return JPEG_SUSPENDED; } } @@ -987,28 +987,28 @@ read_markers (j_decompress_ptr cinfo) */ switch (cinfo->unread_marker) { case M_SOI: - if (! get_soi(cinfo)) + if (!get_soi(cinfo)) return JPEG_SUSPENDED; break; case M_SOF0: /* Baseline */ case M_SOF1: /* Extended sequential, Huffman */ - if (! get_sof(cinfo, FALSE, FALSE)) + if (!get_sof(cinfo, FALSE, FALSE)) return JPEG_SUSPENDED; break; case M_SOF2: /* Progressive, Huffman */ - if (! get_sof(cinfo, TRUE, FALSE)) + if (!get_sof(cinfo, TRUE, FALSE)) return JPEG_SUSPENDED; break; case M_SOF9: /* Extended sequential, arithmetic */ - if (! get_sof(cinfo, FALSE, TRUE)) + if (!get_sof(cinfo, FALSE, TRUE)) return JPEG_SUSPENDED; break; case M_SOF10: /* Progressive, arithmetic */ - if (! get_sof(cinfo, TRUE, TRUE)) + if (!get_sof(cinfo, TRUE, TRUE)) return JPEG_SUSPENDED; break; @@ -1026,7 +1026,7 @@ read_markers (j_decompress_ptr cinfo) break; case M_SOS: - if (! get_sos(cinfo)) + if (!get_sos(cinfo)) return JPEG_SUSPENDED; cinfo->unread_marker = 0; /* processed the marker */ return JPEG_REACHED_SOS; @@ -1037,22 +1037,22 @@ read_markers (j_decompress_ptr cinfo) return JPEG_REACHED_EOI; case M_DAC: - if (! get_dac(cinfo)) + if (!get_dac(cinfo)) return JPEG_SUSPENDED; break; case M_DHT: - if (! get_dht(cinfo)) + if (!get_dht(cinfo)) return JPEG_SUSPENDED; break; case M_DQT: - if (! get_dqt(cinfo)) + if (!get_dqt(cinfo)) return JPEG_SUSPENDED; break; case M_DRI: - if (! get_dri(cinfo)) + if (!get_dri(cinfo)) return JPEG_SUSPENDED; break; @@ -1072,13 +1072,13 @@ read_markers (j_decompress_ptr cinfo) case M_APP13: case M_APP14: case M_APP15: - if (! 
(*((my_marker_ptr) cinfo->marker)->process_APPn[ - cinfo->unread_marker - (int) M_APP0]) (cinfo)) + if (!(*((my_marker_ptr)cinfo->marker)->process_APPn[ + cinfo->unread_marker - (int)M_APP0]) (cinfo)) return JPEG_SUSPENDED; break; case M_COM: - if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo)) + if (!(*((my_marker_ptr)cinfo->marker)->process_COM) (cinfo)) return JPEG_SUSPENDED; break; @@ -1095,7 +1095,7 @@ read_markers (j_decompress_ptr cinfo) break; case M_DNL: /* Ignore DNL ... perhaps the wrong thing */ - if (! skip_variable(cinfo)) + if (!skip_variable(cinfo)) return JPEG_SUSPENDED; break; @@ -1127,25 +1127,25 @@ read_markers (j_decompress_ptr cinfo) */ METHODDEF(boolean) -read_restart_marker (j_decompress_ptr cinfo) +read_restart_marker(j_decompress_ptr cinfo) { /* Obtain a marker unless we already did. */ /* Note that next_marker will complain if it skips any data. */ if (cinfo->unread_marker == 0) { - if (! next_marker(cinfo)) + if (!next_marker(cinfo)) return FALSE; } if (cinfo->unread_marker == - ((int) M_RST0 + cinfo->marker->next_restart_num)) { + ((int)M_RST0 + cinfo->marker->next_restart_num)) { /* Normal case --- swallow the marker and let entropy decoder continue */ TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num); cinfo->unread_marker = 0; } else { /* Uh-oh, the restart markers have been messed up. */ /* Let the data source manager determine how to resync. */ - if (! 
(*cinfo->src->resync_to_restart) (cinfo, - cinfo->marker->next_restart_num)) + if (!(*cinfo->src->resync_to_restart) (cinfo, + cinfo->marker->next_restart_num)) return FALSE; } @@ -1206,7 +1206,7 @@ read_restart_marker (j_decompress_ptr cinfo) */ GLOBAL(boolean) -jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired) +jpeg_resync_to_restart(j_decompress_ptr cinfo, int desired) { int marker = cinfo->unread_marker; int action = 1; @@ -1216,16 +1216,16 @@ jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired) /* Outer loop handles repeated decision after scanning forward. */ for (;;) { - if (marker < (int) M_SOF0) + if (marker < (int)M_SOF0) action = 2; /* invalid marker */ - else if (marker < (int) M_RST0 || marker > (int) M_RST7) + else if (marker < (int)M_RST0 || marker > (int)M_RST7) action = 3; /* valid non-restart marker */ else { - if (marker == ((int) M_RST0 + ((desired+1) & 7)) || - marker == ((int) M_RST0 + ((desired+2) & 7))) + if (marker == ((int)M_RST0 + ((desired + 1) & 7)) || + marker == ((int)M_RST0 + ((desired + 2) & 7))) action = 3; /* one of the next two expected restarts */ - else if (marker == ((int) M_RST0 + ((desired-1) & 7)) || - marker == ((int) M_RST0 + ((desired-2) & 7))) + else if (marker == ((int)M_RST0 + ((desired - 1) & 7)) || + marker == ((int)M_RST0 + ((desired - 2) & 7))) action = 2; /* a prior restart, so advance */ else action = 1; /* desired restart or too far away */ @@ -1238,7 +1238,7 @@ jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired) return TRUE; case 2: /* Scan to the next marker, and repeat the decision loop. */ - if (! 
next_marker(cinfo)) + if (!next_marker(cinfo)) return FALSE; marker = cinfo->unread_marker; break; @@ -1256,9 +1256,9 @@ jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired) */ METHODDEF(void) -reset_marker_reader (j_decompress_ptr cinfo) +reset_marker_reader(j_decompress_ptr cinfo) { - my_marker_ptr marker = (my_marker_ptr) cinfo->marker; + my_marker_ptr marker = (my_marker_ptr)cinfo->marker; cinfo->comp_info = NULL; /* until allocated by get_sof */ cinfo->input_scan_number = 0; /* no SOS seen yet */ @@ -1276,16 +1276,16 @@ reset_marker_reader (j_decompress_ptr cinfo) */ GLOBAL(void) -jinit_marker_reader (j_decompress_ptr cinfo) +jinit_marker_reader(j_decompress_ptr cinfo) { my_marker_ptr marker; int i; /* Create subobject in permanent pool */ marker = (my_marker_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(my_marker_reader)); - cinfo->marker = (struct jpeg_marker_reader *) marker; + cinfo->marker = (struct jpeg_marker_reader *)marker; /* Initialize public method pointers */ marker->pub.reset_marker_reader = reset_marker_reader; marker->pub.read_markers = read_markers; @@ -1314,10 +1314,10 @@ jinit_marker_reader (j_decompress_ptr cinfo) #ifdef SAVE_MARKERS_SUPPORTED GLOBAL(void) -jpeg_save_markers (j_decompress_ptr cinfo, int marker_code, - unsigned int length_limit) +jpeg_save_markers(j_decompress_ptr cinfo, int marker_code, + unsigned int length_limit) { - my_marker_ptr marker = (my_marker_ptr) cinfo->marker; + my_marker_ptr marker = (my_marker_ptr)cinfo->marker; long maxlength; jpeg_marker_parser_method processor; @@ -1325,8 +1325,8 @@ jpeg_save_markers (j_decompress_ptr cinfo, int marker_code, * (should only be a concern in a 16-bit environment). 
*/ maxlength = cinfo->mem->max_alloc_chunk - sizeof(struct jpeg_marker_struct); - if (((long) length_limit) > maxlength) - length_limit = (unsigned int) maxlength; + if (((long)length_limit) > maxlength) + length_limit = (unsigned int)maxlength; /* Choose processor routine to use. * APP0/APP14 have special requirements. @@ -1334,23 +1334,23 @@ jpeg_save_markers (j_decompress_ptr cinfo, int marker_code, if (length_limit) { processor = save_marker; /* If saving APP0/APP14, save at least enough for our internal use. */ - if (marker_code == (int) M_APP0 && length_limit < APP0_DATA_LEN) + if (marker_code == (int)M_APP0 && length_limit < APP0_DATA_LEN) length_limit = APP0_DATA_LEN; - else if (marker_code == (int) M_APP14 && length_limit < APP14_DATA_LEN) + else if (marker_code == (int)M_APP14 && length_limit < APP14_DATA_LEN) length_limit = APP14_DATA_LEN; } else { processor = skip_variable; /* If discarding APP0/APP14, use our regular on-the-fly processor. */ - if (marker_code == (int) M_APP0 || marker_code == (int) M_APP14) + if (marker_code == (int)M_APP0 || marker_code == (int)M_APP14) processor = get_interesting_appn; } - if (marker_code == (int) M_COM) { + if (marker_code == (int)M_COM) { marker->process_COM = processor; marker->length_limit_COM = length_limit; - } else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) { - marker->process_APPn[marker_code - (int) M_APP0] = processor; - marker->length_limit_APPn[marker_code - (int) M_APP0] = length_limit; + } else if (marker_code >= (int)M_APP0 && marker_code <= (int)M_APP15) { + marker->process_APPn[marker_code - (int)M_APP0] = processor; + marker->length_limit_APPn[marker_code - (int)M_APP0] = length_limit; } else ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code); } @@ -1363,15 +1363,15 @@ jpeg_save_markers (j_decompress_ptr cinfo, int marker_code, */ GLOBAL(void) -jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code, - jpeg_marker_parser_method routine) 
+jpeg_set_marker_processor(j_decompress_ptr cinfo, int marker_code, + jpeg_marker_parser_method routine) { - my_marker_ptr marker = (my_marker_ptr) cinfo->marker; + my_marker_ptr marker = (my_marker_ptr)cinfo->marker; - if (marker_code == (int) M_COM) + if (marker_code == (int)M_COM) marker->process_COM = routine; - else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) - marker->process_APPn[marker_code - (int) M_APP0] = routine; + else if (marker_code >= (int)M_APP0 && marker_code <= (int)M_APP15) + marker->process_APPn[marker_code - (int)M_APP0] = routine; else ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code); } diff --git a/jdmaster.c b/jdmaster.c index 9079dda..b209064 100644 --- a/jdmaster.c +++ b/jdmaster.c @@ -31,7 +31,7 @@ */ LOCAL(boolean) -use_merged_upsample (j_decompress_ptr cinfo) +use_merged_upsample(j_decompress_ptr cinfo) { #ifdef UPSAMPLE_MERGING_SUPPORTED /* Merging is the equivalent of plain box-filter upsampling */ @@ -40,22 +40,22 @@ use_merged_upsample (j_decompress_ptr cinfo) /* jdmerge.c only supports YCC=>RGB and YCC=>RGB565 color conversion */ if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 || (cinfo->out_color_space != JCS_RGB && - cinfo->out_color_space != JCS_RGB565 && - cinfo->out_color_space != JCS_EXT_RGB && - cinfo->out_color_space != JCS_EXT_RGBX && - cinfo->out_color_space != JCS_EXT_BGR && - cinfo->out_color_space != JCS_EXT_BGRX && - cinfo->out_color_space != JCS_EXT_XBGR && - cinfo->out_color_space != JCS_EXT_XRGB && - cinfo->out_color_space != JCS_EXT_RGBA && - cinfo->out_color_space != JCS_EXT_BGRA && - cinfo->out_color_space != JCS_EXT_ABGR && - cinfo->out_color_space != JCS_EXT_ARGB)) + cinfo->out_color_space != JCS_RGB565 && + cinfo->out_color_space != JCS_EXT_RGB && + cinfo->out_color_space != JCS_EXT_RGBX && + cinfo->out_color_space != JCS_EXT_BGR && + cinfo->out_color_space != JCS_EXT_BGRX && + cinfo->out_color_space != JCS_EXT_XBGR && + cinfo->out_color_space != JCS_EXT_XRGB && + 
cinfo->out_color_space != JCS_EXT_RGBA && + cinfo->out_color_space != JCS_EXT_BGRA && + cinfo->out_color_space != JCS_EXT_ABGR && + cinfo->out_color_space != JCS_EXT_ARGB)) return FALSE; if ((cinfo->out_color_space == JCS_RGB565 && - cinfo->out_color_components != 3) || + cinfo->out_color_components != 3) || (cinfo->out_color_space != JCS_RGB565 && - cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space])) + cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space])) return FALSE; /* and it only handles 2h1v or 2h2v sampling ratios */ if (cinfo->comp_info[0].h_samp_factor != 2 || @@ -100,7 +100,7 @@ GLOBAL(void) #else LOCAL(void) #endif -jpeg_core_output_dimensions (j_decompress_ptr cinfo) +jpeg_core_output_dimensions(j_decompress_ptr cinfo) /* Do computations that are needed before master selection phase. * This function is used for transcoding and full decompression. */ @@ -113,129 +113,129 @@ jpeg_core_output_dimensions (j_decompress_ptr cinfo) if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) { /* Provide 1/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 1; cinfo->_min_DCT_v_scaled_size = 1; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) { /* Provide 2/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 2L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 2L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 2L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 2L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 2; cinfo->_min_DCT_v_scaled_size = 2; } else if (cinfo->scale_num * DCTSIZE <= 
cinfo->scale_denom * 3) { /* Provide 3/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 3L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 3L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 3L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 3L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 3; cinfo->_min_DCT_v_scaled_size = 3; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) { /* Provide 4/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 4L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 4L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 4L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 4L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 4; cinfo->_min_DCT_v_scaled_size = 4; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) { /* Provide 5/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 5L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 5L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 5L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 5L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 5; cinfo->_min_DCT_v_scaled_size = 5; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) { /* Provide 6/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 6L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 6L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 6L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 6L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 6; cinfo->_min_DCT_v_scaled_size = 6; } else if (cinfo->scale_num * 
DCTSIZE <= cinfo->scale_denom * 7) { /* Provide 7/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 7L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 7L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 7L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 7L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 7; cinfo->_min_DCT_v_scaled_size = 7; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) { /* Provide 8/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 8L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 8L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 8L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 8L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 8; cinfo->_min_DCT_v_scaled_size = 8; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) { /* Provide 9/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 9L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 9L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 9L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 9L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 9; cinfo->_min_DCT_v_scaled_size = 9; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) { /* Provide 10/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 10L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 10L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 10L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 10L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 10; cinfo->_min_DCT_v_scaled_size = 10; } else if 
(cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) { /* Provide 11/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 11L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 11L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 11L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 11L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 11; cinfo->_min_DCT_v_scaled_size = 11; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) { /* Provide 12/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 12L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 12L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 12L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 12L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 12; cinfo->_min_DCT_v_scaled_size = 12; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) { /* Provide 13/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 13L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 13L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 13L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 13L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 13; cinfo->_min_DCT_v_scaled_size = 13; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) { /* Provide 14/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 14L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 14L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 14L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 14L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 14; 
cinfo->_min_DCT_v_scaled_size = 14; } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) { /* Provide 15/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 15L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 15L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 15L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 15L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 15; cinfo->_min_DCT_v_scaled_size = 15; } else { /* Provide 16/block_size scaling */ cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * 16L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_width * 16L, (long)DCTSIZE); cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * 16L, (long) DCTSIZE); + jdiv_round_up((long)cinfo->image_height * 16L, (long)DCTSIZE); cinfo->_min_DCT_h_scaled_size = 16; cinfo->_min_DCT_v_scaled_size = 16; } @@ -268,7 +268,7 @@ jpeg_core_output_dimensions (j_decompress_ptr cinfo) */ GLOBAL(void) -jpeg_calc_output_dimensions (j_decompress_ptr cinfo) +jpeg_calc_output_dimensions(j_decompress_ptr cinfo) /* Do computations that are needed before master selection phase */ { #ifdef IDCT_SCALING_SUPPORTED @@ -314,13 +314,13 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo) ci++, compptr++) { /* Size in samples, after IDCT scaling */ compptr->downsampled_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * - (long) (compptr->h_samp_factor * compptr->_DCT_scaled_size), - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + jdiv_round_up((long)cinfo->image_width * + (long)(compptr->h_samp_factor * compptr->_DCT_scaled_size), + (long)(cinfo->max_h_samp_factor * DCTSIZE)); compptr->downsampled_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * - (long) (compptr->v_samp_factor * compptr->_DCT_scaled_size), - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + 
jdiv_round_up((long)cinfo->image_height * + (long)(compptr->v_samp_factor * compptr->_DCT_scaled_size), + (long)(cinfo->max_v_samp_factor * DCTSIZE)); } #else /* !IDCT_SCALING_SUPPORTED */ @@ -417,30 +417,30 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo) */ LOCAL(void) -prepare_range_limit_table (j_decompress_ptr cinfo) +prepare_range_limit_table(j_decompress_ptr cinfo) /* Allocate and fill in the sample_range_limit table */ { JSAMPLE *table; int i; table = (JSAMPLE *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * sizeof(JSAMPLE)); - table += (MAXJSAMPLE+1); /* allow negative subscripts of simple table */ + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (5 * (MAXJSAMPLE + 1) + CENTERJSAMPLE) * sizeof(JSAMPLE)); + table += (MAXJSAMPLE + 1); /* allow negative subscripts of simple table */ cinfo->sample_range_limit = table; /* First segment of "simple" table: limit[x] = 0 for x < 0 */ - MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * sizeof(JSAMPLE)); + MEMZERO(table - (MAXJSAMPLE + 1), (MAXJSAMPLE + 1) * sizeof(JSAMPLE)); /* Main part of "simple" table: limit[x] = x */ for (i = 0; i <= MAXJSAMPLE; i++) - table[i] = (JSAMPLE) i; + table[i] = (JSAMPLE)i; table += CENTERJSAMPLE; /* Point to where post-IDCT table starts */ /* End of simple table, rest of first half of post-IDCT table */ - for (i = CENTERJSAMPLE; i < 2*(MAXJSAMPLE+1); i++) + for (i = CENTERJSAMPLE; i < 2 * (MAXJSAMPLE + 1); i++) table[i] = MAXJSAMPLE; /* Second half of post-IDCT table */ - MEMZERO(table + (2 * (MAXJSAMPLE+1)), - (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * sizeof(JSAMPLE)); - MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE), + MEMZERO(table + (2 * (MAXJSAMPLE + 1)), + (2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE)); + MEMCOPY(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE), cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE)); } @@ -457,9 +457,9 @@ prepare_range_limit_table 
(j_decompress_ptr cinfo) */ LOCAL(void) -master_selection (j_decompress_ptr cinfo) +master_selection(j_decompress_ptr cinfo) { - my_master_ptr master = (my_master_ptr) cinfo->master; + my_master_ptr master = (my_master_ptr)cinfo->master; boolean use_c_buffer; long samplesperrow; JDIMENSION jd_samplesperrow; @@ -469,9 +469,10 @@ master_selection (j_decompress_ptr cinfo) prepare_range_limit_table(cinfo); /* Width of an output scanline must be representable as JDIMENSION. */ - samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components; - jd_samplesperrow = (JDIMENSION) samplesperrow; - if ((long) jd_samplesperrow != samplesperrow) + samplesperrow = (long)cinfo->output_width * + (long)cinfo->out_color_components; + jd_samplesperrow = (JDIMENSION)samplesperrow; + if ((long)jd_samplesperrow != samplesperrow) ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); /* Initialize my private state */ @@ -482,7 +483,7 @@ master_selection (j_decompress_ptr cinfo) master->quantizer_1pass = NULL; master->quantizer_2pass = NULL; /* No mode changes if not using buffered-image mode. */ - if (! cinfo->quantize_colors || ! cinfo->buffered_image) { + if (!cinfo->quantize_colors || !cinfo->buffered_image) { cinfo->enable_1pass_quant = FALSE; cinfo->enable_external_quant = FALSE; cinfo->enable_2pass_quant = FALSE; @@ -528,7 +529,7 @@ master_selection (j_decompress_ptr cinfo) } /* Post-processing: in particular, color conversion first */ - if (! cinfo->raw_data_out) { + if (!cinfo->raw_data_out) { if (master->using_merged_upsample) { #ifdef UPSAMPLE_MERGING_SUPPORTED jinit_merged_upsampler(cinfo); /* does color conversion too */ @@ -565,11 +566,11 @@ master_selection (j_decompress_ptr cinfo) use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image; jinit_d_coef_controller(cinfo, use_c_buffer); - if (! 
cinfo->raw_data_out) + if (!cinfo->raw_data_out) jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */); /* We can now tell the memory manager to allocate virtual arrays. */ - (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo); + (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo); /* Initialize input side of decompressor to consume first scan. */ (*cinfo->inputctl->start_input_pass) (cinfo); @@ -585,7 +586,7 @@ master_selection (j_decompress_ptr cinfo) * progress monitoring appropriately. The input step is counted * as one pass. */ - if (cinfo->progress != NULL && ! cinfo->buffered_image && + if (cinfo->progress != NULL && !cinfo->buffered_image && cinfo->inputctl->has_multiple_scans) { int nscans; /* Estimate number of scans to set pass_limit. */ @@ -597,7 +598,7 @@ master_selection (j_decompress_ptr cinfo) nscans = cinfo->num_components; } cinfo->progress->pass_counter = 0L; - cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans; + cinfo->progress->pass_limit = (long)cinfo->total_iMCU_rows * nscans; cinfo->progress->completed_passes = 0; cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2); /* Count the input pass as done */ @@ -617,9 +618,9 @@ master_selection (j_decompress_ptr cinfo) */ METHODDEF(void) -prepare_for_output_pass (j_decompress_ptr cinfo) +prepare_for_output_pass(j_decompress_ptr cinfo) { - my_master_ptr master = (my_master_ptr) cinfo->master; + my_master_ptr master = (my_master_ptr)cinfo->master; if (master->pub.is_dummy_pass) { #ifdef QUANT_2PASS_SUPPORTED @@ -645,8 +646,8 @@ prepare_for_output_pass (j_decompress_ptr cinfo) } (*cinfo->idct->start_pass) (cinfo); (*cinfo->coef->start_output_pass) (cinfo); - if (! cinfo->raw_data_out) { - if (! 
master->using_merged_upsample) + if (!cinfo->raw_data_out) { + if (!master->using_merged_upsample) (*cinfo->cconvert->start_pass) (cinfo); (*cinfo->upsample->start_pass) (cinfo); if (cinfo->quantize_colors) @@ -665,7 +666,7 @@ prepare_for_output_pass (j_decompress_ptr cinfo) /* In buffered-image mode, we assume one more output pass if EOI not * yet reached, but no more passes if EOI has been reached. */ - if (cinfo->buffered_image && ! cinfo->inputctl->eoi_reached) { + if (cinfo->buffered_image && !cinfo->inputctl->eoi_reached) { cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1); } } @@ -677,9 +678,9 @@ prepare_for_output_pass (j_decompress_ptr cinfo) */ METHODDEF(void) -finish_output_pass (j_decompress_ptr cinfo) +finish_output_pass(j_decompress_ptr cinfo) { - my_master_ptr master = (my_master_ptr) cinfo->master; + my_master_ptr master = (my_master_ptr)cinfo->master; if (cinfo->quantize_colors) (*cinfo->cquantize->finish_pass) (cinfo); @@ -694,9 +695,9 @@ finish_output_pass (j_decompress_ptr cinfo) */ GLOBAL(void) -jpeg_new_colormap (j_decompress_ptr cinfo) +jpeg_new_colormap(j_decompress_ptr cinfo) { - my_master_ptr master = (my_master_ptr) cinfo->master; + my_master_ptr master = (my_master_ptr)cinfo->master; /* Prevent application from calling me at wrong times */ if (cinfo->global_state != DSTATE_BUFIMAGE) @@ -722,9 +723,9 @@ jpeg_new_colormap (j_decompress_ptr cinfo) */ GLOBAL(void) -jinit_master_decompress (j_decompress_ptr cinfo) +jinit_master_decompress(j_decompress_ptr cinfo) { - my_master_ptr master = (my_master_ptr) cinfo->master; + my_master_ptr master = (my_master_ptr)cinfo->master; master->pub.prepare_for_output_pass = prepare_for_output_pass; master->pub.finish_output_pass = finish_output_pass; diff --git a/jdmerge.c b/jdmerge.c index ca6f16c..b3fec04 100644 --- a/jdmerge.c +++ b/jdmerge.c @@ -76,8 +76,8 @@ typedef struct { typedef my_upsampler *my_upsample_ptr; #define SCALEBITS 16 /* speediest right-shift on some machines */ 
-#define ONE_HALF ((JLONG) 1 << (SCALEBITS-1)) -#define FIX(x) ((JLONG) ((x) * (1L<upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; int i; JLONG x; SHIFT_TEMPS upsample->Cr_r_tab = (int *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * sizeof(int)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (MAXJSAMPLE + 1) * sizeof(int)); upsample->Cb_b_tab = (int *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * sizeof(int)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (MAXJSAMPLE + 1) * sizeof(int)); upsample->Cr_g_tab = (JLONG *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * sizeof(JLONG)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (MAXJSAMPLE + 1) * sizeof(JLONG)); upsample->Cb_g_tab = (JLONG *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (MAXJSAMPLE+1) * sizeof(JLONG)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (MAXJSAMPLE + 1) * sizeof(JLONG)); for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) { /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */ @@ -217,10 +217,10 @@ build_ycc_rgb_table (j_decompress_ptr cinfo) upsample->Cb_b_tab[i] = (int) RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); /* Cr=>G value is scaled-up -0.71414 * x */ - upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x; + upsample->Cr_g_tab[i] = (-FIX(0.71414)) * x; /* Cb=>G value is scaled-up -0.34414 * x */ /* We also add in ONE_HALF so that need not do it in inner loop */ - upsample->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF; + upsample->Cb_g_tab[i] = (-FIX(0.34414)) * x + ONE_HALF; } } @@ -230,9 +230,9 @@ build_ycc_rgb_table (j_decompress_ptr cinfo) */ METHODDEF(void) -start_pass_merged_upsample (j_decompress_ptr cinfo) +start_pass_merged_upsample(j_decompress_ptr cinfo) { - my_upsample_ptr upsample = (my_upsample_ptr) 
cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; /* Mark the spare buffer empty */ upsample->spare_full = FALSE; @@ -248,14 +248,13 @@ start_pass_merged_upsample (j_decompress_ptr cinfo) */ METHODDEF(void) -merged_2v_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) +merged_2v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) /* 2:1 vertical sampling case: may need a spare row. */ { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; JSAMPROW work_ptrs[2]; JDIMENSION num_rows; /* number of rows returned to caller */ @@ -264,8 +263,8 @@ merged_2v_upsample (j_decompress_ptr cinfo, JDIMENSION size = upsample->out_row_width; if (cinfo->out_color_space == JCS_RGB565) size = cinfo->output_width * 2; - jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0, - 1, size); + jcopy_sample_rows(&upsample->spare_row, 0, output_buf + *out_row_ctr, 0, 1, + size); num_rows = 1; upsample->spare_full = FALSE; } else { @@ -294,20 +293,19 @@ merged_2v_upsample (j_decompress_ptr cinfo, *out_row_ctr += num_rows; upsample->rows_to_go -= num_rows; /* When the buffer is emptied, declare this input row group consumed */ - if (! 
upsample->spare_full) + if (!upsample->spare_full) (*in_row_group_ctr)++; } METHODDEF(void) -merged_1v_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) +merged_1v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) /* 1:1 vertical sampling case: much easier, never need a spare row. */ { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; /* Just do the upsampling. */ (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, @@ -333,43 +331,42 @@ merged_1v_upsample (j_decompress_ptr cinfo, */ METHODDEF(void) -h2v1_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { switch (cinfo->out_color_space) { - case JCS_EXT_RGB: - extrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - extrgbx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - case JCS_EXT_BGR: - extbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - extbgrx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - extxbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - extxrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - default: - 
h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; + case JCS_EXT_RGB: + extrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + extrgbx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + case JCS_EXT_BGR: + extbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + extbgrx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + extxbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + extxrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + default: + h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; } } @@ -379,43 +376,42 @@ h2v1_merged_upsample (j_decompress_ptr cinfo, */ METHODDEF(void) -h2v2_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { switch (cinfo->out_color_space) { - case JCS_EXT_RGB: - extrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - extrgbx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - case JCS_EXT_BGR: - extbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - extbgrx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - extxbgr_h2v2_merged_upsample_internal(cinfo, input_buf, 
in_row_group_ctr, - output_buf); - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - extxrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; - default: - h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, - output_buf); - break; + case JCS_EXT_RGB: + extrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + extrgbx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + case JCS_EXT_BGR: + extbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + extbgrx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + extxbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + extxrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; + default: + h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, + output_buf); + break; } } @@ -424,24 +420,24 @@ h2v2_merged_upsample (j_decompress_ptr cinfo, * RGB565 conversion */ -#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ - (((g) << 3) & 0x7E0) | ((b) >> 3)) -#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ - (((g) << 11) & 0xE000) | \ - (((b) << 5) & 0x1F00)) +#define PACK_SHORT_565_LE(r, g, b) ((((r) << 8) & 0xF800) | \ + (((g) << 3) & 0x7E0) | ((b) >> 3)) +#define PACK_SHORT_565_BE(r, g, b) (((r) & 0xF8) | ((g) >> 5) | \ + (((g) << 11) & 0xE000) | \ + (((b) << 5) & 0x1F00)) -#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) -#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) +#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l) +#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r) -#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) +#define 
PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3) -#define WRITE_TWO_PIXELS_LE(addr, pixels) { \ - ((INT16*)(addr))[0] = (INT16)(pixels); \ - ((INT16*)(addr))[1] = (INT16)((pixels) >> 16); \ +#define WRITE_TWO_PIXELS_LE(addr, pixels) { \ + ((INT16 *)(addr))[0] = (INT16)(pixels); \ + ((INT16 *)(addr))[1] = (INT16)((pixels) >> 16); \ } -#define WRITE_TWO_PIXELS_BE(addr, pixels) { \ - ((INT16*)(addr))[1] = (INT16)(pixels); \ - ((INT16*)(addr))[0] = (INT16)((pixels) >> 16); \ +#define WRITE_TWO_PIXELS_BE(addr, pixels) { \ + ((INT16 *)(addr))[1] = (INT16)(pixels); \ + ((INT16 *)(addr))[0] = (INT16)((pixels) >> 16); \ } #define DITHER_565_R(r, dither) ((r) + ((dither) & 0xFF)) @@ -452,7 +448,7 @@ h2v2_merged_upsample (j_decompress_ptr cinfo, /* Declarations for ordered dithering * * We use a 4x4 ordered dither array packed into 32 bits. This array is - * sufficent for dithering RGB888 to RGB565. + * sufficient for dithering RGB888 to RGB565. */ #define DITHER_MASK 0x3 @@ -467,13 +463,13 @@ static const JLONG dither_matrix[4] = { /* Include inline routines for RGB565 conversion */ -#define PACK_SHORT_565 PACK_SHORT_565_LE -#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE -#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_LE -#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_le -#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_le -#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_le -#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_le +#define PACK_SHORT_565 PACK_SHORT_565_LE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE +#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_LE +#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_le +#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_le +#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_le +#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_le #include "jdmrg565.c" #undef PACK_SHORT_565 #undef PACK_TWO_PIXELS @@ 
-483,13 +479,13 @@ static const JLONG dither_matrix[4] = { #undef h2v2_merged_upsample_565_internal #undef h2v2_merged_upsample_565D_internal -#define PACK_SHORT_565 PACK_SHORT_565_BE -#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE -#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_BE -#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_be -#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_be -#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_be -#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_be +#define PACK_SHORT_565 PACK_SHORT_565_BE +#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE +#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_BE +#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_be +#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_be +#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_be +#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_be #include "jdmrg565.c" #undef PACK_SHORT_565 #undef PACK_TWO_PIXELS @@ -510,9 +506,8 @@ static INLINE boolean is_big_endian(void) METHODDEF(void) -h2v1_merged_upsample_565 (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v1_merged_upsample_565(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { if (is_big_endian()) h2v1_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr, @@ -520,13 +515,12 @@ h2v1_merged_upsample_565 (j_decompress_ptr cinfo, else h2v1_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr, output_buf); - } +} METHODDEF(void) -h2v1_merged_upsample_565D (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v1_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { if (is_big_endian()) h2v1_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr, @@ 
-538,9 +532,8 @@ h2v1_merged_upsample_565D (j_decompress_ptr cinfo, METHODDEF(void) -h2v2_merged_upsample_565 (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v2_merged_upsample_565(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { if (is_big_endian()) h2v2_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr, @@ -552,9 +545,8 @@ h2v2_merged_upsample_565 (j_decompress_ptr cinfo, METHODDEF(void) -h2v2_merged_upsample_565D (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v2_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { if (is_big_endian()) h2v2_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr, @@ -574,14 +566,14 @@ h2v2_merged_upsample_565D (j_decompress_ptr cinfo, */ GLOBAL(void) -jinit_merged_upsampler (j_decompress_ptr cinfo) +jinit_merged_upsampler(j_decompress_ptr cinfo) { my_upsample_ptr upsample; upsample = (my_upsample_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_upsampler)); - cinfo->upsample = (struct jpeg_upsampler *) upsample; + cinfo->upsample = (struct jpeg_upsampler *)upsample; upsample->pub.start_pass = start_pass_merged_upsample; upsample->pub.need_context_rows = FALSE; @@ -602,8 +594,8 @@ jinit_merged_upsampler (j_decompress_ptr cinfo) } /* Allocate a spare row buffer */ upsample->spare_row = (JSAMPROW) - (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (size_t) (upsample->out_row_width * sizeof(JSAMPLE))); + (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (size_t)(upsample->out_row_width * sizeof(JSAMPLE))); } else { upsample->pub.upsample = merged_1v_upsample; if (jsimd_can_h2v1_merged_upsample()) diff --git a/jdmrg565.c b/jdmrg565.c index 18287b3..1b87e37 100644 
--- a/jdmrg565.c +++ b/jdmrg565.c @@ -5,7 +5,7 @@ * Copyright (C) 1994-1996, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright (C) 2013, Linaro Limited. - * Copyright (C) 2014-2015, D. R. Commander. + * Copyright (C) 2014-2015, 2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -15,23 +15,22 @@ INLINE LOCAL(void) -h2v1_merged_upsample_565_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr; JSAMPROW inptr0, inptr1, inptr2; JDIMENSION col; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - JLONG * Crgtab = upsample->Cr_g_tab; - JLONG * Cbgtab = upsample->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + int *Crrtab = upsample->Cr_r_tab; + int *Cbbtab = upsample->Cb_b_tab; + JLONG *Crgtab = upsample->Cr_g_tab; + JLONG *Cbgtab = upsample->Cb_g_tab; unsigned int r, g, b; JLONG rgb; SHIFT_TEMPS @@ -47,7 +46,7 @@ h2v1_merged_upsample_565_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1++); cr = GETJSAMPLE(*inptr2++); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 2 Y values and emit 2 pixels */ @@ -72,37 +71,37 @@ h2v1_merged_upsample_565_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1); cr = GETJSAMPLE(*inptr2); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + 
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; y = GETJSAMPLE(*inptr0); r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = (INT16)rgb; - } - } + *(INT16 *)outptr = (INT16)rgb; + } +} INLINE LOCAL(void) -h2v1_merged_upsample_565D_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr; JSAMPROW inptr0, inptr1, inptr2; JDIMENSION col; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - JLONG * Crgtab = upsample->Cr_g_tab; - JLONG * Cbgtab = upsample->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + int *Crrtab = upsample->Cr_r_tab; + int *Cbbtab = upsample->Cb_b_tab; + JLONG *Crgtab = upsample->Cr_g_tab; + JLONG *Cbgtab = upsample->Cb_g_tab; JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; unsigned int r, g, b; JLONG rgb; @@ -119,7 +118,7 @@ h2v1_merged_upsample_565D_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1++); cr = GETJSAMPLE(*inptr2++); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 2 Y values and emit 2 pixels */ @@ -146,37 +145,36 @@ h2v1_merged_upsample_565D_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1); cr = GETJSAMPLE(*inptr2); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + 
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; y = GETJSAMPLE(*inptr0); r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr = (INT16)rgb; + *(INT16 *)outptr = (INT16)rgb; } } INLINE LOCAL(void) -h2v2_merged_upsample_565_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr0, outptr1; JSAMPROW inptr00, inptr01, inptr1, inptr2; JDIMENSION col; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - JLONG * Crgtab = upsample->Cr_g_tab; - JLONG * Cbgtab = upsample->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + int *Crrtab = upsample->Cr_r_tab; + int *Cbbtab = upsample->Cb_b_tab; + JLONG *Crgtab = upsample->Cr_g_tab; + JLONG *Cbgtab = upsample->Cb_g_tab; unsigned int r, g, b; JLONG rgb; SHIFT_TEMPS @@ -194,7 +192,7 @@ h2v2_merged_upsample_565_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1++); cr = GETJSAMPLE(*inptr2++); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ @@ -234,7 +232,7 @@ h2v2_merged_upsample_565_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1); cr = GETJSAMPLE(*inptr2); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + 
cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; y = GETJSAMPLE(*inptr00); @@ -242,45 +240,45 @@ h2v2_merged_upsample_565_internal (j_decompress_ptr cinfo, g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr0 = (INT16)rgb; + *(INT16 *)outptr0 = (INT16)rgb; y = GETJSAMPLE(*inptr01); r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr1 = (INT16)rgb; + *(INT16 *)outptr1 = (INT16)rgb; } } INLINE LOCAL(void) -h2v2_merged_upsample_565D_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr0, outptr1; JSAMPROW inptr00, inptr01, inptr1, inptr2; JDIMENSION col; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - JLONG * Crgtab = upsample->Cr_g_tab; - JLONG * Cbgtab = upsample->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + int *Crrtab = upsample->Cr_r_tab; + int *Cbbtab = upsample->Cb_b_tab; + JLONG *Crgtab = upsample->Cr_g_tab; + JLONG *Cbgtab = upsample->Cb_g_tab; JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; - JLONG d1 = dither_matrix[(cinfo->output_scanline+1) & DITHER_MASK]; + JLONG d1 = dither_matrix[(cinfo->output_scanline + 1) & DITHER_MASK]; unsigned int r, g, b; JLONG rgb; SHIFT_TEMPS - inptr00 = input_buf[0][in_row_group_ctr*2]; - inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; + inptr00 = input_buf[0][in_row_group_ctr * 
2]; + inptr01 = input_buf[0][in_row_group_ctr * 2 + 1]; inptr1 = input_buf[1][in_row_group_ctr]; inptr2 = input_buf[2][in_row_group_ctr]; outptr0 = output_buf[0]; @@ -292,7 +290,7 @@ h2v2_merged_upsample_565D_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1++); cr = GETJSAMPLE(*inptr2++); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ @@ -304,20 +302,20 @@ h2v2_merged_upsample_565D_internal (j_decompress_ptr cinfo, rgb = PACK_SHORT_565(r, g, b); y = GETJSAMPLE(*inptr00++); - r = range_limit[DITHER_565_R(y + cred, d1)]; - g = range_limit[DITHER_565_G(y + cgreen, d1)]; - b = range_limit[DITHER_565_B(y + cblue, d1)]; - d1 = DITHER_ROTATE(d1); + r = range_limit[DITHER_565_R(y + cred, d0)]; + g = range_limit[DITHER_565_G(y + cgreen, d0)]; + b = range_limit[DITHER_565_B(y + cblue, d0)]; + d0 = DITHER_ROTATE(d0); rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); WRITE_TWO_PIXELS(outptr0, rgb); outptr0 += 4; y = GETJSAMPLE(*inptr01++); - r = range_limit[DITHER_565_R(y + cred, d0)]; - g = range_limit[DITHER_565_G(y + cgreen, d0)]; - b = range_limit[DITHER_565_B(y + cblue, d0)]; - d0 = DITHER_ROTATE(d0); + r = range_limit[DITHER_565_R(y + cred, d1)]; + g = range_limit[DITHER_565_G(y + cgreen, d1)]; + b = range_limit[DITHER_565_B(y + cblue, d1)]; + d1 = DITHER_ROTATE(d1); rgb = PACK_SHORT_565(r, g, b); y = GETJSAMPLE(*inptr01++); @@ -336,7 +334,7 @@ h2v2_merged_upsample_565D_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1); cr = GETJSAMPLE(*inptr2); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; y = GETJSAMPLE(*inptr00); @@ -344,13 +342,13 @@ h2v2_merged_upsample_565D_internal (j_decompress_ptr cinfo, g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = 
range_limit[DITHER_565_B(y + cblue, d0)]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr0 = (INT16)rgb; + *(INT16 *)outptr0 = (INT16)rgb; y = GETJSAMPLE(*inptr01); r = range_limit[DITHER_565_R(y + cred, d1)]; g = range_limit[DITHER_565_G(y + cgreen, d1)]; b = range_limit[DITHER_565_B(y + cblue, d1)]; rgb = PACK_SHORT_565(r, g, b); - *(INT16*)outptr1 = (INT16)rgb; + *(INT16 *)outptr1 = (INT16)rgb; } } diff --git a/jdmrgext.c b/jdmrgext.c index 9d7d2af..b1c27df 100644 --- a/jdmrgext.c +++ b/jdmrgext.c @@ -21,23 +21,22 @@ INLINE LOCAL(void) -h2v1_merged_upsample_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr; JSAMPROW inptr0, inptr1, inptr2; JDIMENSION col; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - JLONG * Crgtab = upsample->Cr_g_tab; - JLONG * Cbgtab = upsample->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + int *Crrtab = upsample->Cr_r_tab; + int *Cbbtab = upsample->Cb_b_tab; + JLONG *Crgtab = upsample->Cr_g_tab; + JLONG *Cbgtab = upsample->Cb_g_tab; SHIFT_TEMPS inptr0 = input_buf[0][in_row_group_ctr]; @@ -50,7 +49,7 @@ h2v1_merged_upsample_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1++); cr = GETJSAMPLE(*inptr2++); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 2 Y values and emit 2 pixels */ y = GETJSAMPLE(*inptr0++); @@ -75,7 +74,7 @@ 
h2v1_merged_upsample_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1); cr = GETJSAMPLE(*inptr2); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; y = GETJSAMPLE(*inptr0); outptr[RGB_RED] = range_limit[y + cred]; @@ -94,27 +93,26 @@ h2v1_merged_upsample_internal (j_decompress_ptr cinfo, INLINE LOCAL(void) -h2v2_merged_upsample_internal (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr0, outptr1; JSAMPROW inptr00, inptr01, inptr1, inptr2; JDIMENSION col; /* copy these pointers into registers if possible */ - register JSAMPLE * range_limit = cinfo->sample_range_limit; - int * Crrtab = upsample->Cr_r_tab; - int * Cbbtab = upsample->Cb_b_tab; - JLONG * Crgtab = upsample->Cr_g_tab; - JLONG * Cbgtab = upsample->Cb_g_tab; + register JSAMPLE *range_limit = cinfo->sample_range_limit; + int *Crrtab = upsample->Cr_r_tab; + int *Cbbtab = upsample->Cb_b_tab; + JLONG *Crgtab = upsample->Cr_g_tab; + JLONG *Cbgtab = upsample->Cb_g_tab; SHIFT_TEMPS - inptr00 = input_buf[0][in_row_group_ctr*2]; - inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; + inptr00 = input_buf[0][in_row_group_ctr * 2]; + inptr01 = input_buf[0][in_row_group_ctr * 2 + 1]; inptr1 = input_buf[1][in_row_group_ctr]; inptr2 = input_buf[2][in_row_group_ctr]; outptr0 = output_buf[0]; @@ -125,7 +123,7 @@ h2v2_merged_upsample_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1++); cr = GETJSAMPLE(*inptr2++); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cgreen = 
(int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ y = GETJSAMPLE(*inptr00++); @@ -166,7 +164,7 @@ h2v2_merged_upsample_internal (j_decompress_ptr cinfo, cb = GETJSAMPLE(*inptr1); cr = GETJSAMPLE(*inptr2); cred = Crrtab[cr]; - cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); + cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; y = GETJSAMPLE(*inptr00); outptr0[RGB_RED] = range_limit[y + cred]; diff --git a/jdphuff.c b/jdphuff.c index c927ffa..9e82636 100644 --- a/jdphuff.c +++ b/jdphuff.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2015-2016, D. R. Commander. + * Copyright (C) 2015-2016, 2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -15,12 +15,16 @@ * up to the start of the current MCU. To do this, we copy state variables * into local working storage, and update them back to the permanent * storage only upon successful completion of an MCU. + * + * NOTE: All referenced figures are from + * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994. 
*/ #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" #include "jdhuff.h" /* Declarations shared with jdhuff.c */ +#include <limits.h> #ifdef D_PROGRESSIVE_SUPPORTED @@ -43,15 +47,15 @@ typedef struct { */ #ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest,src) ((dest) = (src)) +#define ASSIGN_STATE(dest, src) ((dest) = (src)) #else #if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest,src) \ - ((dest).EOBRUN = (src).EOBRUN, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) +#define ASSIGN_STATE(dest, src) \ + ((dest).EOBRUN = (src).EOBRUN, \ + (dest).last_dc_val[0] = (src).last_dc_val[0], \ + (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) #endif #endif @@ -77,14 +81,14 @@ typedef struct { typedef phuff_entropy_decoder *phuff_entropy_ptr; /* Forward declarations */ -METHODDEF(boolean) decode_mcu_DC_first (j_decompress_ptr cinfo, +METHODDEF(boolean) decode_mcu_DC_first(j_decompress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) decode_mcu_AC_first(j_decompress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data); -METHODDEF(boolean) decode_mcu_AC_first (j_decompress_ptr cinfo, +METHODDEF(boolean) decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data); -METHODDEF(boolean) decode_mcu_DC_refine (j_decompress_ptr cinfo, - JBLOCKROW *MCU_data); -METHODDEF(boolean) decode_mcu_AC_refine (j_decompress_ptr cinfo, - JBLOCKROW *MCU_data); /* @@ -92,9 +96,9 @@ METHODDEF(boolean) decode_mcu_AC_refine (j_decompress_ptr cinfo, */ METHODDEF(void) -start_pass_phuff_decoder (j_decompress_ptr cinfo) +start_pass_phuff_decoder(j_decompress_ptr cinfo) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = 
(phuff_entropy_ptr)cinfo->entropy; boolean is_DC_band, bad; int ci, coefi, tbl; d_derived_tbl **pdtbl; @@ -118,7 +122,7 @@ start_pass_phuff_decoder (j_decompress_ptr cinfo) } if (cinfo->Ah != 0) { /* Successive approximation refinement scan: must have Al = Ah-1. */ - if (cinfo->Al != cinfo->Ah-1) + if (cinfo->Al != cinfo->Ah - 1) bad = TRUE; } if (cinfo->Al > 13) /* need not check for < 0 */ @@ -138,7 +142,7 @@ start_pass_phuff_decoder (j_decompress_ptr cinfo) */ for (ci = 0; ci < cinfo->comps_in_scan; ci++) { int cindex = cinfo->cur_comp_info[ci]->component_index; - coef_bit_ptr = & cinfo->coef_bits[cindex][0]; + coef_bit_ptr = &cinfo->coef_bits[cindex][0]; if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { @@ -205,22 +209,26 @@ start_pass_phuff_decoder (j_decompress_ptr cinfo) #define AVOID_TABLES #ifdef AVOID_TABLES -#define NEG_1 ((unsigned)-1) -#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((NEG_1)<<(s)) + 1) : (x)) +#define NEG_1 ((unsigned)-1) +#define HUFF_EXTEND(x, s) \ + ((x) < (1 << ((s) - 1)) ? (x) + (((NEG_1) << (s)) + 1) : (x)) #else -#define HUFF_EXTEND(x,s) ((x) < extend_test[s] ? (x) + extend_offset[s] : (x)) +#define HUFF_EXTEND(x, s) \ + ((x) < extend_test[s] ? 
(x) + extend_offset[s] : (x)) -static const int extend_test[16] = /* entry n is 2**(n-1) */ - { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, - 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; +static const int extend_test[16] = { /* entry n is 2**(n-1) */ + 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, + 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 +}; -static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */ - { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, - ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, - ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, - ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 }; +static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */ + 0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1, + ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1, + ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1, + ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1 +}; #endif /* AVOID_TABLES */ @@ -231,9 +239,9 @@ static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */ */ LOCAL(boolean) -process_restart (j_decompress_ptr cinfo) +process_restart(j_decompress_ptr cinfo) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; int ci; /* Throw away any unused bits remaining in bit buffer; */ @@ -242,7 +250,7 @@ process_restart (j_decompress_ptr cinfo) entropy->bitstate.bits_left = 0; /* Advance past the RSTn marker */ - if (! 
(*cinfo->marker->read_restart_marker) (cinfo)) + if (!(*cinfo->marker->read_restart_marker) (cinfo)) return FALSE; /* Re-initialize DC predictions to 0 */ @@ -289,9 +297,9 @@ process_restart (j_decompress_ptr cinfo) */ METHODDEF(boolean) -decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; int Al = cinfo->Al; register int s, r; int blkn, ci; @@ -304,17 +312,17 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) - if (! process_restart(cinfo)) + if (!process_restart(cinfo)) return FALSE; } /* If we've run out of data, just leave the MCU set to zeroes. * This way, we return uniform gray for the remainder of the segment. */ - if (! entropy->pub.insufficient_data) { + if (!entropy->pub.insufficient_data) { /* Load up working state */ - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + BITREAD_LOAD_STATE(cinfo, entropy->bitstate); ASSIGN_STATE(state, entropy->saved); /* Outer loop handles each block in the MCU */ @@ -336,14 +344,18 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } /* Convert DC difference to actual value, update last_dc_val */ + if ((state.last_dc_val[ci] >= 0 && + s > INT_MAX - state.last_dc_val[ci]) || + (state.last_dc_val[ci] < 0 && s < INT_MIN - state.last_dc_val[ci])) + ERREXIT(cinfo, JERR_BAD_DCT_COEF); s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */ - (*block)[0] = (JCOEF) LEFT_SHIFT(s, Al); + (*block)[0] = (JCOEF)LEFT_SHIFT(s, Al); } /* Completed MCU, so update state */ - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + BITREAD_SAVE_STATE(cinfo, entropy->bitstate); ASSIGN_STATE(entropy->saved, state); } 
@@ -360,9 +372,9 @@ decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu_AC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; int Se = cinfo->Se; int Al = cinfo->Al; register int s, k, r; @@ -374,14 +386,14 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) - if (! process_restart(cinfo)) + if (!process_restart(cinfo)) return FALSE; } /* If we've run out of data, just leave the MCU set to zeroes. * This way, we return uniform gray for the remainder of the segment. */ - if (! entropy->pub.insufficient_data) { + if (!entropy->pub.insufficient_data) { /* Load up working state. * We can avoid loading/saving bitread state if in an EOB run. @@ -393,7 +405,7 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) if (EOBRUN > 0) /* if it's a band of zeroes... 
*/ EOBRUN--; /* ...process it now (we do nothing) */ else { - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + BITREAD_LOAD_STATE(cinfo, entropy->bitstate); block = MCU_data[0]; tbl = entropy->ac_derived_tbl; @@ -407,7 +419,7 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) r = GET_BITS(s); s = HUFF_EXTEND(r, s); /* Scale and output coefficient in natural (dezigzagged) order */ - (*block)[jpeg_natural_order[k]] = (JCOEF) LEFT_SHIFT(s, Al); + (*block)[jpeg_natural_order[k]] = (JCOEF)LEFT_SHIFT(s, Al); } else { if (r == 15) { /* ZRL */ k += 15; /* skip 15 zeroes in band */ @@ -424,7 +436,7 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } } - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + BITREAD_SAVE_STATE(cinfo, entropy->bitstate); } /* Completed MCU, so update state */ @@ -445,9 +457,9 @@ decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ int blkn; JBLOCKROW block; @@ -456,7 +468,7 @@ decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) - if (! 
process_restart(cinfo)) + if (!process_restart(cinfo)) return FALSE; } @@ -465,7 +477,7 @@ decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) */ /* Load up working state */ - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + BITREAD_LOAD_STATE(cinfo, entropy->bitstate); /* Outer loop handles each block in the MCU */ @@ -480,7 +492,7 @@ decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } /* Completed MCU, so update state */ - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + BITREAD_SAVE_STATE(cinfo, entropy->bitstate); /* Account for restart interval (no-op if not using restarts) */ entropy->restarts_to_go--; @@ -494,9 +506,9 @@ decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) */ METHODDEF(boolean) -decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; int Se = cinfo->Se; int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ int m1 = (NEG_1) << cinfo->Al; /* -1 in the bit position being coded */ @@ -512,16 +524,16 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Process restart marker if needed; may have to suspend */ if (cinfo->restart_interval) { if (entropy->restarts_to_go == 0) - if (! process_restart(cinfo)) + if (!process_restart(cinfo)) return FALSE; } /* If we've run out of data, don't modify the MCU. */ - if (! 
entropy->pub.insufficient_data) { + if (!entropy->pub.insufficient_data) { /* Load up working state */ - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + BITREAD_LOAD_STATE(cinfo, entropy->bitstate); EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */ /* There is always only one block per MCU */ @@ -589,7 +601,7 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) if (s) { int pos = jpeg_natural_order[k]; /* Output newly nonzero coefficient */ - (*block)[pos] = (JCOEF) s; + (*block)[pos] = (JCOEF)s; /* Remember its position in case we have to suspend */ newnz_pos[num_newnz++] = pos; } @@ -621,7 +633,7 @@ decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } /* Completed MCU, so update state */ - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + BITREAD_SAVE_STATE(cinfo, entropy->bitstate); entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */ } @@ -644,16 +656,16 @@ undoit: */ GLOBAL(void) -jinit_phuff_decoder (j_decompress_ptr cinfo) +jinit_phuff_decoder(j_decompress_ptr cinfo) { phuff_entropy_ptr entropy; int *coef_bit_ptr; int ci, i; entropy = (phuff_entropy_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(phuff_entropy_decoder)); - cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; + cinfo->entropy = (struct jpeg_entropy_decoder *)entropy; entropy->pub.start_pass = start_pass_phuff_decoder; /* Mark derived tables unallocated */ @@ -663,9 +675,10 @@ jinit_phuff_decoder (j_decompress_ptr cinfo) /* Create progression status table */ cinfo->coef_bits = (int (*)[DCTSIZE2]) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components*DCTSIZE2*sizeof(int)); - coef_bit_ptr = & cinfo->coef_bits[0][0]; + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + cinfo->num_components * DCTSIZE2 * + sizeof(int)); + coef_bit_ptr = &cinfo->coef_bits[0][0]; for (ci = 0; ci < 
cinfo->num_components; ci++) for (i = 0; i < DCTSIZE2; i++) *coef_bit_ptr++ = -1; diff --git a/jdpostct.c b/jdpostct.c index 601fc2a..6a2cf5c 100644 --- a/jdpostct.c +++ b/jdpostct.c @@ -46,22 +46,28 @@ typedef my_post_controller *my_post_ptr; /* Forward declarations */ -METHODDEF(void) post_process_1pass - (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, - JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail); +METHODDEF(void) post_process_1pass(j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); #ifdef QUANT_2PASS_SUPPORTED -METHODDEF(void) post_process_prepass - (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, - JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail); -METHODDEF(void) post_process_2pass - (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, - JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail); +METHODDEF(void) post_process_prepass(j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); +METHODDEF(void) post_process_2pass(j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail); #endif @@ -70,9 +76,9 @@ METHODDEF(void) post_process_2pass */ METHODDEF(void) -start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode) +start_pass_dpost(j_decompress_ptr cinfo, J_BUF_MODE pass_mode) { - my_post_ptr post = (my_post_ptr) cinfo->post; + my_post_ptr post = (my_post_ptr)cinfo->post; switch 
(pass_mode) { case JBUF_PASS_THRU: @@ -85,8 +91,8 @@ start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode) */ if (post->buffer == NULL) { post->buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, post->whole_image, - (JDIMENSION) 0, post->strip_height, TRUE); + ((j_common_ptr)cinfo, post->whole_image, + (JDIMENSION)0, post->strip_height, TRUE); } } else { /* For single-pass processing without color quantization, @@ -123,13 +129,12 @@ start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode) */ METHODDEF(void) -post_process_1pass (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) +post_process_1pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) { - my_post_ptr post = (my_post_ptr) cinfo->post; + my_post_ptr post = (my_post_ptr)cinfo->post; JDIMENSION num_rows, max_rows; /* Fill the buffer, but not more than what we can dump out in one go. */ @@ -138,12 +143,13 @@ post_process_1pass (j_decompress_ptr cinfo, if (max_rows > post->strip_height) max_rows = post->strip_height; num_rows = 0; - (*cinfo->upsample->upsample) (cinfo, - input_buf, in_row_group_ctr, in_row_groups_avail, - post->buffer, &num_rows, max_rows); + (*cinfo->upsample->upsample) (cinfo, input_buf, in_row_group_ctr, + in_row_groups_avail, post->buffer, &num_rows, + max_rows); /* Quantize and emit data. 
*/ - (*cinfo->cquantize->color_quantize) (cinfo, - post->buffer, output_buf + *out_row_ctr, (int) num_rows); + (*cinfo->cquantize->color_quantize) (cinfo, post->buffer, + output_buf + *out_row_ctr, + (int)num_rows); *out_row_ctr += num_rows; } @@ -155,34 +161,33 @@ post_process_1pass (j_decompress_ptr cinfo, */ METHODDEF(void) -post_process_prepass (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) +post_process_prepass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) { - my_post_ptr post = (my_post_ptr) cinfo->post; + my_post_ptr post = (my_post_ptr)cinfo->post; JDIMENSION old_next_row, num_rows; /* Reposition virtual buffer if at start of strip. */ if (post->next_row == 0) { post->buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, post->whole_image, + ((j_common_ptr)cinfo, post->whole_image, post->starting_row, post->strip_height, TRUE); } /* Upsample some data (up to a strip height's worth). */ old_next_row = post->next_row; - (*cinfo->upsample->upsample) (cinfo, - input_buf, in_row_group_ctr, in_row_groups_avail, - post->buffer, &post->next_row, post->strip_height); + (*cinfo->upsample->upsample) (cinfo, input_buf, in_row_group_ctr, + in_row_groups_avail, post->buffer, + &post->next_row, post->strip_height); /* Allow quantizer to scan new data. No data is emitted, */ /* but we advance out_row_ctr so outer loop can tell when we're done. 
*/ if (post->next_row > old_next_row) { num_rows = post->next_row - old_next_row; (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row, - (JSAMPARRAY) NULL, (int) num_rows); + (JSAMPARRAY)NULL, (int)num_rows); *out_row_ctr += num_rows; } @@ -199,19 +204,18 @@ post_process_prepass (j_decompress_ptr cinfo, */ METHODDEF(void) -post_process_2pass (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) +post_process_2pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, + JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf, + JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) { - my_post_ptr post = (my_post_ptr) cinfo->post; + my_post_ptr post = (my_post_ptr)cinfo->post; JDIMENSION num_rows, max_rows; /* Reposition virtual buffer if at start of strip. */ if (post->next_row == 0) { post->buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, post->whole_image, + ((j_common_ptr)cinfo, post->whole_image, post->starting_row, post->strip_height, FALSE); } @@ -226,9 +230,9 @@ post_process_2pass (j_decompress_ptr cinfo, num_rows = max_rows; /* Quantize and emit data. */ - (*cinfo->cquantize->color_quantize) (cinfo, - post->buffer + post->next_row, output_buf + *out_row_ctr, - (int) num_rows); + (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + post->next_row, + output_buf + *out_row_ctr, + (int)num_rows); *out_row_ctr += num_rows; /* Advance if we filled the strip. 
*/ @@ -247,14 +251,14 @@ post_process_2pass (j_decompress_ptr cinfo, */ GLOBAL(void) -jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer) +jinit_d_post_controller(j_decompress_ptr cinfo, boolean need_full_buffer) { my_post_ptr post; post = (my_post_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_post_controller)); - cinfo->post = (struct jpeg_d_post_controller *) post; + cinfo->post = (struct jpeg_d_post_controller *)post; post->pub.start_pass = start_pass_dpost; post->whole_image = NULL; /* flag for no virtual arrays */ post->buffer = NULL; /* flag for no strip buffer */ @@ -265,16 +269,16 @@ jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer) * an efficient number of rows for upsampling to return. * (In the presence of output rescaling, we might want to be smarter?) */ - post->strip_height = (JDIMENSION) cinfo->max_v_samp_factor; + post->strip_height = (JDIMENSION)cinfo->max_v_samp_factor; if (need_full_buffer) { /* Two-pass color quantization: need full-image storage. */ /* We round up the number of rows to a multiple of the strip height. */ #ifdef QUANT_2PASS_SUPPORTED post->whole_image = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, + ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE, cinfo->output_width * cinfo->out_color_components, - (JDIMENSION) jround_up((long) cinfo->output_height, - (long) post->strip_height), + (JDIMENSION)jround_up((long)cinfo->output_height, + (long)post->strip_height), post->strip_height); #else ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); @@ -282,7 +286,7 @@ jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer) } else { /* One-pass color quantization: just make a strip buffer. 
*/ post->buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, + ((j_common_ptr)cinfo, JPOOL_IMAGE, cinfo->output_width * cinfo->out_color_components, post->strip_height); } diff --git a/jdsample.c b/jdsample.c index b1378e1..52ee9af 100644 --- a/jdsample.c +++ b/jdsample.c @@ -36,9 +36,9 @@ */ METHODDEF(void) -start_pass_upsample (j_decompress_ptr cinfo) +start_pass_upsample(j_decompress_ptr cinfo) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; /* Mark the conversion buffer empty */ upsample->next_row_out = cinfo->max_v_samp_factor; @@ -56,13 +56,12 @@ start_pass_upsample (j_decompress_ptr cinfo) */ METHODDEF(void) -sep_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, - JDIMENSION in_row_groups_avail, - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) +sep_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail, + JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, + JDIMENSION out_rows_avail) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; int ci; jpeg_component_info *compptr; JDIMENSION num_rows; @@ -84,7 +83,7 @@ sep_upsample (j_decompress_ptr cinfo, /* Color-convert and emit rows */ /* How many we have in the buffer: */ - num_rows = (JDIMENSION) (cinfo->max_v_samp_factor - upsample->next_row_out); + num_rows = (JDIMENSION)(cinfo->max_v_samp_factor - upsample->next_row_out); /* Not more than the distance to the end of the image. 
Need this test * in case the image height is not a multiple of max_v_samp_factor: */ @@ -96,9 +95,8 @@ sep_upsample (j_decompress_ptr cinfo, num_rows = out_rows_avail; (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf, - (JDIMENSION) upsample->next_row_out, - output_buf + *out_row_ctr, - (int) num_rows); + (JDIMENSION)upsample->next_row_out, + output_buf + *out_row_ctr, (int)num_rows); /* Adjust counts */ *out_row_ctr += num_rows; @@ -124,8 +122,8 @@ sep_upsample (j_decompress_ptr cinfo, */ METHODDEF(void) -fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +fullsize_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { *output_data_ptr = input_data; } @@ -137,8 +135,8 @@ fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -noop_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +noop_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { *output_data_ptr = NULL; /* safety check */ } @@ -156,10 +154,10 @@ noop_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; + my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample; JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr, outptr; register JSAMPLE invalue; @@ -185,8 +183,8 @@ int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, } /* Generate any additional output rows by duplicating the first one */ if (v_expand > 1) { - 
jcopy_sample_rows(output_data, outrow, output_data, outrow+1, - v_expand-1, cinfo->output_width); + jcopy_sample_rows(output_data, outrow, output_data, outrow + 1, + v_expand - 1, cinfo->output_width); } inrow++; outrow += v_expand; @@ -200,8 +198,8 @@ int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr, outptr; @@ -228,8 +226,8 @@ h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr, outptr; @@ -247,8 +245,8 @@ h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, *outptr++ = invalue; *outptr++ = invalue; } - jcopy_sample_rows(output_data, outrow, output_data, outrow+1, - 1, cinfo->output_width); + jcopy_sample_rows(output_data, outrow, output_data, outrow + 1, 1, + cinfo->output_width); inrow++; outrow += 2; } @@ -271,8 +269,8 @@ h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr, outptr; @@ -285,20 +283,20 @@ h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = 
output_data[inrow]; /* Special case for first column */ invalue = GETJSAMPLE(*inptr++); - *outptr++ = (JSAMPLE) invalue; - *outptr++ = (JSAMPLE) ((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2); + *outptr++ = (JSAMPLE)invalue; + *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2); for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) { /* General case: 3/4 * nearer pixel + 1/4 * further pixel */ invalue = GETJSAMPLE(*inptr++) * 3; - *outptr++ = (JSAMPLE) ((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2); - *outptr++ = (JSAMPLE) ((invalue + GETJSAMPLE(*inptr) + 2) >> 2); + *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2); + *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(*inptr) + 2) >> 2); } /* Special case for last column */ invalue = GETJSAMPLE(*inptr); - *outptr++ = (JSAMPLE) ((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2); - *outptr++ = (JSAMPLE) invalue; + *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2); + *outptr++ = (JSAMPLE)invalue; } } @@ -311,8 +309,8 @@ h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -h1v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; JSAMPROW inptr0, inptr1, outptr; @@ -330,14 +328,14 @@ h1v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* inptr0 points to nearest input row, inptr1 points to next nearest */ inptr0 = input_data[inrow]; if (v == 0) /* next nearest is row above */ - inptr1 = input_data[inrow-1]; + inptr1 = input_data[inrow - 1]; else /* next nearest is row below */ - inptr1 = input_data[inrow+1]; + inptr1 = input_data[inrow + 1]; outptr = output_data[outrow++]; - for(colctr = 0; colctr < compptr->downsampled_width; colctr++) { + for (colctr = 0; 
colctr < compptr->downsampled_width; colctr++) { thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); - *outptr++ = (JSAMPLE) ((thiscolsum + 1) >> 2); + *outptr++ = (JSAMPLE)((thiscolsum + 1) >> 2); } } inrow++; @@ -354,8 +352,8 @@ h1v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ METHODDEF(void) -h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; register JSAMPROW inptr0, inptr1, outptr; @@ -373,30 +371,30 @@ h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* inptr0 points to nearest input row, inptr1 points to next nearest */ inptr0 = input_data[inrow]; if (v == 0) /* next nearest is row above */ - inptr1 = input_data[inrow-1]; + inptr1 = input_data[inrow - 1]; else /* next nearest is row below */ - inptr1 = input_data[inrow+1]; + inptr1 = input_data[inrow + 1]; outptr = output_data[outrow++]; /* Special case for first column */ thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); - *outptr++ = (JSAMPLE) ((thiscolsum * 4 + 8) >> 4); - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4); - lastcolsum = thiscolsum; thiscolsum = nextcolsum; + *outptr++ = (JSAMPLE)((thiscolsum * 4 + 8) >> 4); + *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4); + lastcolsum = thiscolsum; thiscolsum = nextcolsum; for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) { /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */ /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */ nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4); - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + 
nextcolsum + 7) >> 4); - lastcolsum = thiscolsum; thiscolsum = nextcolsum; + *outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4); + *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4); + lastcolsum = thiscolsum; thiscolsum = nextcolsum; } /* Special case for last column */ - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4); - *outptr++ = (JSAMPLE) ((thiscolsum * 4 + 7) >> 4); + *outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4); + *outptr++ = (JSAMPLE)((thiscolsum * 4 + 7) >> 4); } inrow++; } @@ -408,7 +406,7 @@ h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jinit_upsampler (j_decompress_ptr cinfo) +jinit_upsampler(j_decompress_ptr cinfo) { my_upsample_ptr upsample; int ci; @@ -418,14 +416,14 @@ jinit_upsampler (j_decompress_ptr cinfo) if (!cinfo->master->jinit_upsampler_no_alloc) { upsample = (my_upsample_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_upsampler)); - cinfo->upsample = (struct jpeg_upsampler *) upsample; + cinfo->upsample = (struct jpeg_upsampler *)upsample; upsample->pub.start_pass = start_pass_upsample; upsample->pub.upsample = sep_upsample; upsample->pub.need_context_rows = FALSE; /* until we find out differently */ } else - upsample = (my_upsample_ptr) cinfo->upsample; + upsample = (my_upsample_ptr)cinfo->upsample; if (cinfo->CCIR601_sampling) /* this isn't supported */ ERREXIT(cinfo, JERR_CCIR601_NOTIMPL); @@ -451,7 +449,7 @@ jinit_upsampler (j_decompress_ptr cinfo) v_out_group = cinfo->max_v_samp_factor; upsample->rowgroup_height[ci] = v_in_group; /* save for use later */ need_buffer = TRUE; - if (! compptr->component_needed) { + if (!compptr->component_needed) { /* Don't bother to upsample an uninteresting component. 
*/ upsample->methods[ci] = noop_upsample; need_buffer = FALSE; @@ -459,8 +457,7 @@ jinit_upsampler (j_decompress_ptr cinfo) /* Fullsize components can be processed without any work. */ upsample->methods[ci] = fullsize_upsample; need_buffer = FALSE; - } else if (h_in_group * 2 == h_out_group && - v_in_group == v_out_group) { + } else if (h_in_group * 2 == h_out_group && v_in_group == v_out_group) { /* Special cases for 2h1v upsampling */ if (do_fancy && compptr->downsampled_width > 2) { if (jsimd_can_h2v1_fancy_upsample()) @@ -502,16 +499,16 @@ jinit_upsampler (j_decompress_ptr cinfo) else #endif upsample->methods[ci] = int_upsample; - upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group); - upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group); + upsample->h_expand[ci] = (UINT8)(h_out_group / h_in_group); + upsample->v_expand[ci] = (UINT8)(v_out_group / v_in_group); } else ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL); if (need_buffer && !cinfo->master->jinit_upsampler_no_alloc) { upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) jround_up((long) cinfo->output_width, - (long) cinfo->max_h_samp_factor), - (JDIMENSION) cinfo->max_v_samp_factor); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (JDIMENSION)jround_up((long)cinfo->output_width, + (long)cinfo->max_h_samp_factor), + (JDIMENSION)cinfo->max_v_samp_factor); } } } diff --git a/jdtrans.c b/jdtrans.c index cfc85dd..56713ef 100644 --- a/jdtrans.c +++ b/jdtrans.c @@ -19,7 +19,7 @@ /* Forward declarations */ -LOCAL(void) transdecode_master_selection (j_decompress_ptr cinfo); +LOCAL(void) transdecode_master_selection(j_decompress_ptr cinfo); /* @@ -45,7 +45,7 @@ LOCAL(void) transdecode_master_selection (j_decompress_ptr cinfo); */ GLOBAL(jvirt_barray_ptr *) -jpeg_read_coefficients (j_decompress_ptr cinfo) +jpeg_read_coefficients(j_decompress_ptr cinfo) { if (cinfo->global_state == DSTATE_READY) { /* First call: initialize active modules */ @@ -58,7 +58,7 @@ 
jpeg_read_coefficients (j_decompress_ptr cinfo) int retcode; /* Call progress monitor hook if present */ if (cinfo->progress != NULL) - (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo); + (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo); /* Absorb some more input */ retcode = (*cinfo->inputctl->consume_input) (cinfo); if (retcode == JPEG_SUSPENDED) @@ -70,7 +70,7 @@ jpeg_read_coefficients (j_decompress_ptr cinfo) (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) { if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) { /* startup underestimated number of scans; ratchet up one scan */ - cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows; + cinfo->progress->pass_limit += (long)cinfo->total_iMCU_rows; } } } @@ -97,7 +97,7 @@ jpeg_read_coefficients (j_decompress_ptr cinfo) */ LOCAL(void) -transdecode_master_selection (j_decompress_ptr cinfo) +transdecode_master_selection(j_decompress_ptr cinfo) { /* This is effectively a buffered-image operation. */ cinfo->buffered_image = TRUE; @@ -129,7 +129,7 @@ transdecode_master_selection (j_decompress_ptr cinfo) jinit_d_coef_controller(cinfo, TRUE); /* We can now tell the memory manager to allocate virtual arrays. */ - (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo); + (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo); /* Initialize input side of decompressor to consume first scan. */ (*cinfo->inputctl->start_input_pass) (cinfo); @@ -148,7 +148,7 @@ transdecode_master_selection (j_decompress_ptr cinfo) nscans = 1; } cinfo->progress->pass_counter = 0L; - cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans; + cinfo->progress->pass_limit = (long)cinfo->total_iMCU_rows * nscans; cinfo->progress->completed_passes = 0; cinfo->progress->total_passes = 1; } diff --git a/jerror.c b/jerror.c index c31acd9..936c4f5 100644 --- a/jerror.c +++ b/jerror.c @@ -44,7 +44,7 @@ * want to refer to it directly. 
*/ -#define JMESSAGE(code,string) string , +#define JMESSAGE(code, string) string, const char * const jpeg_std_message_table[] = { #include "jerror.h" @@ -66,7 +66,7 @@ const char * const jpeg_std_message_table[] = { */ METHODDEF(void) -error_exit (j_common_ptr cinfo) +error_exit(j_common_ptr cinfo) { /* Always display the message */ (*cinfo->err->output_message) (cinfo); @@ -94,7 +94,7 @@ error_exit (j_common_ptr cinfo) */ METHODDEF(void) -output_message (j_common_ptr cinfo) +output_message(j_common_ptr cinfo) { char buffer[JMSG_LENGTH_MAX]; @@ -124,7 +124,7 @@ output_message (j_common_ptr cinfo) */ METHODDEF(void) -emit_message (j_common_ptr cinfo, int msg_level) +emit_message(j_common_ptr cinfo, int msg_level) { struct jpeg_error_mgr *err = cinfo->err; @@ -153,7 +153,7 @@ emit_message (j_common_ptr cinfo, int msg_level) */ METHODDEF(void) -format_message (j_common_ptr cinfo, char *buffer) +format_message(j_common_ptr cinfo, char *buffer) { struct jpeg_error_mgr *err = cinfo->err; int msg_code = err->msg_code; @@ -208,7 +208,7 @@ format_message (j_common_ptr cinfo, char *buffer) */ METHODDEF(void) -reset_error_mgr (j_common_ptr cinfo) +reset_error_mgr(j_common_ptr cinfo) { cinfo->err->num_warnings = 0; /* trace_level is not reset since it is an application-supplied parameter */ @@ -227,7 +227,7 @@ reset_error_mgr (j_common_ptr cinfo) */ GLOBAL(struct jpeg_error_mgr *) -jpeg_std_error (struct jpeg_error_mgr *err) +jpeg_std_error(struct jpeg_error_mgr *err) { err->error_exit = error_exit; err->emit_message = emit_message; @@ -241,7 +241,7 @@ jpeg_std_error (struct jpeg_error_mgr *err) /* Initialize message table pointers */ err->jpeg_message_table = jpeg_std_message_table; - err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1; + err->last_jpeg_message = (int)JMSG_LASTMSGCODE - 1; err->addon_message_table = NULL; err->first_addon_message = 0; /* for safety */ diff --git a/jerror.h b/jerror.h index 11a07cb..933a369 100644 --- a/jerror.h +++ b/jerror.h @@ -5,7 +5,7 @@ 
* Copyright (C) 1994-1997, Thomas G. Lane. * Modified 1997-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2014, D. R. Commander. + * Copyright (C) 2014, 2017, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -28,7 +28,7 @@ #define JMAKE_ENUM_LIST #else /* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */ -#define JMESSAGE(code,string) +#define JMESSAGE(code, string) #endif /* JERROR_H */ #endif /* JMESSAGE */ @@ -36,7 +36,7 @@ typedef enum { -#define JMESSAGE(code,string) code , +#define JMESSAGE(code, string) code, #endif /* JMAKE_ENUM_LIST */ @@ -44,8 +44,7 @@ JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */ /* For maintenance convenience, list is alphabetical by message code name */ #if JPEG_LIB_VERSION < 70 -JMESSAGE(JERR_ARITH_NOTIMPL, - "Sorry, arithmetic coding is not implemented") +JMESSAGE(JERR_ARITH_NOTIMPL, "Sorry, arithmetic coding is not implemented") #endif JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix") JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix") @@ -154,8 +153,7 @@ JMESSAGE(JTRC_HUFFBITS, " %3d %3d %3d %3d %3d %3d %3d %3d") JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d %d") JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE, "Warning: thumbnail image size does not match data length %u") -JMESSAGE(JTRC_JFIF_EXTENSION, - "JFIF extension marker: type 0x%02x, length %u") +JMESSAGE(JTRC_JFIF_EXTENSION, "JFIF extension marker: type 0x%02x, length %u") JMESSAGE(JTRC_JFIF_THUMBNAIL, " with %d x %d thumbnail image") JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u") JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x") @@ -208,6 +206,7 @@ JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined") JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code") #endif #endif +JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC 
marker") #ifdef JMAKE_ENUM_LIST @@ -228,90 +227,90 @@ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code") /* The first parameter is either type of cinfo pointer */ /* Fatal errors (print message and exit) */ -#define ERREXIT(cinfo,code) \ +#define ERREXIT(cinfo, code) \ ((cinfo)->err->msg_code = (code), \ - (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo))) -#define ERREXIT1(cinfo,code,p1) \ + (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo))) +#define ERREXIT1(cinfo, code, p1) \ ((cinfo)->err->msg_code = (code), \ (cinfo)->err->msg_parm.i[0] = (p1), \ - (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo))) -#define ERREXIT2(cinfo,code,p1,p2) \ + (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo))) +#define ERREXIT2(cinfo, code, p1, p2) \ ((cinfo)->err->msg_code = (code), \ (cinfo)->err->msg_parm.i[0] = (p1), \ (cinfo)->err->msg_parm.i[1] = (p2), \ - (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo))) -#define ERREXIT3(cinfo,code,p1,p2,p3) \ + (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo))) +#define ERREXIT3(cinfo, code, p1, p2, p3) \ ((cinfo)->err->msg_code = (code), \ (cinfo)->err->msg_parm.i[0] = (p1), \ (cinfo)->err->msg_parm.i[1] = (p2), \ (cinfo)->err->msg_parm.i[2] = (p3), \ - (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo))) -#define ERREXIT4(cinfo,code,p1,p2,p3,p4) \ + (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo))) +#define ERREXIT4(cinfo, code, p1, p2, p3, p4) \ ((cinfo)->err->msg_code = (code), \ (cinfo)->err->msg_parm.i[0] = (p1), \ (cinfo)->err->msg_parm.i[1] = (p2), \ (cinfo)->err->msg_parm.i[2] = (p3), \ (cinfo)->err->msg_parm.i[3] = (p4), \ - (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo))) -#define ERREXITS(cinfo,code,str) \ + (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo))) +#define ERREXITS(cinfo, code, str) \ ((cinfo)->err->msg_code = (code), \ strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \ - (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo))) + (*(cinfo)->err->error_exit) 
((j_common_ptr)(cinfo))) #define MAKESTMT(stuff) do { stuff } while (0) /* Nonfatal errors (we can keep going, but the data is probably corrupt) */ -#define WARNMS(cinfo,code) \ +#define WARNMS(cinfo, code) \ ((cinfo)->err->msg_code = (code), \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1)) -#define WARNMS1(cinfo,code,p1) \ + (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), -1)) +#define WARNMS1(cinfo, code, p1) \ ((cinfo)->err->msg_code = (code), \ (cinfo)->err->msg_parm.i[0] = (p1), \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1)) -#define WARNMS2(cinfo,code,p1,p2) \ + (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), -1)) +#define WARNMS2(cinfo, code, p1, p2) \ ((cinfo)->err->msg_code = (code), \ (cinfo)->err->msg_parm.i[0] = (p1), \ (cinfo)->err->msg_parm.i[1] = (p2), \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1)) + (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), -1)) /* Informational/debugging messages */ -#define TRACEMS(cinfo,lvl,code) \ +#define TRACEMS(cinfo, lvl, code) \ ((cinfo)->err->msg_code = (code), \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl))) -#define TRACEMS1(cinfo,lvl,code,p1) \ + (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl))) +#define TRACEMS1(cinfo, lvl, code, p1) \ ((cinfo)->err->msg_code = (code), \ (cinfo)->err->msg_parm.i[0] = (p1), \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl))) -#define TRACEMS2(cinfo,lvl,code,p1,p2) \ + (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl))) +#define TRACEMS2(cinfo, lvl, code, p1, p2) \ ((cinfo)->err->msg_code = (code), \ (cinfo)->err->msg_parm.i[0] = (p1), \ (cinfo)->err->msg_parm.i[1] = (p2), \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl))) -#define TRACEMS3(cinfo,lvl,code,p1,p2,p3) \ - MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \ + (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl))) +#define 
TRACEMS3(cinfo, lvl, code, p1, p2, p3) \ + MAKESTMT(int *_mp = (cinfo)->err->msg_parm.i; \ + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \ (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) -#define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4) \ - MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ + (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)); ) +#define TRACEMS4(cinfo, lvl, code, p1, p2, p3, p4) \ + MAKESTMT(int *_mp = (cinfo)->err->msg_parm.i; \ + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) -#define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5) \ - MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ + (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)); ) +#define TRACEMS5(cinfo, lvl, code, p1, p2, p3, p4, p5) \ + MAKESTMT(int *_mp = (cinfo)->err->msg_parm.i; \ + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ _mp[4] = (p5); \ (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) -#define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8) \ - MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \ - _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ - _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \ + (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)); ) +#define TRACEMS8(cinfo, lvl, code, p1, p2, p3, p4, p5, p6, p7, p8) \ + MAKESTMT(int *_mp = (cinfo)->err->msg_parm.i; \ + _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \ + _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \ (cinfo)->err->msg_code = (code); \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); ) -#define TRACEMSS(cinfo,lvl,code,str) \ + (*(cinfo)->err->emit_message) 
((j_common_ptr)(cinfo), (lvl)); ) +#define TRACEMSS(cinfo, lvl, code, str) \ ((cinfo)->err->msg_code = (code), \ strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \ - (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl))) + (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl))) #endif /* JERROR_H */ diff --git a/jfdctflt.c b/jfdctflt.c index b3da3eb..ab6f6d0 100644 --- a/jfdctflt.c +++ b/jfdctflt.c @@ -57,7 +57,7 @@ */ GLOBAL(void) -jpeg_fdct_float (FAST_FLOAT *data) +jpeg_fdct_float(FAST_FLOAT *data) { FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FAST_FLOAT tmp10, tmp11, tmp12, tmp13; @@ -68,7 +68,7 @@ jpeg_fdct_float (FAST_FLOAT *data) /* Pass 1: process rows. */ dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) { tmp0 = dataptr[0] + dataptr[7]; tmp7 = dataptr[0] - dataptr[7]; tmp1 = dataptr[1] + dataptr[6]; @@ -88,7 +88,7 @@ jpeg_fdct_float (FAST_FLOAT *data) dataptr[0] = tmp10 + tmp11; /* phase 3 */ dataptr[4] = tmp10 - tmp11; - z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ + z1 = (tmp12 + tmp13) * ((FAST_FLOAT)0.707106781); /* c4 */ dataptr[2] = tmp13 + z1; /* phase 5 */ dataptr[6] = tmp13 - z1; @@ -99,10 +99,10 @@ jpeg_fdct_float (FAST_FLOAT *data) tmp12 = tmp6 + tmp7; /* The rotator is modified from fig 4-8 to avoid extra negations. */ - z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */ - z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */ - z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ - z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ + z5 = (tmp10 - tmp12) * ((FAST_FLOAT)0.382683433); /* c6 */ + z2 = ((FAST_FLOAT)0.541196100) * tmp10 + z5; /* c2-c6 */ + z4 = ((FAST_FLOAT)1.306562965) * tmp12 + z5; /* c2+c6 */ + z3 = tmp11 * ((FAST_FLOAT)0.707106781); /* c4 */ z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; @@ -118,15 +118,15 @@ jpeg_fdct_float (FAST_FLOAT *data) /* Pass 2: process columns. 
*/ dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { - tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; - tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; - tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; - tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; - tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; - tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; - tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; - tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7]; + tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7]; + tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6]; + tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6]; + tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5]; + tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5]; + tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4]; + tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4]; /* Even part */ @@ -135,12 +135,12 @@ jpeg_fdct_float (FAST_FLOAT *data) tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ - dataptr[DCTSIZE*4] = tmp10 - tmp11; + dataptr[DCTSIZE * 0] = tmp10 + tmp11; /* phase 3 */ + dataptr[DCTSIZE * 4] = tmp10 - tmp11; - z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ - dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ - dataptr[DCTSIZE*6] = tmp13 - z1; + z1 = (tmp12 + tmp13) * ((FAST_FLOAT)0.707106781); /* c4 */ + dataptr[DCTSIZE * 2] = tmp13 + z1; /* phase 5 */ + dataptr[DCTSIZE * 6] = tmp13 - z1; /* Odd part */ @@ -149,18 +149,18 @@ jpeg_fdct_float (FAST_FLOAT *data) tmp12 = tmp6 + tmp7; /* The rotator is modified from fig 4-8 to avoid extra negations. 
*/ - z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */ - z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */ - z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */ - z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */ + z5 = (tmp10 - tmp12) * ((FAST_FLOAT)0.382683433); /* c6 */ + z2 = ((FAST_FLOAT)0.541196100) * tmp10 + z5; /* c2-c6 */ + z4 = ((FAST_FLOAT)1.306562965) * tmp12 + z5; /* c2+c6 */ + z3 = tmp11 * ((FAST_FLOAT)0.707106781); /* c4 */ z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; - dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ - dataptr[DCTSIZE*3] = z13 - z2; - dataptr[DCTSIZE*1] = z11 + z4; - dataptr[DCTSIZE*7] = z11 - z4; + dataptr[DCTSIZE * 5] = z13 + z2; /* phase 6 */ + dataptr[DCTSIZE * 3] = z13 - z2; + dataptr[DCTSIZE * 1] = z11 + z4; + dataptr[DCTSIZE * 7] = z11 - z4; dataptr++; /* advance pointer to next column */ } diff --git a/jfdctfst.c b/jfdctfst.c index 5cd83a7..4c9ce0d 100644 --- a/jfdctfst.c +++ b/jfdctfst.c @@ -79,10 +79,10 @@ */ #if CONST_BITS == 8 -#define FIX_0_382683433 ((JLONG) 98) /* FIX(0.382683433) */ -#define FIX_0_541196100 ((JLONG) 139) /* FIX(0.541196100) */ -#define FIX_0_707106781 ((JLONG) 181) /* FIX(0.707106781) */ -#define FIX_1_306562965 ((JLONG) 334) /* FIX(1.306562965) */ +#define FIX_0_382683433 ((JLONG)98) /* FIX(0.382683433) */ +#define FIX_0_541196100 ((JLONG)139) /* FIX(0.541196100) */ +#define FIX_0_707106781 ((JLONG)181) /* FIX(0.707106781) */ +#define FIX_1_306562965 ((JLONG)334) /* FIX(1.306562965) */ #else #define FIX_0_382683433 FIX(0.382683433) #define FIX_0_541196100 FIX(0.541196100) @@ -98,7 +98,7 @@ #ifndef USE_ACCURATE_ROUNDING #undef DESCALE -#define DESCALE(x,n) RIGHT_SHIFT(x, n) +#define DESCALE(x, n) RIGHT_SHIFT(x, n) #endif @@ -106,7 +106,7 @@ * descale to yield a DCTELEM result. 
*/ -#define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS)) +#define MULTIPLY(var, const) ((DCTELEM)DESCALE((var) * (const), CONST_BITS)) /* @@ -114,7 +114,7 @@ */ GLOBAL(void) -jpeg_fdct_ifast (DCTELEM *data) +jpeg_fdct_ifast(DCTELEM *data) { DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; DCTELEM tmp10, tmp11, tmp12, tmp13; @@ -126,7 +126,7 @@ jpeg_fdct_ifast (DCTELEM *data) /* Pass 1: process rows. */ dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) { tmp0 = dataptr[0] + dataptr[7]; tmp7 = dataptr[0] - dataptr[7]; tmp1 = dataptr[1] + dataptr[6]; @@ -176,15 +176,15 @@ jpeg_fdct_ifast (DCTELEM *data) /* Pass 2: process columns. */ dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { - tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; - tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; - tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; - tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; - tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; - tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; - tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; - tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7]; + tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7]; + tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6]; + tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6]; + tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5]; + tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5]; + tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4]; + tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4]; /* Even part */ @@ -193,12 +193,12 @@ jpeg_fdct_ifast (DCTELEM *data) tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ - dataptr[DCTSIZE*4] = tmp10 - tmp11; + dataptr[DCTSIZE * 0] = tmp10 + tmp11; /* phase 3 */ + dataptr[DCTSIZE * 4] = tmp10 - tmp11; z1 = MULTIPLY(tmp12 + tmp13, 
FIX_0_707106781); /* c4 */ - dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ - dataptr[DCTSIZE*6] = tmp13 - z1; + dataptr[DCTSIZE * 2] = tmp13 + z1; /* phase 5 */ + dataptr[DCTSIZE * 6] = tmp13 - z1; /* Odd part */ @@ -215,10 +215,10 @@ jpeg_fdct_ifast (DCTELEM *data) z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; - dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ - dataptr[DCTSIZE*3] = z13 - z2; - dataptr[DCTSIZE*1] = z11 + z4; - dataptr[DCTSIZE*7] = z11 - z4; + dataptr[DCTSIZE * 5] = z13 + z2; /* phase 6 */ + dataptr[DCTSIZE * 3] = z13 - z2; + dataptr[DCTSIZE * 1] = z11 + z4; + dataptr[DCTSIZE * 7] = z11 - z4; dataptr++; /* advance pointer to next column */ } diff --git a/jfdctint.c b/jfdctint.c index 169bb94..c0391a9 100644 --- a/jfdctint.c +++ b/jfdctint.c @@ -93,18 +93,18 @@ */ #if CONST_BITS == 13 -#define FIX_0_298631336 ((JLONG) 2446) /* FIX(0.298631336) */ -#define FIX_0_390180644 ((JLONG) 3196) /* FIX(0.390180644) */ -#define FIX_0_541196100 ((JLONG) 4433) /* FIX(0.541196100) */ -#define FIX_0_765366865 ((JLONG) 6270) /* FIX(0.765366865) */ -#define FIX_0_899976223 ((JLONG) 7373) /* FIX(0.899976223) */ -#define FIX_1_175875602 ((JLONG) 9633) /* FIX(1.175875602) */ -#define FIX_1_501321110 ((JLONG) 12299) /* FIX(1.501321110) */ -#define FIX_1_847759065 ((JLONG) 15137) /* FIX(1.847759065) */ -#define FIX_1_961570560 ((JLONG) 16069) /* FIX(1.961570560) */ -#define FIX_2_053119869 ((JLONG) 16819) /* FIX(2.053119869) */ -#define FIX_2_562915447 ((JLONG) 20995) /* FIX(2.562915447) */ -#define FIX_3_072711026 ((JLONG) 25172) /* FIX(3.072711026) */ +#define FIX_0_298631336 ((JLONG)2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((JLONG)3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((JLONG)4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((JLONG)6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((JLONG)7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((JLONG)9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((JLONG)12299) /* 
FIX(1.501321110) */ +#define FIX_1_847759065 ((JLONG)15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((JLONG)16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((JLONG)16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((JLONG)20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((JLONG)25172) /* FIX(3.072711026) */ #else #define FIX_0_298631336 FIX(0.298631336) #define FIX_0_390180644 FIX(0.390180644) @@ -129,9 +129,9 @@ */ #if BITS_IN_JSAMPLE == 8 -#define MULTIPLY(var,const) MULTIPLY16C16(var,const) +#define MULTIPLY(var, const) MULTIPLY16C16(var, const) #else -#define MULTIPLY(var,const) ((var) * (const)) +#define MULTIPLY(var, const) ((var) * (const)) #endif @@ -140,7 +140,7 @@ */ GLOBAL(void) -jpeg_fdct_islow (DCTELEM *data) +jpeg_fdct_islow(DCTELEM *data) { JLONG tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; JLONG tmp10, tmp11, tmp12, tmp13; @@ -154,7 +154,7 @@ jpeg_fdct_islow (DCTELEM *data) /* furthermore, we scale the results by 2**PASS1_BITS. */ dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) { tmp0 = dataptr[0] + dataptr[7]; tmp7 = dataptr[0] - dataptr[7]; tmp1 = dataptr[1] + dataptr[6]; @@ -173,14 +173,14 @@ jpeg_fdct_islow (DCTELEM *data) tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - dataptr[0] = (DCTELEM) LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); - dataptr[4] = (DCTELEM) LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); + dataptr[0] = (DCTELEM)LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); + dataptr[4] = (DCTELEM)LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); - dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS-PASS1_BITS); - dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS-PASS1_BITS); + dataptr[2] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), + CONST_BITS - PASS1_BITS); + dataptr[6] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, -FIX_1_847759065), + CONST_BITS - PASS1_BITS); /* Odd part per 
figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). @@ -197,18 +197,18 @@ jpeg_fdct_islow (DCTELEM *data) tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ - z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ - z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ - z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ - z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */ + z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */ z3 += z5; z4 += z5; - dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); - dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); - dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); - dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS); + dataptr[5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS); + dataptr[3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS); + dataptr[1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS); dataptr += DCTSIZE; /* advance pointer to next row */ } @@ -219,15 +219,15 @@ jpeg_fdct_islow (DCTELEM *data) */ dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { - tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; - tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; - tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; - tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; - tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; - tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; - tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; - tmp4 = 
dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7]; + tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7]; + tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6]; + tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6]; + tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5]; + tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5]; + tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4]; + tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4]; /* Even part per LL&M figure 1 --- note that published figure is faulty; * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". @@ -238,14 +238,16 @@ jpeg_fdct_islow (DCTELEM *data) tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); - dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); + dataptr[DCTSIZE * 0] = (DCTELEM)DESCALE(tmp10 + tmp11, PASS1_BITS); + dataptr[DCTSIZE * 4] = (DCTELEM)DESCALE(tmp10 - tmp11, PASS1_BITS); z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); - dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE * 2] = + (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), + CONST_BITS + PASS1_BITS); + dataptr[DCTSIZE * 6] = + (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, -FIX_1_847759065), + CONST_BITS + PASS1_BITS); /* Odd part per figure 8 --- note paper omits factor of sqrt(2). * cK represents cos(K*pi/16). 
@@ -262,22 +264,22 @@ jpeg_fdct_islow (DCTELEM *data) tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ - z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ - z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ - z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ - z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */ + z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */ z3 += z5; z4 += z5; - dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, - CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE * 7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, + CONST_BITS + PASS1_BITS); + dataptr[DCTSIZE * 5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, + CONST_BITS + PASS1_BITS); + dataptr[DCTSIZE * 3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, + CONST_BITS + PASS1_BITS); + dataptr[DCTSIZE * 1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, + CONST_BITS + PASS1_BITS); dataptr++; /* advance pointer to next column */ } diff --git a/jidctflt.c b/jidctflt.c index 68c521e..5aee74e 100644 --- a/jidctflt.c +++ b/jidctflt.c @@ -61,7 +61,7 @@ * entry; produce a float result. 
*/ -#define DEQUANTIZE(coef,quantval) (((FAST_FLOAT) (coef)) * (quantval)) +#define DEQUANTIZE(coef, quantval) (((FAST_FLOAT)(coef)) * (quantval)) /* @@ -69,9 +69,9 @@ */ GLOBAL(void) -jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FAST_FLOAT tmp10, tmp11, tmp12, tmp13; @@ -83,12 +83,12 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPLE *range_limit = cinfo->sample_range_limit; int ctr; FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */ - #define _0_125 ((FLOAT_MULT_TYPE)0.125) +#define _0_125 ((FLOAT_MULT_TYPE)0.125) /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table; + quantptr = (FLOAT_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; ctr--) { /* Due to quantization, we will usually find that many of the input @@ -100,22 +100,22 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, * column DCT calculations can be simplified this way. 
*/ - if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { + if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 && + inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 && + inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 && + inptr[DCTSIZE * 7] == 0) { /* AC terms all zero */ - FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], - quantptr[DCTSIZE*0] * _0_125); - - wsptr[DCTSIZE*0] = dcval; - wsptr[DCTSIZE*1] = dcval; - wsptr[DCTSIZE*2] = dcval; - wsptr[DCTSIZE*3] = dcval; - wsptr[DCTSIZE*4] = dcval; - wsptr[DCTSIZE*5] = dcval; - wsptr[DCTSIZE*6] = dcval; - wsptr[DCTSIZE*7] = dcval; + FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE * 0], + quantptr[DCTSIZE * 0] * _0_125); + + wsptr[DCTSIZE * 0] = dcval; + wsptr[DCTSIZE * 1] = dcval; + wsptr[DCTSIZE * 2] = dcval; + wsptr[DCTSIZE * 3] = dcval; + wsptr[DCTSIZE * 4] = dcval; + wsptr[DCTSIZE * 5] = dcval; + wsptr[DCTSIZE * 6] = dcval; + wsptr[DCTSIZE * 7] = dcval; inptr++; /* advance pointers to next column */ quantptr++; @@ -125,16 +125,16 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0] * _0_125); - tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2] * _0_125); - tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4] * _0_125); - tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6] * _0_125); + tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0] * _0_125); + tmp1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2] * _0_125); + tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4] * _0_125); + tmp3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6] * _0_125); tmp10 = tmp0 + tmp2; /* phase 3 */ tmp11 = tmp0 - tmp2; tmp13 = tmp1 + tmp3; /* phases 5-3 */ - tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */ + tmp12 = (tmp1 - tmp3) * 
((FAST_FLOAT)1.414213562) - tmp13; /* 2*c4 */ tmp0 = tmp10 + tmp13; /* phase 2 */ tmp3 = tmp10 - tmp13; @@ -143,10 +143,10 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1] * _0_125); - tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3] * _0_125); - tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5] * _0_125); - tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7] * _0_125); + tmp4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1] * _0_125); + tmp5 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3] * _0_125); + tmp6 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5] * _0_125); + tmp7 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7] * _0_125); z13 = tmp6 + tmp5; /* phase 6 */ z10 = tmp6 - tmp5; @@ -154,24 +154,24 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, z12 = tmp4 - tmp7; tmp7 = z11 + z13; /* phase 5 */ - tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */ + tmp11 = (z11 - z13) * ((FAST_FLOAT)1.414213562); /* 2*c4 */ - z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ - tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */ - tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */ + z5 = (z10 + z12) * ((FAST_FLOAT)1.847759065); /* 2*c2 */ + tmp10 = z5 - z12 * ((FAST_FLOAT)1.082392200); /* 2*(c2-c6) */ + tmp12 = z5 - z10 * ((FAST_FLOAT)2.613125930); /* 2*(c2+c6) */ tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; tmp4 = tmp10 - tmp5; - wsptr[DCTSIZE*0] = tmp0 + tmp7; - wsptr[DCTSIZE*7] = tmp0 - tmp7; - wsptr[DCTSIZE*1] = tmp1 + tmp6; - wsptr[DCTSIZE*6] = tmp1 - tmp6; - wsptr[DCTSIZE*2] = tmp2 + tmp5; - wsptr[DCTSIZE*5] = tmp2 - tmp5; - wsptr[DCTSIZE*3] = tmp3 + tmp4; - wsptr[DCTSIZE*4] = tmp3 - tmp4; + wsptr[DCTSIZE * 0] = tmp0 + tmp7; + wsptr[DCTSIZE * 7] = tmp0 - tmp7; + wsptr[DCTSIZE * 1] = tmp1 + tmp6; + wsptr[DCTSIZE * 6] = tmp1 - tmp6; + wsptr[DCTSIZE * 2] = tmp2 
+ tmp5; + wsptr[DCTSIZE * 5] = tmp2 - tmp5; + wsptr[DCTSIZE * 3] = tmp3 + tmp4; + wsptr[DCTSIZE * 4] = tmp3 - tmp4; inptr++; /* advance pointers to next column */ quantptr++; @@ -192,12 +192,12 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Apply signed->unsigned and prepare float->int conversion */ - z5 = wsptr[0] + ((FAST_FLOAT) CENTERJSAMPLE + (FAST_FLOAT) 0.5); + z5 = wsptr[0] + ((FAST_FLOAT)CENTERJSAMPLE + (FAST_FLOAT)0.5); tmp10 = z5 + wsptr[4]; tmp11 = z5 - wsptr[4]; tmp13 = wsptr[2] + wsptr[6]; - tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13; + tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT)1.414213562) - tmp13; tmp0 = tmp10 + tmp13; tmp3 = tmp10 - tmp13; @@ -212,11 +212,11 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, z12 = wsptr[1] - wsptr[7]; tmp7 = z11 + z13; - tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); + tmp11 = (z11 - z13) * ((FAST_FLOAT)1.414213562); - z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ - tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */ - tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */ + z5 = (z10 + z12) * ((FAST_FLOAT)1.847759065); /* 2*c2 */ + tmp10 = z5 - z12 * ((FAST_FLOAT)1.082392200); /* 2*(c2-c6) */ + tmp12 = z5 - z10 * ((FAST_FLOAT)2.613125930); /* 2*(c2+c6) */ tmp6 = tmp12 - tmp7; tmp5 = tmp11 - tmp6; @@ -224,14 +224,14 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage: float->int conversion and range-limit */ - outptr[0] = range_limit[((int) (tmp0 + tmp7)) & RANGE_MASK]; - outptr[7] = range_limit[((int) (tmp0 - tmp7)) & RANGE_MASK]; - outptr[1] = range_limit[((int) (tmp1 + tmp6)) & RANGE_MASK]; - outptr[6] = range_limit[((int) (tmp1 - tmp6)) & RANGE_MASK]; - outptr[2] = range_limit[((int) (tmp2 + tmp5)) & RANGE_MASK]; - outptr[5] = range_limit[((int) (tmp2 - tmp5)) & RANGE_MASK]; - outptr[3] = range_limit[((int) (tmp3 + tmp4)) & RANGE_MASK]; - 
outptr[4] = range_limit[((int) (tmp3 - tmp4)) & RANGE_MASK]; + outptr[0] = range_limit[((int)(tmp0 + tmp7)) & RANGE_MASK]; + outptr[7] = range_limit[((int)(tmp0 - tmp7)) & RANGE_MASK]; + outptr[1] = range_limit[((int)(tmp1 + tmp6)) & RANGE_MASK]; + outptr[6] = range_limit[((int)(tmp1 - tmp6)) & RANGE_MASK]; + outptr[2] = range_limit[((int)(tmp2 + tmp5)) & RANGE_MASK]; + outptr[5] = range_limit[((int)(tmp2 - tmp5)) & RANGE_MASK]; + outptr[3] = range_limit[((int)(tmp3 + tmp4)) & RANGE_MASK]; + outptr[4] = range_limit[((int)(tmp3 - tmp4)) & RANGE_MASK]; wsptr += DCTSIZE; /* advance pointer to next row */ } diff --git a/jidctfst.c b/jidctfst.c index 10db739..89a20c9 100644 --- a/jidctfst.c +++ b/jidctfst.c @@ -92,10 +92,10 @@ */ #if CONST_BITS == 8 -#define FIX_1_082392200 ((JLONG) 277) /* FIX(1.082392200) */ -#define FIX_1_414213562 ((JLONG) 362) /* FIX(1.414213562) */ -#define FIX_1_847759065 ((JLONG) 473) /* FIX(1.847759065) */ -#define FIX_2_613125930 ((JLONG) 669) /* FIX(2.613125930) */ +#define FIX_1_082392200 ((JLONG)277) /* FIX(1.082392200) */ +#define FIX_1_414213562 ((JLONG)362) /* FIX(1.414213562) */ +#define FIX_1_847759065 ((JLONG)473) /* FIX(1.847759065) */ +#define FIX_2_613125930 ((JLONG)669) /* FIX(2.613125930) */ #else #define FIX_1_082392200 FIX(1.082392200) #define FIX_1_414213562 FIX(1.414213562) @@ -111,7 +111,7 @@ #ifndef USE_ACCURATE_ROUNDING #undef DESCALE -#define DESCALE(x,n) RIGHT_SHIFT(x, n) +#define DESCALE(x, n) RIGHT_SHIFT(x, n) #endif @@ -119,7 +119,7 @@ * descale to yield a DCTELEM result. 
*/ -#define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS)) +#define MULTIPLY(var, const) ((DCTELEM)DESCALE((var) * (const), CONST_BITS)) /* Dequantize a coefficient by multiplying it by the multiplier-table @@ -129,10 +129,10 @@ */ #if BITS_IN_JSAMPLE == 8 -#define DEQUANTIZE(coef,quantval) (((IFAST_MULT_TYPE) (coef)) * (quantval)) +#define DEQUANTIZE(coef, quantval) (((IFAST_MULT_TYPE)(coef)) * (quantval)) #else -#define DEQUANTIZE(coef,quantval) \ - DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS) +#define DEQUANTIZE(coef, quantval) \ + DESCALE((coef) * (quantval), IFAST_SCALE_BITS - PASS1_BITS) #endif @@ -147,19 +147,19 @@ #else #define DCTELEMBITS 32 /* DCTELEM must be 32 bits */ #endif -#define IRIGHT_SHIFT(x,shft) \ - ((ishift_temp = (x)) < 0 ? \ - (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \ - (ishift_temp >> (shft))) +#define IRIGHT_SHIFT(x, shft) \ + ((ishift_temp = (x)) < 0 ? \ + (ishift_temp >> (shft)) | ((~((DCTELEM)0)) << (DCTELEMBITS - (shft))) : \ + (ishift_temp >> (shft))) #else #define ISHIFT_TEMPS -#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define IRIGHT_SHIFT(x, shft) ((x) >> (shft)) #endif #ifdef USE_ACCURATE_ROUNDING -#define IDESCALE(x,n) ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n)) +#define IDESCALE(x, n) ((int)IRIGHT_SHIFT((x) + (1 << ((n) - 1)), n)) #else -#define IDESCALE(x,n) ((int) IRIGHT_SHIFT(x, n)) +#define IDESCALE(x, n) ((int)IRIGHT_SHIFT(x, n)) #endif @@ -168,9 +168,9 @@ */ GLOBAL(void) -jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; DCTELEM tmp10, tmp11, tmp12, tmp13; @@ -188,7 +188,7 @@ jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Pass 1: process 
columns from input, store into work array. */ inptr = coef_block; - quantptr = (IFAST_MULT_TYPE *) compptr->dct_table; + quantptr = (IFAST_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; ctr--) { /* Due to quantization, we will usually find that many of the input @@ -200,21 +200,21 @@ jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, * column DCT calculations can be simplified this way. */ - if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { + if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 && + inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 && + inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 && + inptr[DCTSIZE * 7] == 0) { /* AC terms all zero */ - int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + int dcval = (int)DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); - wsptr[DCTSIZE*0] = dcval; - wsptr[DCTSIZE*1] = dcval; - wsptr[DCTSIZE*2] = dcval; - wsptr[DCTSIZE*3] = dcval; - wsptr[DCTSIZE*4] = dcval; - wsptr[DCTSIZE*5] = dcval; - wsptr[DCTSIZE*6] = dcval; - wsptr[DCTSIZE*7] = dcval; + wsptr[DCTSIZE * 0] = dcval; + wsptr[DCTSIZE * 1] = dcval; + wsptr[DCTSIZE * 2] = dcval; + wsptr[DCTSIZE * 3] = dcval; + wsptr[DCTSIZE * 4] = dcval; + wsptr[DCTSIZE * 5] = dcval; + wsptr[DCTSIZE * 6] = dcval; + wsptr[DCTSIZE * 7] = dcval; inptr++; /* advance pointers to next column */ quantptr++; @@ -224,10 +224,10 @@ jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); + tmp1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 
2]); + tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); + tmp3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); tmp10 = tmp0 + tmp2; /* phase 3 */ tmp11 = tmp0 - tmp2; @@ -242,10 +242,10 @@ jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + tmp4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + tmp5 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + tmp6 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + tmp7 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); z13 = tmp6 + tmp5; /* phase 6 */ z10 = tmp6 - tmp5; @@ -257,20 +257,20 @@ jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ - tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ + tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5; /* -2*(c2+c6) */ tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; tmp4 = tmp10 + tmp5; - wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); - wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); - wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); - wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); - wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5); - wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); - wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); - wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); + wsptr[DCTSIZE * 0] = (int)(tmp0 + tmp7); + wsptr[DCTSIZE * 7] = (int)(tmp0 - tmp7); + wsptr[DCTSIZE * 1] = (int)(tmp1 + tmp6); + wsptr[DCTSIZE * 6] = (int)(tmp1 - tmp6); + wsptr[DCTSIZE * 2] = (int)(tmp2 + tmp5); + wsptr[DCTSIZE * 5] = (int)(tmp2 - tmp5); + wsptr[DCTSIZE * 4] = (int)(tmp3 + tmp4); + wsptr[DCTSIZE * 3] = (int)(tmp3 - tmp4); inptr++; /* advance pointers to next column */ quantptr++; 
@@ -296,8 +296,8 @@ jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; + JSAMPLE dcval = + range_limit[IDESCALE(wsptr[0], PASS1_BITS + 3) & RANGE_MASK]; outptr[0] = dcval; outptr[1] = dcval; @@ -315,12 +315,12 @@ jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ - tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); - tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); + tmp10 = ((DCTELEM)wsptr[0] + (DCTELEM)wsptr[4]); + tmp11 = ((DCTELEM)wsptr[0] - (DCTELEM)wsptr[4]); - tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); - tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562) - - tmp13; + tmp13 = ((DCTELEM)wsptr[2] + (DCTELEM)wsptr[6]); + tmp12 = + MULTIPLY((DCTELEM)wsptr[2] - (DCTELEM)wsptr[6], FIX_1_414213562) - tmp13; tmp0 = tmp10 + tmp13; tmp3 = tmp10 - tmp13; @@ -329,17 +329,17 @@ jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; - z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; - z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; - z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; + z13 = (DCTELEM)wsptr[5] + (DCTELEM)wsptr[3]; + z10 = (DCTELEM)wsptr[5] - (DCTELEM)wsptr[3]; + z11 = (DCTELEM)wsptr[1] + (DCTELEM)wsptr[7]; + z12 = (DCTELEM)wsptr[1] - (DCTELEM)wsptr[7]; tmp7 = z11 + z13; /* phase 5 */ tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ - tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ + tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5; /* -2*(c2+c6) */ tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; @@ -347,22 +347,22 @@ jpeg_idct_ifast 
(j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage: scale down by a factor of 8 and range-limit */ - outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = + range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS + 3) & RANGE_MASK]; + outptr[7] = + range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS + 3) & RANGE_MASK]; + outptr[1] = + range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS + 3) & RANGE_MASK]; + outptr[6] = + range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS + 3) & RANGE_MASK]; + outptr[2] = + range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS + 3) & RANGE_MASK]; + outptr[5] = + range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS + 3) & RANGE_MASK]; + outptr[4] = + range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS + 3) & RANGE_MASK]; + outptr[3] = + range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS + 3) & RANGE_MASK]; wsptr += DCTSIZE; /* advance pointer to next row */ } diff --git a/jidctint.c b/jidctint.c index 3ac6caf..5557342 100644 --- a/jidctint.c +++ b/jidctint.c @@ -115,18 +115,18 @@ */ #if CONST_BITS == 13 -#define FIX_0_298631336 ((JLONG) 2446) /* FIX(0.298631336) */ -#define FIX_0_390180644 ((JLONG) 3196) /* FIX(0.390180644) */ -#define FIX_0_541196100 ((JLONG) 4433) /* FIX(0.541196100) */ -#define FIX_0_765366865 ((JLONG) 6270) /* FIX(0.765366865) */ -#define FIX_0_899976223 ((JLONG) 7373) /* FIX(0.899976223) */ -#define FIX_1_175875602 ((JLONG) 9633) /* FIX(1.175875602) */ -#define 
FIX_1_501321110 ((JLONG) 12299) /* FIX(1.501321110) */ -#define FIX_1_847759065 ((JLONG) 15137) /* FIX(1.847759065) */ -#define FIX_1_961570560 ((JLONG) 16069) /* FIX(1.961570560) */ -#define FIX_2_053119869 ((JLONG) 16819) /* FIX(2.053119869) */ -#define FIX_2_562915447 ((JLONG) 20995) /* FIX(2.562915447) */ -#define FIX_3_072711026 ((JLONG) 25172) /* FIX(3.072711026) */ +#define FIX_0_298631336 ((JLONG)2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((JLONG)3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((JLONG)4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((JLONG)6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((JLONG)7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((JLONG)9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((JLONG)12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((JLONG)15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((JLONG)16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((JLONG)16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((JLONG)20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((JLONG)25172) /* FIX(3.072711026) */ #else #define FIX_0_298631336 FIX(0.298631336) #define FIX_0_390180644 FIX(0.390180644) @@ -151,9 +151,9 @@ */ #if BITS_IN_JSAMPLE == 8 -#define MULTIPLY(var,const) MULTIPLY16C16(var,const) +#define MULTIPLY(var, const) MULTIPLY16C16(var, const) #else -#define MULTIPLY(var,const) ((var) * (const)) +#define MULTIPLY(var, const) ((var) * (const)) #endif @@ -162,7 +162,7 @@ * are 16 bits or less, so either int or short multiply will work. 
*/ -#define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) +#define DEQUANTIZE(coef, quantval) (((ISLOW_MULT_TYPE)(coef)) * (quantval)) /* @@ -170,9 +170,9 @@ */ GLOBAL(void) -jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp0, tmp1, tmp2, tmp3; JLONG tmp10, tmp11, tmp12, tmp13; @@ -191,7 +191,7 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* furthermore, we scale the results by 2**PASS1_BITS. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; ctr--) { /* Due to quantization, we will usually find that many of the input @@ -203,22 +203,22 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, * column DCT calculations can be simplified this way. 
*/ - if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { + if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 && + inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 && + inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 && + inptr[DCTSIZE * 7] == 0) { /* AC terms all zero */ - int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]), - PASS1_BITS); - - wsptr[DCTSIZE*0] = dcval; - wsptr[DCTSIZE*1] = dcval; - wsptr[DCTSIZE*2] = dcval; - wsptr[DCTSIZE*3] = dcval; - wsptr[DCTSIZE*4] = dcval; - wsptr[DCTSIZE*5] = dcval; - wsptr[DCTSIZE*6] = dcval; - wsptr[DCTSIZE*7] = dcval; + int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0], + quantptr[DCTSIZE * 0]), PASS1_BITS); + + wsptr[DCTSIZE * 0] = dcval; + wsptr[DCTSIZE * 1] = dcval; + wsptr[DCTSIZE * 2] = dcval; + wsptr[DCTSIZE * 3] = dcval; + wsptr[DCTSIZE * 4] = dcval; + wsptr[DCTSIZE * 5] = dcval; + wsptr[DCTSIZE * 6] = dcval; + wsptr[DCTSIZE * 7] = dcval; inptr++; /* advance pointers to next column */ quantptr++; @@ -229,15 +229,15 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). 
*/ - z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); z1 = MULTIPLY(z2 + z3, FIX_0_541196100); - tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); + tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); - z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); @@ -251,10 +251,10 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */ - tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); - tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + tmp0 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); + tmp1 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + tmp2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + tmp3 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; @@ -266,10 +266,10 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ - z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ - z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ - z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ - z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + 
z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */ + z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */ z3 += z5; z4 += z5; @@ -281,14 +281,14 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ - wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE * 0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS - PASS1_BITS); + wsptr[DCTSIZE * 7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS - PASS1_BITS); + wsptr[DCTSIZE * 1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS - PASS1_BITS); + wsptr[DCTSIZE * 6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS - PASS1_BITS); + wsptr[DCTSIZE * 2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS - PASS1_BITS); + wsptr[DCTSIZE * 5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS - PASS1_BITS); + wsptr[DCTSIZE * 3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS - PASS1_BITS); + wsptr[DCTSIZE * 4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS - PASS1_BITS); inptr++; /* advance pointers to next column */ quantptr++; @@ -314,8 +314,8 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3) - & 
RANGE_MASK]; + JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0], + PASS1_BITS + 3) & RANGE_MASK]; outptr[0] = dcval; outptr[1] = dcval; @@ -334,15 +334,15 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). */ - z2 = (JLONG) wsptr[2]; - z3 = (JLONG) wsptr[6]; + z2 = (JLONG)wsptr[2]; + z3 = (JLONG)wsptr[6]; z1 = MULTIPLY(z2 + z3, FIX_0_541196100); - tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); + tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); - tmp0 = LEFT_SHIFT((JLONG) wsptr[0] + (JLONG) wsptr[4], CONST_BITS); - tmp1 = LEFT_SHIFT((JLONG) wsptr[0] - (JLONG) wsptr[4], CONST_BITS); + tmp0 = LEFT_SHIFT((JLONG)wsptr[0] + (JLONG)wsptr[4], CONST_BITS); + tmp1 = LEFT_SHIFT((JLONG)wsptr[0] - (JLONG)wsptr[4], CONST_BITS); tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; @@ -353,10 +353,10 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 
*/ - tmp0 = (JLONG) wsptr[7]; - tmp1 = (JLONG) wsptr[5]; - tmp2 = (JLONG) wsptr[3]; - tmp3 = (JLONG) wsptr[1]; + tmp0 = (JLONG)wsptr[7]; + tmp1 = (JLONG)wsptr[5]; + tmp2 = (JLONG)wsptr[3]; + tmp3 = (JLONG)wsptr[1]; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; @@ -368,10 +368,10 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ - z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ - z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ - z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ - z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */ + z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */ z3 += z5; z4 += z5; @@ -383,30 +383,30 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ - outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, - 
CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp3, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[7] = range_limit[(int)DESCALE(tmp10 - tmp3, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)DESCALE(tmp11 + tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[6] = range_limit[(int)DESCALE(tmp11 - tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)DESCALE(tmp12 + tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)DESCALE(tmp12 - tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)DESCALE(tmp13 + tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)DESCALE(tmp13 - tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += DCTSIZE; /* advance pointer to next row */ } @@ -424,9 +424,9 @@ jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_7x7(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; JLONG z1, z2, z3; @@ -436,25 +436,25 @@ jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[7*7]; /* buffers data between passes */ + int workspace[7 * 7]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. 
*/ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp13 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); tmp13 = LEFT_SHIFT(tmp13, CONST_BITS); /* Add fudge factor here for final descale. */ - tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp13 += ONE << (CONST_BITS - PASS1_BITS - 1); - z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ @@ -468,15 +468,15 @@ jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ tmp0 = tmp1 - tmp2; tmp1 += tmp2; - tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ + tmp2 = MULTIPLY(z2 + z3, -FIX(1.378756276)); /* -c1 */ tmp1 += tmp2; z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ tmp0 += z2; @@ -484,13 +484,13 @@ jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, 
CONST_BITS-PASS1_BITS); - wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); - wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); - wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); - wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); - wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); - wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); + wsptr[7 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS); + wsptr[7 * 6] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS); + wsptr[7 * 1] = (int)RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS - PASS1_BITS); + wsptr[7 * 5] = (int)RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS - PASS1_BITS); + wsptr[7 * 2] = (int)RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS - PASS1_BITS); + wsptr[7 * 4] = (int)RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS - PASS1_BITS); + wsptr[7 * 3] = (int)RIGHT_SHIFT(tmp13, CONST_BITS - PASS1_BITS); } /* Pass 2: process 7 rows from work array, store into output array. */ @@ -502,12 +502,12 @@ jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. 
*/ - tmp13 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp13 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); tmp13 = LEFT_SHIFT(tmp13, CONST_BITS); - z1 = (JLONG) wsptr[2]; - z2 = (JLONG) wsptr[4]; - z3 = (JLONG) wsptr[6]; + z1 = (JLONG)wsptr[2]; + z2 = (JLONG)wsptr[4]; + z3 = (JLONG)wsptr[6]; tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ @@ -521,15 +521,15 @@ jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = (JLONG) wsptr[1]; - z2 = (JLONG) wsptr[3]; - z3 = (JLONG) wsptr[5]; + z1 = (JLONG)wsptr[1]; + z2 = (JLONG)wsptr[3]; + z3 = (JLONG)wsptr[5]; tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ tmp0 = tmp1 - tmp2; tmp1 += tmp2; - tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ + tmp2 = MULTIPLY(z2 + z3, -FIX(1.378756276)); /* -c1 */ tmp1 += tmp2; z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ tmp0 += z2; @@ -537,27 +537,27 @@ jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0, + 
CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 7; /* advance pointer to next row */ } @@ -573,9 +573,9 @@ jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; JLONG z1, z2, z3; @@ -585,35 +585,35 @@ jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[6*6]; /* buffers data between passes */ + int workspace[6 * 6]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); /* Add fudge factor here for final descale. 
*/ - tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); - tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1); + tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ tmp1 = tmp0 + tmp10; - tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); - tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS - PASS1_BITS); + tmp10 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ tmp10 = tmp1 + tmp0; tmp12 = tmp1 - tmp0; /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS); tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS); @@ -621,12 +621,12 @@ jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); - wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); - wsptr[6*1] = (int) (tmp11 + tmp1); - wsptr[6*4] = (int) (tmp11 - tmp1); - wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); - wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[6 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS); + wsptr[6 * 5] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS); + wsptr[6 * 1] = (int)(tmp11 + tmp1); + wsptr[6 * 4] = (int)(tmp11 - tmp1); + wsptr[6 * 2] = (int)RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS - PASS1_BITS); + wsptr[6 * 3] = (int)RIGHT_SHIFT(tmp12 - tmp2, 
CONST_BITS - PASS1_BITS); } /* Pass 2: process 6 rows from work array, store into output array. */ @@ -638,22 +638,22 @@ jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. */ - tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); - tmp2 = (JLONG) wsptr[4]; + tmp2 = (JLONG)wsptr[4]; tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ tmp1 = tmp0 + tmp10; tmp11 = tmp0 - tmp10 - tmp10; - tmp10 = (JLONG) wsptr[2]; + tmp10 = (JLONG)wsptr[2]; tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ tmp10 = tmp1 + tmp0; tmp12 = tmp1 - tmp0; /* Odd part */ - z1 = (JLONG) wsptr[1]; - z2 = (JLONG) wsptr[3]; - z3 = (JLONG) wsptr[5]; + z1 = (JLONG)wsptr[1]; + z2 = (JLONG)wsptr[3]; + z3 = (JLONG)wsptr[5]; tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS); tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS); @@ -661,24 +661,24 @@ jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + 
tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 6; /* advance pointer to next row */ } @@ -694,9 +694,9 @@ jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_5x5(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp0, tmp1, tmp10, tmp11, tmp12; JLONG z1, z2, z3; @@ -706,23 +706,23 @@ jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[5*5]; /* buffers data between passes */ + int workspace[5 * 5]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp12 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); tmp12 = LEFT_SHIFT(tmp12, CONST_BITS); /* Add fudge factor here for final descale. 
*/ - tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); - tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp12 += ONE << (CONST_BITS - PASS1_BITS - 1); + tmp0 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + tmp1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ z3 = tmp12 + z2; @@ -732,8 +732,8 @@ jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ @@ -741,11 +741,11 @@ jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); - wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); - wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); - wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); - wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); + wsptr[5 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS); + wsptr[5 * 4] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS); + wsptr[5 * 1] = (int)RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS - PASS1_BITS); + wsptr[5 * 3] = (int)RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS - PASS1_BITS); + wsptr[5 * 2] = (int)RIGHT_SHIFT(tmp12, CONST_BITS - PASS1_BITS); } /* Pass 2: process 5 rows from work array, store into output array. */ @@ -757,10 +757,10 @@ jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. 
*/ - tmp12 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp12 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); tmp12 = LEFT_SHIFT(tmp12, CONST_BITS); - tmp0 = (JLONG) wsptr[2]; - tmp1 = (JLONG) wsptr[4]; + tmp0 = (JLONG)wsptr[2]; + tmp1 = (JLONG)wsptr[4]; z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ z3 = tmp12 + z2; @@ -770,8 +770,8 @@ jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z2 = (JLONG) wsptr[1]; - z3 = (JLONG) wsptr[3]; + z2 = (JLONG)wsptr[1]; + z3 = (JLONG)wsptr[3]; z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ @@ -779,21 +779,21 @@ jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 5; /* advance pointer to next row */ } @@ -809,9 +809,9 @@ jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) 
-jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_3x3(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp0, tmp2, tmp10, tmp12; JCOEFPTR inptr; @@ -820,36 +820,36 @@ jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[3*3]; /* buffers data between passes */ + int workspace[3 * 3]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); /* Add fudge factor here for final descale. 
*/ - tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); - tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1); + tmp2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ tmp10 = tmp0 + tmp12; tmp2 = tmp0 - tmp12 - tmp12; /* Odd part */ - tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + tmp12 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ /* Final output stage */ - wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); - wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); - wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); + wsptr[3 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS); + wsptr[3 * 2] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS); + wsptr[3 * 1] = (int)RIGHT_SHIFT(tmp2, CONST_BITS - PASS1_BITS); } /* Pass 2: process 3 rows from work array, store into output array. */ @@ -861,29 +861,29 @@ jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. 
*/ - tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); - tmp2 = (JLONG) wsptr[2]; + tmp2 = (JLONG)wsptr[2]; tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ tmp10 = tmp0 + tmp12; tmp2 = tmp0 - tmp12 - tmp12; /* Odd part */ - tmp12 = (JLONG) wsptr[1]; + tmp12 = (JLONG)wsptr[1]; tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 3; /* advance pointer to next row */ } @@ -899,9 +899,9 @@ jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_9x9(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; JLONG z1, z2, z3, z4; @@ -911,25 +911,25 @@ jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[8*9]; /* buffers data between passes */ + int workspace[8 * 9]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. 
*/ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); /* Add fudge factor here for final descale. */ - tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1); - z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ tmp1 = tmp0 + tmp3; @@ -949,12 +949,12 @@ jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); - z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ + z2 = MULTIPLY(z2, -FIX(1.224744871)); /* -c3 */ tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ @@ -966,15 +966,15 @@ jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); - wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, 
CONST_BITS-PASS1_BITS); - wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); - wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); - wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); - wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); - wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); - wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); - wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); + wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS); + wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS); + wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS - PASS1_BITS); + wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS - PASS1_BITS); + wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS - PASS1_BITS); + wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS - PASS1_BITS); + wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS - PASS1_BITS); + wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS - PASS1_BITS); + wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp14, CONST_BITS - PASS1_BITS); } /* Pass 2: process 9 rows from work array, store into output array. */ @@ -986,12 +986,12 @@ jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. 
*/ - tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); - z1 = (JLONG) wsptr[2]; - z2 = (JLONG) wsptr[4]; - z3 = (JLONG) wsptr[6]; + z1 = (JLONG)wsptr[2]; + z2 = (JLONG)wsptr[4]; + z3 = (JLONG)wsptr[6]; tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ tmp1 = tmp0 + tmp3; @@ -1011,12 +1011,12 @@ jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = (JLONG) wsptr[1]; - z2 = (JLONG) wsptr[3]; - z3 = (JLONG) wsptr[5]; - z4 = (JLONG) wsptr[7]; + z1 = (JLONG)wsptr[1]; + z2 = (JLONG)wsptr[3]; + z3 = (JLONG)wsptr[5]; + z4 = (JLONG)wsptr[7]; - z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ + z2 = MULTIPLY(z2, -FIX(1.224744871)); /* -c3 */ tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ @@ -1028,33 +1028,33 @@ jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS + PASS1_BITS + 3) & + 
RANGE_MASK]; + outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp13 + tmp3, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp13 - tmp3, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 8; /* advance pointer to next row */ } @@ -1070,9 +1070,9 @@ jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp10, tmp11, tmp12, tmp13, tmp14; JLONG tmp20, tmp21, tmp22, tmp23, tmp24; @@ -1083,32 +1083,32 @@ jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[8*10]; /* buffers data between passes */ + int workspace[8 * 10]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. 
*/ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); z3 = LEFT_SHIFT(z3, CONST_BITS); /* Add fudge factor here for final descale. */ - z3 += ONE << (CONST_BITS-PASS1_BITS-1); - z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 += ONE << (CONST_BITS - PASS1_BITS - 1); + z4 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ tmp10 = z3 + z1; tmp11 = z3 - z2; tmp22 = RIGHT_SHIFT(z3 - LEFT_SHIFT(z1 - z2, 1), - CONST_BITS-PASS1_BITS); /* c0 = (c4-c8)*2 */ + CONST_BITS - PASS1_BITS); /* c0 = (c4-c8)*2 */ - z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ @@ -1121,10 +1121,10 @@ jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); tmp11 = z2 + z4; tmp13 = z2 - z4; @@ -1148,16 +1148,16 @@ jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - 
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*2] = (int) (tmp22 + tmp12); - wsptr[8*7] = (int) (tmp22 - tmp12); - wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 2] = (int)(tmp22 + tmp12); + wsptr[8 * 7] = (int)(tmp22 - tmp12); + wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS); } /* Pass 2: process 10 rows from work array, store into output array. */ @@ -1169,9 +1169,9 @@ jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. 
*/ - z3 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); z3 = LEFT_SHIFT(z3, CONST_BITS); - z4 = (JLONG) wsptr[4]; + z4 = (JLONG)wsptr[4]; z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ tmp10 = z3 + z1; @@ -1179,8 +1179,8 @@ jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp22 = z3 - LEFT_SHIFT(z1 - z2, 1); /* c0 = (c4-c8)*2 */ - z2 = (JLONG) wsptr[2]; - z3 = (JLONG) wsptr[6]; + z2 = (JLONG)wsptr[2]; + z3 = (JLONG)wsptr[6]; z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ @@ -1193,11 +1193,11 @@ jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = (JLONG) wsptr[1]; - z2 = (JLONG) wsptr[3]; - z3 = (JLONG) wsptr[5]; + z1 = (JLONG)wsptr[1]; + z2 = (JLONG)wsptr[3]; + z3 = (JLONG)wsptr[5]; z3 = LEFT_SHIFT(z3, CONST_BITS); - z4 = (JLONG) wsptr[7]; + z4 = (JLONG)wsptr[7]; tmp11 = z2 + z4; tmp13 = z2 - z4; @@ -1220,36 +1220,36 @@ jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = 
range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 8; /* advance pointer to next row */ } @@ -1265,9 +1265,9 @@ jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_11x11(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp10, tmp11, tmp12, tmp13, tmp14; JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; @@ -1278,30 +1278,30 @@ jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[8*11]; /* buffers data 
between passes */ + int workspace[8 * 11]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp10 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); tmp10 = LEFT_SHIFT(tmp10, CONST_BITS); /* Add fudge factor here for final descale. */ - tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp10 += ONE << (CONST_BITS - PASS1_BITS - 1); - z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ z4 = z1 + z3; - tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ + tmp24 = MULTIPLY(z4, -FIX(1.155664402)); /* -(c2-c10) */ z4 -= z2; tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ tmp21 = tmp20 + tmp23 + tmp25 - @@ -1316,10 +1316,10 @@ jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); tmp11 = z1 + z2; 
tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ @@ -1331,26 +1331,26 @@ jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr, z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ - z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ + z1 = MULTIPLY(z2 + z4, -FIX(1.798248910)); /* -(c1+c9) */ tmp11 += z1; tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ - tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ + tmp14 += MULTIPLY(z2, -FIX(1.467221301)) + /* -(c5+c9) */ MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ /* Final output stage */ - wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); + wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS); + wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp22 - tmp12, 
CONST_BITS - PASS1_BITS); + wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25, CONST_BITS - PASS1_BITS); } /* Pass 2: process 11 rows from work array, store into output array. */ @@ -1362,17 +1362,17 @@ jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. */ - tmp10 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp10 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); tmp10 = LEFT_SHIFT(tmp10, CONST_BITS); - z1 = (JLONG) wsptr[2]; - z2 = (JLONG) wsptr[4]; - z3 = (JLONG) wsptr[6]; + z1 = (JLONG)wsptr[2]; + z2 = (JLONG)wsptr[4]; + z3 = (JLONG)wsptr[6]; tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ z4 = z1 + z3; - tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ + tmp24 = MULTIPLY(z4, -FIX(1.155664402)); /* -(c2-c10) */ z4 -= z2; tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ tmp21 = tmp20 + tmp23 + tmp25 - @@ -1387,10 +1387,10 @@ jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = (JLONG) wsptr[1]; - z2 = (JLONG) wsptr[3]; - z3 = (JLONG) wsptr[5]; - z4 = (JLONG) wsptr[7]; + z1 = (JLONG)wsptr[1]; + z2 = (JLONG)wsptr[3]; + z3 = (JLONG)wsptr[5]; + z4 = (JLONG)wsptr[7]; tmp11 = z1 + z2; tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ @@ -1402,48 +1402,48 @@ jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr, z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ - z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ 
+ z1 = MULTIPLY(z2 + z4, -FIX(1.798248910)); /* -(c1+c9) */ tmp11 += z1; tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ - tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ + tmp14 += MULTIPLY(z2, -FIX(1.467221301)) + /* -(c5+c9) */ MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS + 
PASS1_BITS + 3) & + RANGE_MASK]; + outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 8; /* advance pointer to next row */ } @@ -1459,9 +1459,9 @@ jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; @@ -1472,32 +1472,32 @@ jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[8*12]; /* buffers data between passes */ + int workspace[8 * 12]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); z3 = LEFT_SHIFT(z3, CONST_BITS); /* Add fudge factor here for final descale. 
*/ - z3 += ONE << (CONST_BITS-PASS1_BITS-1); + z3 += ONE << (CONST_BITS - PASS1_BITS - 1); - z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ tmp10 = z3 + z4; tmp11 = z3 - z4; - z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ z1 = LEFT_SHIFT(z1, CONST_BITS); - z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); z2 = LEFT_SHIFT(z2, CONST_BITS); tmp12 = z1 - z2; @@ -1517,19 +1517,19 @@ jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ - tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ + tmp14 = MULTIPLY(z2, -FIX_0_541196100); /* -c9 */ tmp10 = z1 + z3; tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ - tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ + tmp13 = MULTIPLY(z3 + z4, -FIX(1.045510580)); /* -(c7+c11) */ tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ @@ -1543,18 +1543,18 @@ jpeg_idct_12x12 
(j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); - wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS); + wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS); + wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS); + wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS); } /* Pass 2: process 12 rows from work array, store into output array. 
*/ @@ -1566,19 +1566,19 @@ jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. */ - z3 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); z3 = LEFT_SHIFT(z3, CONST_BITS); - z4 = (JLONG) wsptr[4]; + z4 = (JLONG)wsptr[4]; z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ tmp10 = z3 + z4; tmp11 = z3 - z4; - z1 = (JLONG) wsptr[2]; + z1 = (JLONG)wsptr[2]; z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ z1 = LEFT_SHIFT(z1, CONST_BITS); - z2 = (JLONG) wsptr[6]; + z2 = (JLONG)wsptr[6]; z2 = LEFT_SHIFT(z2, CONST_BITS); tmp12 = z1 - z2; @@ -1598,19 +1598,19 @@ jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = (JLONG) wsptr[1]; - z2 = (JLONG) wsptr[3]; - z3 = (JLONG) wsptr[5]; - z4 = (JLONG) wsptr[7]; + z1 = (JLONG)wsptr[1]; + z2 = (JLONG)wsptr[3]; + z3 = (JLONG)wsptr[5]; + z4 = (JLONG)wsptr[7]; tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ - tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ + tmp14 = MULTIPLY(z2, -FIX_0_541196100); /* -c9 */ tmp10 = z1 + z3; tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ - tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ + tmp13 = MULTIPLY(z3 + z4, -FIX(1.045510580)); /* -(c7+c11) */ tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ @@ -1624,42 +1624,42 @@ jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = 
range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[7] = 
range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 8; /* advance pointer to next row */ } @@ -1675,9 +1675,9 @@ jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_13x13(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; @@ -1688,25 +1688,25 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[8*13]; /* buffers data between passes */ + int workspace[8 * 13]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); z1 = LEFT_SHIFT(z1, CONST_BITS); /* Add fudge factor here for final descale. 
*/ - z1 += ONE << (CONST_BITS-PASS1_BITS-1); + z1 += ONE << (CONST_BITS - PASS1_BITS - 1); - z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); tmp10 = z3 + z4; tmp11 = z3 - z4; @@ -1721,22 +1721,22 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ - tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ + tmp25 = MULTIPLY(z2, -FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ - tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ - tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ + tmp23 = MULTIPLY(z2, -FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ + tmp24 = MULTIPLY(z2, -FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ @@ -1744,13 +1744,13 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info 
*compptr, tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ - tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ + tmp14 = MULTIPLY(z2 + z3, -FIX(0.338443458)); /* -c11 */ tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ - tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ + tmp14 = MULTIPLY(z2 + z4, -FIX(1.163874945)); /* -c5 */ tmp11 += tmp14; tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ - tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ + tmp14 = MULTIPLY(z3 + z4, -FIX(0.657217813)); /* -c9 */ tmp12 += tmp14; tmp13 += tmp14; tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ @@ -1763,19 +1763,19 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); - wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); - wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); + wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - 
PASS1_BITS); + wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS); + wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS); + wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS); + wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS); + wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp26, CONST_BITS - PASS1_BITS); } /* Pass 2: process 13 rows from work array, store into output array. */ @@ -1787,12 +1787,12 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. 
*/ - z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); z1 = LEFT_SHIFT(z1, CONST_BITS); - z2 = (JLONG) wsptr[2]; - z3 = (JLONG) wsptr[4]; - z4 = (JLONG) wsptr[6]; + z2 = (JLONG)wsptr[2]; + z3 = (JLONG)wsptr[4]; + z4 = (JLONG)wsptr[6]; tmp10 = z3 + z4; tmp11 = z3 - z4; @@ -1807,22 +1807,22 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ - tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ + tmp25 = MULTIPLY(z2, -FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ - tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ - tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ + tmp23 = MULTIPLY(z2, -FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ + tmp24 = MULTIPLY(z2, -FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ /* Odd part */ - z1 = (JLONG) wsptr[1]; - z2 = (JLONG) wsptr[3]; - z3 = (JLONG) wsptr[5]; - z4 = (JLONG) wsptr[7]; + z1 = (JLONG)wsptr[1]; + z2 = (JLONG)wsptr[3]; + z3 = (JLONG)wsptr[5]; + z4 = (JLONG)wsptr[7]; tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ @@ -1830,13 +1830,13 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ - tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ + tmp14 = MULTIPLY(z2 + z3, -FIX(0.338443458)); /* -c11 */ tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ - tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* 
-c5 */ + tmp14 = MULTIPLY(z2 + z4, -FIX(1.163874945)); /* -c5 */ tmp11 += tmp14; tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ - tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ + tmp14 = MULTIPLY(z3 + z4, -FIX(0.657217813)); /* -c9 */ tmp12 += tmp14; tmp13 += tmp14; tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ @@ -1849,45 +1849,45 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS + PASS1_BITS + 3) 
& + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp26, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 8; /* advance pointer to next row */ } @@ -1903,9 +1903,9 @@ jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_14x14(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; @@ -1916,22 +1916,22 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[8*14]; /* buffers data between passes */ + int workspace[8 * 14]; /* buffers data 
between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); z1 = LEFT_SHIFT(z1, CONST_BITS); /* Add fudge factor here for final descale. */ - z1 += ONE << (CONST_BITS-PASS1_BITS-1); - z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z1 += ONE << (CONST_BITS - PASS1_BITS - 1); + z4 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ @@ -1941,10 +1941,10 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp12 = z1 - z4; tmp23 = RIGHT_SHIFT(z1 - LEFT_SHIFT(z2 + z3 - z4, 1), - CONST_BITS-PASS1_BITS); /* c0 = (c4+c12-c8)*2 */ + CONST_BITS - PASS1_BITS); /* c0 = (c4+c12-c8)*2 */ - z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ @@ -1962,10 +1962,10 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 7], 
quantptr[DCTSIZE * 7]); tmp13 = LEFT_SHIFT(z4, CONST_BITS); tmp14 = z1 + z3; @@ -1978,7 +1978,7 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ tmp16 += tmp15; z1 += z4; - z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ + z4 = MULTIPLY(z2 + z3, -FIX(0.158341681)) - tmp13; /* -c13 */ tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ @@ -1989,20 +1989,20 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*3] = (int) (tmp23 + tmp13); - wsptr[8*10] = (int) (tmp23 - tmp13); - wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); - wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); - wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); - wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); + wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 13] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS); 
+ wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS); + wsptr[8 * 3] = (int)(tmp23 + tmp13); + wsptr[8 * 10] = (int)(tmp23 - tmp13); + wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS); + wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS); + wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS - PASS1_BITS); + wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS - PASS1_BITS); } /* Pass 2: process 14 rows from work array, store into output array. */ @@ -2014,9 +2014,9 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. */ - z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); z1 = LEFT_SHIFT(z1, CONST_BITS); - z4 = (JLONG) wsptr[4]; + z4 = (JLONG)wsptr[4]; z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ @@ -2027,8 +2027,8 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp23 = z1 - LEFT_SHIFT(z2 + z3 - z4, 1); /* c0 = (c4+c12-c8)*2 */ - z1 = (JLONG) wsptr[2]; - z2 = (JLONG) wsptr[6]; + z1 = (JLONG)wsptr[2]; + z2 = (JLONG)wsptr[6]; z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ @@ -2046,10 +2046,10 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = (JLONG) wsptr[1]; - z2 = (JLONG) wsptr[3]; - z3 = (JLONG) wsptr[5]; - z4 = (JLONG) wsptr[7]; + z1 = (JLONG)wsptr[1]; + z2 = (JLONG)wsptr[3]; + z3 = (JLONG)wsptr[5]; + z4 = (JLONG)wsptr[7]; z4 = LEFT_SHIFT(z4, CONST_BITS); tmp14 = z1 + z3; @@ -2061,7 +2061,7 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, z1 -= z2; tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ tmp16 += tmp15; - 
tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ + tmp13 = MULTIPLY(z2 + z3, -FIX(0.158341681)) - z4; /* -c13 */ tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ @@ -2072,48 +2072,48 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[13] = 
range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp26 + tmp16, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp26 - tmp16, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 8; /* advance pointer to next row */ } @@ -2129,9 +2129,9 @@ jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_15x15(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; @@ -2142,25 +2142,25 @@ jpeg_idct_15x15 (j_decompress_ptr cinfo, 
jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[8*15]; /* buffers data between passes */ + int workspace[8 * 15]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); z1 = LEFT_SHIFT(z1, CONST_BITS); /* Add fudge factor here for final descale. */ - z1 += ONE << (CONST_BITS-PASS1_BITS-1); + z1 += ONE << (CONST_BITS - PASS1_BITS - 1); - z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ @@ -2195,19 +2195,19 @@ jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ - z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); tmp13 = z2 - z4; tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ tmp11 = tmp15 + 
MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ - tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ - tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ + tmp13 = MULTIPLY(z2, -FIX(0.831253876)); /* -c9 */ + tmp15 = MULTIPLY(z2, -FIX(1.344997024)); /* -c3 */ z2 = z1 - z4; tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ @@ -2220,21 +2220,21 @@ jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); - wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); - wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); - wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); - wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); - wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); + wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 14] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 13] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS); + wsptr[8 * 12] = 
(int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS); + wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS); + wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS); + wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS); + wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS - PASS1_BITS); + wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS - PASS1_BITS); + wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp27, CONST_BITS - PASS1_BITS); } /* Pass 2: process 15 rows from work array, store into output array. */ @@ -2246,12 +2246,12 @@ jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. */ - z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); z1 = LEFT_SHIFT(z1, CONST_BITS); - z2 = (JLONG) wsptr[2]; - z3 = (JLONG) wsptr[4]; - z4 = (JLONG) wsptr[6]; + z2 = (JLONG)wsptr[2]; + z3 = (JLONG)wsptr[4]; + z4 = (JLONG)wsptr[6]; tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ @@ -2286,19 +2286,19 @@ jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = (JLONG) wsptr[1]; - z2 = (JLONG) wsptr[3]; - z4 = (JLONG) wsptr[5]; + z1 = (JLONG)wsptr[1]; + z2 = (JLONG)wsptr[3]; + z4 = (JLONG)wsptr[5]; z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ - z4 = (JLONG) wsptr[7]; + z4 = (JLONG)wsptr[7]; tmp13 = z2 - z4; tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ - tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ - tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ + 
tmp13 = MULTIPLY(z2, -FIX(0.831253876)); /* -c9 */ + tmp15 = MULTIPLY(z2, -FIX(1.344997024)); /* -c3 */ z2 = z1 - z4; tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ @@ -2311,51 +2311,51 @@ jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[14] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10, 
+ CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[13] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp26 + tmp16, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp26 - tmp16, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp27, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 8; /* advance pointer to next row */ } @@ -2371,9 +2371,9 @@ jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; @@ -2384,23 
+2384,23 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[8*16]; /* buffers data between passes */ + int workspace[8 * 16]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { /* Even part */ - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); /* Add fudge factor here for final descale. */ - tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); + tmp0 += 1 << (CONST_BITS - PASS1_BITS - 1); - z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ @@ -2409,8 +2409,8 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp12 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; - z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); z3 = z1 - z2; z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ @@ -2431,10 +2431,10 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], 
quantptr[DCTSIZE * 1]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); tmp11 = z1 + z3; @@ -2455,13 +2455,13 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ z2 += z4; - z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ + z1 = MULTIPLY(z2, -FIX(0.666655658)); /* -c11 */ tmp1 += z1; tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ - z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ + z2 = MULTIPLY(z2, -FIX(1.247225013)); /* -c5 */ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ tmp12 += z2; - z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ + z2 = MULTIPLY(z3 + z4, -FIX(1.353318001)); /* -c3 */ tmp2 += z2; tmp3 += z2; z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ @@ -2470,22 +2470,22 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); - wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); - wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); - wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); - wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); - wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); - wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); - wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); - wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); - wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); - wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); - 
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); - wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); - wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS - PASS1_BITS); + wsptr[8 * 15] = (int)RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS - PASS1_BITS); + wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS - PASS1_BITS); + wsptr[8 * 14] = (int)RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS - PASS1_BITS); + wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS - PASS1_BITS); + wsptr[8 * 13] = (int)RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS - PASS1_BITS); + wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS - PASS1_BITS); + wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS - PASS1_BITS); + wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS - PASS1_BITS); + wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS - PASS1_BITS); + wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS - PASS1_BITS); + wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS - PASS1_BITS); + wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS - PASS1_BITS); + wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS - PASS1_BITS); } /* Pass 2: process 16 rows from work array, store into output array. */ @@ -2497,10 +2497,10 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ /* Add fudge factor here for final descale. 
*/ - tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2)); tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); - z1 = (JLONG) wsptr[4]; + z1 = (JLONG)wsptr[4]; tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ @@ -2509,8 +2509,8 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp12 = tmp0 + tmp2; tmp13 = tmp0 - tmp2; - z1 = (JLONG) wsptr[2]; - z2 = (JLONG) wsptr[6]; + z1 = (JLONG)wsptr[2]; + z2 = (JLONG)wsptr[6]; z3 = z1 - z2; z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ @@ -2531,10 +2531,10 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Odd part */ - z1 = (JLONG) wsptr[1]; - z2 = (JLONG) wsptr[3]; - z3 = (JLONG) wsptr[5]; - z4 = (JLONG) wsptr[7]; + z1 = (JLONG)wsptr[1]; + z2 = (JLONG)wsptr[3]; + z3 = (JLONG)wsptr[5]; + z4 = (JLONG)wsptr[7]; tmp11 = z1 + z3; @@ -2555,13 +2555,13 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ z2 += z4; - z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ + z1 = MULTIPLY(z2, -FIX(0.666655658)); /* -c11 */ tmp1 += z1; tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ - z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ + z2 = MULTIPLY(z2, -FIX(1.247225013)); /* -c5 */ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ tmp12 += z2; - z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ + z2 = MULTIPLY(z3 + z4, -FIX(1.353318001)); /* -c3 */ tmp2 += z2; tmp3 += z2; z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ @@ -2570,54 +2570,54 @@ jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Final output stage */ - outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, - CONST_BITS+PASS1_BITS+3) - & 
RANGE_MASK]; - outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; + outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[15] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp0, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[14] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp1, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[2] = 
range_limit[(int)RIGHT_SHIFT(tmp22 + tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[13] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp2, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp3, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp3, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp10, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp11, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp26 + tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp26 - tmp12, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp27 + tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; + outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp27 - tmp13, + CONST_BITS + PASS1_BITS + 3) & + RANGE_MASK]; wsptr += 8; /* advance pointer to next row */ } diff --git a/jidctred.c b/jidctred.c index 7a81803..1ff352f 100644 --- a/jidctred.c +++ b/jidctred.c @@ -58,20 +58,20 @@ */ #if CONST_BITS == 13 -#define FIX_0_211164243 ((JLONG) 1730) /* FIX(0.211164243) */ -#define FIX_0_509795579 ((JLONG) 4176) /* FIX(0.509795579) */ -#define FIX_0_601344887 ((JLONG) 4926) /* FIX(0.601344887) */ -#define FIX_0_720959822 ((JLONG) 5906) /* FIX(0.720959822) */ -#define FIX_0_765366865 ((JLONG) 6270) /* FIX(0.765366865) */ -#define FIX_0_850430095 ((JLONG) 6967) /* FIX(0.850430095) */ -#define FIX_0_899976223 ((JLONG) 7373) /* FIX(0.899976223) */ -#define FIX_1_061594337 ((JLONG) 8697) /* FIX(1.061594337) */ -#define FIX_1_272758580 ((JLONG) 10426) /* 
FIX(1.272758580) */ -#define FIX_1_451774981 ((JLONG) 11893) /* FIX(1.451774981) */ -#define FIX_1_847759065 ((JLONG) 15137) /* FIX(1.847759065) */ -#define FIX_2_172734803 ((JLONG) 17799) /* FIX(2.172734803) */ -#define FIX_2_562915447 ((JLONG) 20995) /* FIX(2.562915447) */ -#define FIX_3_624509785 ((JLONG) 29692) /* FIX(3.624509785) */ +#define FIX_0_211164243 ((JLONG)1730) /* FIX(0.211164243) */ +#define FIX_0_509795579 ((JLONG)4176) /* FIX(0.509795579) */ +#define FIX_0_601344887 ((JLONG)4926) /* FIX(0.601344887) */ +#define FIX_0_720959822 ((JLONG)5906) /* FIX(0.720959822) */ +#define FIX_0_765366865 ((JLONG)6270) /* FIX(0.765366865) */ +#define FIX_0_850430095 ((JLONG)6967) /* FIX(0.850430095) */ +#define FIX_0_899976223 ((JLONG)7373) /* FIX(0.899976223) */ +#define FIX_1_061594337 ((JLONG)8697) /* FIX(1.061594337) */ +#define FIX_1_272758580 ((JLONG)10426) /* FIX(1.272758580) */ +#define FIX_1_451774981 ((JLONG)11893) /* FIX(1.451774981) */ +#define FIX_1_847759065 ((JLONG)15137) /* FIX(1.847759065) */ +#define FIX_2_172734803 ((JLONG)17799) /* FIX(2.172734803) */ +#define FIX_2_562915447 ((JLONG)20995) /* FIX(2.562915447) */ +#define FIX_3_624509785 ((JLONG)29692) /* FIX(3.624509785) */ #else #define FIX_0_211164243 FIX(0.211164243) #define FIX_0_509795579 FIX(0.509795579) @@ -98,9 +98,9 @@ */ #if BITS_IN_JSAMPLE == 8 -#define MULTIPLY(var,const) MULTIPLY16C16(var,const) +#define MULTIPLY(var, const) MULTIPLY16C16(var, const) #else -#define MULTIPLY(var,const) ((var) * (const)) +#define MULTIPLY(var, const) ((var) * (const)) #endif @@ -109,7 +109,7 @@ * are 16 bits or less, so either int or short multiply will work. 
*/ -#define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) +#define DEQUANTIZE(coef, quantval) (((ISLOW_MULT_TYPE)(coef)) * (quantval)) /* @@ -118,9 +118,9 @@ */ GLOBAL(void) -jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp0, tmp2, tmp10, tmp12; JLONG z1, z2, z3, z4; @@ -130,69 +130,73 @@ jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[DCTSIZE*4]; /* buffers data between passes */ + int workspace[DCTSIZE * 4]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { /* Don't bother to process column 4, because second pass won't use it */ - if (ctr == DCTSIZE-4) + if (ctr == DCTSIZE - 4) continue; - if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 && - inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) { + if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 && + inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 5] == 0 && + inptr[DCTSIZE * 6] == 0 && inptr[DCTSIZE * 7] == 0) { /* AC terms all zero; we need not examine term 4 for 4x4 output */ - int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]), - PASS1_BITS); + int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0], + quantptr[DCTSIZE * 0]), PASS1_BITS); - wsptr[DCTSIZE*0] = dcval; - wsptr[DCTSIZE*1] = dcval; - wsptr[DCTSIZE*2] = dcval; - wsptr[DCTSIZE*3] = dcval; + wsptr[DCTSIZE * 0] = dcval; + wsptr[DCTSIZE * 1] = 
dcval; + wsptr[DCTSIZE * 2] = dcval; + wsptr[DCTSIZE * 3] = dcval; continue; } /* Even part */ - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp0 = LEFT_SHIFT(tmp0, CONST_BITS+1); + tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); + tmp0 = LEFT_SHIFT(tmp0, CONST_BITS + 1); - z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]); - tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865); + tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, -FIX_0_765366865); tmp10 = tmp0 + tmp2; tmp12 = tmp0 - tmp2; /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); - z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z1 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); + z2 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + z3 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + z4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); - tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ - + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ - + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ - + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ + tmp0 = MULTIPLY(z1, -FIX_0_211164243) + /* sqrt(2) * ( c3-c1) */ + MULTIPLY(z2, FIX_1_451774981) + /* sqrt(2) * ( c3+c7) */ + MULTIPLY(z3, -FIX_2_172734803) + /* sqrt(2) * (-c1-c5) */ + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * ( c5+c7) */ - tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ - + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ - + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ - + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ + tmp2 = MULTIPLY(z1, -FIX_0_509795579) + 
/* sqrt(2) * (c7-c5) */ + MULTIPLY(z2, -FIX_0_601344887) + /* sqrt(2) * (c5-c1) */ + MULTIPLY(z3, FIX_0_899976223) + /* sqrt(2) * (c3-c7) */ + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ /* Final output stage */ - wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1); - wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1); - wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1); - wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1); + wsptr[DCTSIZE * 0] = + (int)DESCALE(tmp10 + tmp2, CONST_BITS - PASS1_BITS + 1); + wsptr[DCTSIZE * 3] = + (int)DESCALE(tmp10 - tmp2, CONST_BITS - PASS1_BITS + 1); + wsptr[DCTSIZE * 1] = + (int)DESCALE(tmp12 + tmp0, CONST_BITS - PASS1_BITS + 1); + wsptr[DCTSIZE * 2] = + (int)DESCALE(tmp12 - tmp0, CONST_BITS - PASS1_BITS + 1); } /* Pass 2: process 4 rows from work array, store into output array. */ @@ -206,8 +210,8 @@ jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; + JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0], + PASS1_BITS + 3) & RANGE_MASK]; outptr[0] = dcval; outptr[1] = dcval; @@ -221,45 +225,45 @@ jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ - tmp0 = LEFT_SHIFT((JLONG) wsptr[0], CONST_BITS+1); + tmp0 = LEFT_SHIFT((JLONG)wsptr[0], CONST_BITS + 1); - tmp2 = MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) - + MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865); + tmp2 = MULTIPLY((JLONG)wsptr[2], FIX_1_847759065) + + MULTIPLY((JLONG)wsptr[6], -FIX_0_765366865); tmp10 = tmp0 + tmp2; tmp12 = tmp0 - tmp2; /* Odd part */ - z1 = (JLONG) wsptr[7]; - z2 = (JLONG) wsptr[5]; - z3 = (JLONG) wsptr[3]; - z4 = (JLONG) wsptr[1]; + z1 = (JLONG)wsptr[7]; + z2 = (JLONG)wsptr[5]; + z3 = 
(JLONG)wsptr[3]; + z4 = (JLONG)wsptr[1]; - tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ - + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ - + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ - + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ + tmp0 = MULTIPLY(z1, -FIX_0_211164243) + /* sqrt(2) * ( c3-c1) */ + MULTIPLY(z2, FIX_1_451774981) + /* sqrt(2) * ( c3+c7) */ + MULTIPLY(z3, -FIX_2_172734803) + /* sqrt(2) * (-c1-c5) */ + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * ( c5+c7) */ - tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ - + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ - + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ - + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ + tmp2 = MULTIPLY(z1, -FIX_0_509795579) + /* sqrt(2) * (c7-c5) */ + MULTIPLY(z2, -FIX_0_601344887) + /* sqrt(2) * (c5-c1) */ + MULTIPLY(z3, FIX_0_899976223) + /* sqrt(2) * (c3-c7) */ + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ /* Final output stage */ - outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; - outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; - outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; + outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp2, + CONST_BITS + PASS1_BITS + 3 + 1) & + RANGE_MASK]; + outptr[3] = range_limit[(int)DESCALE(tmp10 - tmp2, + CONST_BITS + PASS1_BITS + 3 + 1) & + RANGE_MASK]; + outptr[1] = range_limit[(int)DESCALE(tmp12 + tmp0, + CONST_BITS + PASS1_BITS + 3 + 1) & + RANGE_MASK]; + outptr[2] = range_limit[(int)DESCALE(tmp12 - tmp0, + CONST_BITS + PASS1_BITS + 3 + 1) & + RANGE_MASK]; wsptr += DCTSIZE; /* advance pointer to next row */ } @@ -272,9 +276,9 @@ jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, 
*/ GLOBAL(void) -jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { JLONG tmp0, tmp10, z1; JCOEFPTR inptr; @@ -283,50 +287,52 @@ jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPROW outptr; JSAMPLE *range_limit = IDCT_range_limit(cinfo); int ctr; - int workspace[DCTSIZE*2]; /* buffers data between passes */ + int workspace[DCTSIZE * 2]; /* buffers data between passes */ SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; wsptr = workspace; for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { /* Don't bother to process columns 2,4,6 */ - if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6) + if (ctr == DCTSIZE - 2 || ctr == DCTSIZE - 4 || ctr == DCTSIZE - 6) continue; - if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*3] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) { + if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 3] == 0 && + inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 7] == 0) { /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */ - int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]), - PASS1_BITS); + int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0], + quantptr[DCTSIZE * 0]), PASS1_BITS); - wsptr[DCTSIZE*0] = dcval; - wsptr[DCTSIZE*1] = dcval; + wsptr[DCTSIZE * 0] = dcval; + wsptr[DCTSIZE * 1] = dcval; continue; } /* Even part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp10 = LEFT_SHIFT(z1, CONST_BITS+2); + z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); + tmp10 = LEFT_SHIFT(z1, CONST_BITS + 2); /* Odd part */ - z1 = DEQUANTIZE(inptr[DCTSIZE*7], 
quantptr[DCTSIZE*7]); - tmp0 = MULTIPLY(z1, - FIX_0_720959822); /* sqrt(2) * (c7-c5+c3-c1) */ - z1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * (-c1+c3+c5+c7) */ - z1 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - tmp0 += MULTIPLY(z1, - FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ + z1 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]); + tmp0 = MULTIPLY(z1, -FIX_0_720959822); /* sqrt(2) * ( c7-c5+c3-c1) */ + z1 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]); + tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * (-c1+c3+c5+c7) */ + z1 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]); + tmp0 += MULTIPLY(z1, -FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */ + z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]); + tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * ( c1+c3+c5+c7) */ /* Final output stage */ - wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2); - wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2); + wsptr[DCTSIZE * 0] = + (int)DESCALE(tmp10 + tmp0, CONST_BITS - PASS1_BITS + 2); + wsptr[DCTSIZE * 1] = + (int)DESCALE(tmp10 - tmp0, CONST_BITS - PASS1_BITS + 2); } /* Pass 2: process 2 rows from work array, store into output array. 
*/ @@ -339,8 +345,8 @@ jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, #ifndef NO_ZERO_ROW_TEST if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) { /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; + JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0], + PASS1_BITS + 3) & RANGE_MASK]; outptr[0] = dcval; outptr[1] = dcval; @@ -352,23 +358,23 @@ jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* Even part */ - tmp10 = LEFT_SHIFT((JLONG) wsptr[0], CONST_BITS+2); + tmp10 = LEFT_SHIFT((JLONG)wsptr[0], CONST_BITS + 2); /* Odd part */ - tmp0 = MULTIPLY((JLONG) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */ - + MULTIPLY((JLONG) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */ - + MULTIPLY((JLONG) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */ - + MULTIPLY((JLONG) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ + tmp0 = MULTIPLY((JLONG)wsptr[7], -FIX_0_720959822) + /* sqrt(2) * ( c7-c5+c3-c1) */ + MULTIPLY((JLONG)wsptr[5], FIX_0_850430095) + /* sqrt(2) * (-c1+c3+c5+c7) */ + MULTIPLY((JLONG)wsptr[3], -FIX_1_272758580) + /* sqrt(2) * (-c1+c3-c5-c7) */ + MULTIPLY((JLONG)wsptr[1], FIX_3_624509785); /* sqrt(2) * ( c1+c3+c5+c7) */ /* Final output stage */ - outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0, - CONST_BITS+PASS1_BITS+3+2) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0, - CONST_BITS+PASS1_BITS+3+2) - & RANGE_MASK]; + outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp0, + CONST_BITS + PASS1_BITS + 3 + 2) & + RANGE_MASK]; + outptr[1] = range_limit[(int)DESCALE(tmp10 - tmp0, + CONST_BITS + PASS1_BITS + 3 + 2) & + RANGE_MASK]; wsptr += DCTSIZE; /* advance pointer to next row */ } @@ -381,9 +387,9 @@ jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, */ GLOBAL(void) -jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, 
- JSAMPARRAY output_buf, JDIMENSION output_col) +jpeg_idct_1x1(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { int dcval; ISLOW_MULT_TYPE *quantptr; @@ -393,9 +399,9 @@ jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info *compptr, /* We hardly need an inverse DCT routine for this: just take the * average pixel value, which is one-eighth of the DC coefficient. */ - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table; dcval = DEQUANTIZE(coef_block[0], quantptr[0]); - dcval = (int) DESCALE((JLONG) dcval, 3); + dcval = (int)DESCALE((JLONG)dcval, 3); output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; } diff --git a/jinclude.h b/jinclude.h index d461a1a..c1bcf7d 100644 --- a/jinclude.h +++ b/jinclude.h @@ -61,14 +61,18 @@ #ifdef NEED_BSD_STRINGS #include -#define MEMZERO(target,size) bzero((void *)(target), (size_t)(size)) -#define MEMCOPY(dest,src,size) bcopy((const void *)(src), (void *)(dest), (size_t)(size)) +#define MEMZERO(target, size) \ + bzero((void *)(target), (size_t)(size)) +#define MEMCOPY(dest, src, size) \ + bcopy((const void *)(src), (void *)(dest), (size_t)(size)) #else /* not BSD, assume ANSI/SysV string lib */ #include -#define MEMZERO(target,size) memset((void *)(target), 0, (size_t)(size)) -#define MEMCOPY(dest,src,size) memcpy((void *)(dest), (const void *)(src), (size_t)(size)) +#define MEMZERO(target, size) \ + memset((void *)(target), 0, (size_t)(size)) +#define MEMCOPY(dest, src, size) \ + memcpy((void *)(dest), (const void *)(src), (size_t)(size)) #endif @@ -78,7 +82,7 @@ * CAUTION: argument order is different from underlying functions! 
*/ -#define JFREAD(file,buf,sizeofbuf) \ - ((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file))) -#define JFWRITE(file,buf,sizeofbuf) \ - ((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file))) +#define JFREAD(file, buf, sizeofbuf) \ + ((size_t)fread((void *)(buf), (size_t)1, (size_t)(sizeofbuf), (file))) +#define JFWRITE(file, buf, sizeofbuf) \ + ((size_t)fwrite((const void *)(buf), (size_t)1, (size_t)(sizeofbuf), (file))) diff --git a/jmemmgr.c b/jmemmgr.c index 8dfb633..508ca74 100644 --- a/jmemmgr.c +++ b/jmemmgr.c @@ -32,20 +32,20 @@ #include "jinclude.h" #include "jpeglib.h" #include "jmemsys.h" /* import the system-dependent declarations */ -#ifndef _WIN32 +#if !defined(_MSC_VER) || _MSC_VER > 1600 #include #endif #include #ifndef NO_GETENV #ifndef HAVE_STDLIB_H /* should declare getenv() */ -extern char *getenv (const char *name); +extern char *getenv(const char *name); #endif #endif LOCAL(size_t) -round_up_pow2 (size_t a, size_t b) +round_up_pow2(size_t a, size_t b) /* a rounded up to the next multiple of b, i.e. ceil(a/b)*b */ /* Assumes a >= 0, b > 0, and b is a power of 2 */ { @@ -89,7 +89,9 @@ round_up_pow2 (size_t a, size_t b) #ifndef WITH_SIMD #define ALIGN_SIZE sizeof(double) #else -#define ALIGN_SIZE 16 /* Most SIMD implementations require this */ +#define ALIGN_SIZE 32 /* Most of the SIMD instructions we support require + 16-byte (128-bit) alignment, but AVX2 requires + 32-byte alignment. 
*/ #endif #endif @@ -104,7 +106,7 @@ round_up_pow2 (size_t a, size_t b) typedef struct small_pool_struct *small_pool_ptr; typedef struct small_pool_struct { - small_pool_ptr next; /* next in list of pools */ + small_pool_ptr next; /* next in list of pools */ size_t bytes_used; /* how many bytes already used within pool */ size_t bytes_left; /* bytes still available in this pool */ } small_pool_hdr; @@ -112,7 +114,7 @@ typedef struct small_pool_struct { typedef struct large_pool_struct *large_pool_ptr; typedef struct large_pool_struct { - large_pool_ptr next; /* next in list of pools */ + large_pool_ptr next; /* next in list of pools */ size_t bytes_used; /* how many bytes already used within pool */ size_t bytes_left; /* bytes still available in this pool */ } large_pool_hdr; @@ -191,9 +193,9 @@ struct jvirt_barray_control { #ifdef MEM_STATS /* optional extra stuff for statistics */ LOCAL(void) -print_mem_stats (j_common_ptr cinfo, int pool_id) +print_mem_stats(j_common_ptr cinfo, int pool_id) { - my_mem_ptr mem = (my_mem_ptr) cinfo->mem; + my_mem_ptr mem = (my_mem_ptr)cinfo->mem; small_pool_ptr shdr_ptr; large_pool_ptr lhdr_ptr; @@ -206,15 +208,13 @@ print_mem_stats (j_common_ptr cinfo, int pool_id) for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL; lhdr_ptr = lhdr_ptr->next) { - fprintf(stderr, " Large chunk used %ld\n", - (long) lhdr_ptr->bytes_used); + fprintf(stderr, " Large chunk used %ld\n", (long)lhdr_ptr->bytes_used); } for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL; shdr_ptr = shdr_ptr->next) { fprintf(stderr, " Small chunk used %ld free %ld\n", - (long) shdr_ptr->bytes_used, - (long) shdr_ptr->bytes_left); + (long)shdr_ptr->bytes_used, (long)shdr_ptr->bytes_left); } } @@ -222,7 +222,7 @@ print_mem_stats (j_common_ptr cinfo, int pool_id) LOCAL(void) -out_of_memory (j_common_ptr cinfo, int which) +out_of_memory(j_common_ptr cinfo, int which) /* Report an out-of-memory error and stop execution */ /* If we compiled MEM_STATS support, 
report alloc requests before dying */ { @@ -250,26 +250,24 @@ out_of_memory (j_common_ptr cinfo, int which) * adjustment. */ -static const size_t first_pool_slop[JPOOL_NUMPOOLS] = -{ - 1600, /* first PERMANENT pool */ - 16000 /* first IMAGE pool */ +static const size_t first_pool_slop[JPOOL_NUMPOOLS] = { + 1600, /* first PERMANENT pool */ + 16000 /* first IMAGE pool */ }; -static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = -{ - 0, /* additional PERMANENT pools */ - 5000 /* additional IMAGE pools */ +static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = { + 0, /* additional PERMANENT pools */ + 5000 /* additional IMAGE pools */ }; #define MIN_SLOP 50 /* greater than 0 to avoid futile looping */ METHODDEF(void *) -alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject) +alloc_small(j_common_ptr cinfo, int pool_id, size_t sizeofobject) /* Allocate a "small" object */ { - my_mem_ptr mem = (my_mem_ptr) cinfo->mem; + my_mem_ptr mem = (my_mem_ptr)cinfo->mem; small_pool_ptr hdr_ptr, prev_hdr_ptr; char *data_ptr; size_t min_request, slop; @@ -313,11 +311,11 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject) else slop = extra_pool_slop[pool_id]; /* Don't ask for more than MAX_ALLOC_CHUNK */ - if (slop > (size_t) (MAX_ALLOC_CHUNK-min_request)) - slop = (size_t) (MAX_ALLOC_CHUNK-min_request); + if (slop > (size_t)(MAX_ALLOC_CHUNK - min_request)) + slop = (size_t)(MAX_ALLOC_CHUNK - min_request); /* Try to get space, if fail reduce slop and try again */ for (;;) { - hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop); + hdr_ptr = (small_pool_ptr)jpeg_get_small(cinfo, min_request + slop); if (hdr_ptr != NULL) break; slop /= 2; @@ -336,7 +334,7 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject) } /* OK, allocate the object from the current pool */ - data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */ + data_ptr = (char *)hdr_ptr; /* point to first data byte in pool... 
*/ data_ptr += sizeof(small_pool_hdr); /* ...by skipping the header... */ if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */ data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE; @@ -344,7 +342,7 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject) hdr_ptr->bytes_used += sizeofobject; hdr_ptr->bytes_left -= sizeofobject; - return (void *) data_ptr; + return (void *)data_ptr; } @@ -362,10 +360,10 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject) */ METHODDEF(void *) -alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject) +alloc_large(j_common_ptr cinfo, int pool_id, size_t sizeofobject) /* Allocate a "large" object */ { - my_mem_ptr mem = (my_mem_ptr) cinfo->mem; + my_mem_ptr mem = (my_mem_ptr)cinfo->mem; large_pool_ptr hdr_ptr; char *data_ptr; @@ -390,9 +388,9 @@ alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject) if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ - hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject + - sizeof(large_pool_hdr) + - ALIGN_SIZE - 1); + hdr_ptr = (large_pool_ptr)jpeg_get_large(cinfo, sizeofobject + + sizeof(large_pool_hdr) + + ALIGN_SIZE - 1); if (hdr_ptr == NULL) out_of_memory(cinfo, 4); /* jpeg_get_large failed */ mem->total_space_allocated += sizeofobject + sizeof(large_pool_hdr) + @@ -407,12 +405,12 @@ alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject) hdr_ptr->bytes_left = 0; mem->large_list[pool_id] = hdr_ptr; - data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */ + data_ptr = (char *)hdr_ptr; /* point to first data byte in pool... */ data_ptr += sizeof(small_pool_hdr); /* ...by skipping the header... 
*/ if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */ data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE; - return (void *) data_ptr; + return (void *)data_ptr; } @@ -433,11 +431,11 @@ alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject) */ METHODDEF(JSAMPARRAY) -alloc_sarray (j_common_ptr cinfo, int pool_id, - JDIMENSION samplesperrow, JDIMENSION numrows) +alloc_sarray(j_common_ptr cinfo, int pool_id, JDIMENSION samplesperrow, + JDIMENSION numrows) /* Allocate a 2-D sample array */ { - my_mem_ptr mem = (my_mem_ptr) cinfo->mem; + my_mem_ptr mem = (my_mem_ptr)cinfo->mem; JSAMPARRAY result; JSAMPROW workspace; JDIMENSION rowsperchunk, currow, i; @@ -456,27 +454,27 @@ alloc_sarray (j_common_ptr cinfo, int pool_id, sizeof(JSAMPLE)); /* Calculate max # of rows allowed in one allocation chunk */ - ltemp = (MAX_ALLOC_CHUNK-sizeof(large_pool_hdr)) / - ((long) samplesperrow * sizeof(JSAMPLE)); + ltemp = (MAX_ALLOC_CHUNK - sizeof(large_pool_hdr)) / + ((long)samplesperrow * sizeof(JSAMPLE)); if (ltemp <= 0) ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); - if (ltemp < (long) numrows) - rowsperchunk = (JDIMENSION) ltemp; + if (ltemp < (long)numrows) + rowsperchunk = (JDIMENSION)ltemp; else rowsperchunk = numrows; mem->last_rowsperchunk = rowsperchunk; /* Get space for row pointers (small object) */ - result = (JSAMPARRAY) alloc_small(cinfo, pool_id, - (size_t) (numrows * sizeof(JSAMPROW))); + result = (JSAMPARRAY)alloc_small(cinfo, pool_id, + (size_t)(numrows * sizeof(JSAMPROW))); /* Get the rows themselves (large objects) */ currow = 0; while (currow < numrows) { rowsperchunk = MIN(rowsperchunk, numrows - currow); - workspace = (JSAMPROW) alloc_large(cinfo, pool_id, - (size_t) ((size_t) rowsperchunk * (size_t) samplesperrow - * sizeof(JSAMPLE))); + workspace = (JSAMPROW)alloc_large(cinfo, pool_id, + (size_t)((size_t)rowsperchunk * (size_t)samplesperrow * + sizeof(JSAMPLE))); for (i = rowsperchunk; i > 0; i--) { result[currow++] = workspace; workspace 
+= samplesperrow; @@ -493,11 +491,11 @@ alloc_sarray (j_common_ptr cinfo, int pool_id, */ METHODDEF(JBLOCKARRAY) -alloc_barray (j_common_ptr cinfo, int pool_id, - JDIMENSION blocksperrow, JDIMENSION numrows) +alloc_barray(j_common_ptr cinfo, int pool_id, JDIMENSION blocksperrow, + JDIMENSION numrows) /* Allocate a 2-D coefficient-block array */ { - my_mem_ptr mem = (my_mem_ptr) cinfo->mem; + my_mem_ptr mem = (my_mem_ptr)cinfo->mem; JBLOCKARRAY result; JBLOCKROW workspace; JDIMENSION rowsperchunk, currow, i; @@ -508,27 +506,27 @@ alloc_barray (j_common_ptr cinfo, int pool_id, out_of_memory(cinfo, 6); /* safety check */ /* Calculate max # of rows allowed in one allocation chunk */ - ltemp = (MAX_ALLOC_CHUNK-sizeof(large_pool_hdr)) / - ((long) blocksperrow * sizeof(JBLOCK)); + ltemp = (MAX_ALLOC_CHUNK - sizeof(large_pool_hdr)) / + ((long)blocksperrow * sizeof(JBLOCK)); if (ltemp <= 0) ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); - if (ltemp < (long) numrows) - rowsperchunk = (JDIMENSION) ltemp; + if (ltemp < (long)numrows) + rowsperchunk = (JDIMENSION)ltemp; else rowsperchunk = numrows; mem->last_rowsperchunk = rowsperchunk; /* Get space for row pointers (small object) */ - result = (JBLOCKARRAY) alloc_small(cinfo, pool_id, - (size_t) (numrows * sizeof(JBLOCKROW))); + result = (JBLOCKARRAY)alloc_small(cinfo, pool_id, + (size_t)(numrows * sizeof(JBLOCKROW))); /* Get the rows themselves (large objects) */ currow = 0; while (currow < numrows) { rowsperchunk = MIN(rowsperchunk, numrows - currow); - workspace = (JBLOCKROW) alloc_large(cinfo, pool_id, - (size_t) ((size_t) rowsperchunk * (size_t) blocksperrow - * sizeof(JBLOCK))); + workspace = (JBLOCKROW)alloc_large(cinfo, pool_id, + (size_t)((size_t)rowsperchunk * (size_t)blocksperrow * + sizeof(JBLOCK))); for (i = rowsperchunk; i > 0; i--) { result[currow++] = workspace; workspace += blocksperrow; @@ -577,12 +575,12 @@ alloc_barray (j_common_ptr cinfo, int pool_id, METHODDEF(jvirt_sarray_ptr) -request_virt_sarray (j_common_ptr 
cinfo, int pool_id, boolean pre_zero, - JDIMENSION samplesperrow, JDIMENSION numrows, - JDIMENSION maxaccess) +request_virt_sarray(j_common_ptr cinfo, int pool_id, boolean pre_zero, + JDIMENSION samplesperrow, JDIMENSION numrows, + JDIMENSION maxaccess) /* Request a virtual 2-D sample array */ { - my_mem_ptr mem = (my_mem_ptr) cinfo->mem; + my_mem_ptr mem = (my_mem_ptr)cinfo->mem; jvirt_sarray_ptr result; /* Only IMAGE-lifetime virtual arrays are currently supported */ @@ -590,8 +588,8 @@ request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero, ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ /* get control block */ - result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id, - sizeof(struct jvirt_sarray_control)); + result = (jvirt_sarray_ptr)alloc_small(cinfo, pool_id, + sizeof(struct jvirt_sarray_control)); result->mem_buffer = NULL; /* marks array not yet realized */ result->rows_in_array = numrows; @@ -607,12 +605,12 @@ request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero, METHODDEF(jvirt_barray_ptr) -request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero, - JDIMENSION blocksperrow, JDIMENSION numrows, - JDIMENSION maxaccess) +request_virt_barray(j_common_ptr cinfo, int pool_id, boolean pre_zero, + JDIMENSION blocksperrow, JDIMENSION numrows, + JDIMENSION maxaccess) /* Request a virtual 2-D coefficient-block array */ { - my_mem_ptr mem = (my_mem_ptr) cinfo->mem; + my_mem_ptr mem = (my_mem_ptr)cinfo->mem; jvirt_barray_ptr result; /* Only IMAGE-lifetime virtual arrays are currently supported */ @@ -620,8 +618,8 @@ request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero, ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */ /* get control block */ - result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id, - sizeof(struct jvirt_barray_control)); + result = (jvirt_barray_ptr)alloc_small(cinfo, pool_id, + sizeof(struct jvirt_barray_control)); result->mem_buffer = NULL; /* marks array 
not yet realized */ result->rows_in_array = numrows; @@ -637,10 +635,10 @@ request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero, METHODDEF(void) -realize_virt_arrays (j_common_ptr cinfo) +realize_virt_arrays(j_common_ptr cinfo) /* Allocate the in-memory buffers for any unrealized virtual arrays */ { - my_mem_ptr mem = (my_mem_ptr) cinfo->mem; + my_mem_ptr mem = (my_mem_ptr)cinfo->mem; size_t space_per_minheight, maximum_space, avail_mem; size_t minheights, max_minheights; jvirt_sarray_ptr sptr; @@ -654,11 +652,11 @@ realize_virt_arrays (j_common_ptr cinfo) maximum_space = 0; for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) { if (sptr->mem_buffer == NULL) { /* if not realized yet */ - size_t new_space = (long) sptr->rows_in_array * - (long) sptr->samplesperrow * sizeof(JSAMPLE); + size_t new_space = (long)sptr->rows_in_array * + (long)sptr->samplesperrow * sizeof(JSAMPLE); - space_per_minheight += (long) sptr->maxaccess * - (long) sptr->samplesperrow * sizeof(JSAMPLE); + space_per_minheight += (long)sptr->maxaccess * + (long)sptr->samplesperrow * sizeof(JSAMPLE); if (SIZE_MAX - maximum_space < new_space) out_of_memory(cinfo, 10); maximum_space += new_space; @@ -666,11 +664,11 @@ realize_virt_arrays (j_common_ptr cinfo) } for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) { if (bptr->mem_buffer == NULL) { /* if not realized yet */ - size_t new_space = (long) bptr->rows_in_array * - (long) bptr->blocksperrow * sizeof(JBLOCK); + size_t new_space = (long)bptr->rows_in_array * + (long)bptr->blocksperrow * sizeof(JBLOCK); - space_per_minheight += (long) bptr->maxaccess * - (long) bptr->blocksperrow * sizeof(JBLOCK); + space_per_minheight += (long)bptr->maxaccess * + (long)bptr->blocksperrow * sizeof(JBLOCK); if (SIZE_MAX - maximum_space < new_space) out_of_memory(cinfo, 11); maximum_space += new_space; @@ -703,17 +701,17 @@ realize_virt_arrays (j_common_ptr cinfo) for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = 
sptr->next) { if (sptr->mem_buffer == NULL) { /* if not realized yet */ - minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L; + minheights = ((long)sptr->rows_in_array - 1L) / sptr->maxaccess + 1L; if (minheights <= max_minheights) { /* This buffer fits in memory */ sptr->rows_in_mem = sptr->rows_in_array; } else { /* It doesn't fit in memory, create backing store. */ - sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess); - jpeg_open_backing_store(cinfo, & sptr->b_s_info, - (long) sptr->rows_in_array * - (long) sptr->samplesperrow * - (long) sizeof(JSAMPLE)); + sptr->rows_in_mem = (JDIMENSION)(max_minheights * sptr->maxaccess); + jpeg_open_backing_store(cinfo, &sptr->b_s_info, + (long)sptr->rows_in_array * + (long)sptr->samplesperrow * + (long)sizeof(JSAMPLE)); sptr->b_s_open = TRUE; } sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE, @@ -727,17 +725,17 @@ realize_virt_arrays (j_common_ptr cinfo) for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) { if (bptr->mem_buffer == NULL) { /* if not realized yet */ - minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L; + minheights = ((long)bptr->rows_in_array - 1L) / bptr->maxaccess + 1L; if (minheights <= max_minheights) { /* This buffer fits in memory */ bptr->rows_in_mem = bptr->rows_in_array; } else { /* It doesn't fit in memory, create backing store. 
*/ - bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess); - jpeg_open_backing_store(cinfo, & bptr->b_s_info, - (long) bptr->rows_in_array * - (long) bptr->blocksperrow * - (long) sizeof(JBLOCK)); + bptr->rows_in_mem = (JDIMENSION)(max_minheights * bptr->maxaccess); + jpeg_open_backing_store(cinfo, &bptr->b_s_info, + (long)bptr->rows_in_array * + (long)bptr->blocksperrow * + (long)sizeof(JBLOCK)); bptr->b_s_open = TRUE; } bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE, @@ -752,32 +750,32 @@ realize_virt_arrays (j_common_ptr cinfo) LOCAL(void) -do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing) +do_sarray_io(j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing) /* Do backing store read or write of a virtual sample array */ { long bytesperrow, file_offset, byte_count, rows, thisrow, i; - bytesperrow = (long) ptr->samplesperrow * sizeof(JSAMPLE); + bytesperrow = (long)ptr->samplesperrow * sizeof(JSAMPLE); file_offset = ptr->cur_start_row * bytesperrow; /* Loop to read or write each allocation chunk in mem_buffer */ - for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) { + for (i = 0; i < (long)ptr->rows_in_mem; i += ptr->rowsperchunk) { /* One chunk, but check for short chunk at end of buffer */ - rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i); + rows = MIN((long)ptr->rowsperchunk, (long)ptr->rows_in_mem - i); /* Transfer no more than is currently defined */ - thisrow = (long) ptr->cur_start_row + i; - rows = MIN(rows, (long) ptr->first_undef_row - thisrow); + thisrow = (long)ptr->cur_start_row + i; + rows = MIN(rows, (long)ptr->first_undef_row - thisrow); /* Transfer no more than fits in file */ - rows = MIN(rows, (long) ptr->rows_in_array - thisrow); + rows = MIN(rows, (long)ptr->rows_in_array - thisrow); if (rows <= 0) /* this chunk might be past end of file! 
*/ break; byte_count = rows * bytesperrow; if (writing) - (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info, - (void *) ptr->mem_buffer[i], + (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info, + (void *)ptr->mem_buffer[i], file_offset, byte_count); else - (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info, - (void *) ptr->mem_buffer[i], + (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info, + (void *)ptr->mem_buffer[i], file_offset, byte_count); file_offset += byte_count; } @@ -785,32 +783,32 @@ do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing) LOCAL(void) -do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing) +do_barray_io(j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing) /* Do backing store read or write of a virtual coefficient-block array */ { long bytesperrow, file_offset, byte_count, rows, thisrow, i; - bytesperrow = (long) ptr->blocksperrow * sizeof(JBLOCK); + bytesperrow = (long)ptr->blocksperrow * sizeof(JBLOCK); file_offset = ptr->cur_start_row * bytesperrow; /* Loop to read or write each allocation chunk in mem_buffer */ - for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) { + for (i = 0; i < (long)ptr->rows_in_mem; i += ptr->rowsperchunk) { /* One chunk, but check for short chunk at end of buffer */ - rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i); + rows = MIN((long)ptr->rowsperchunk, (long)ptr->rows_in_mem - i); /* Transfer no more than is currently defined */ - thisrow = (long) ptr->cur_start_row + i; - rows = MIN(rows, (long) ptr->first_undef_row - thisrow); + thisrow = (long)ptr->cur_start_row + i; + rows = MIN(rows, (long)ptr->first_undef_row - thisrow); /* Transfer no more than fits in file */ - rows = MIN(rows, (long) ptr->rows_in_array - thisrow); + rows = MIN(rows, (long)ptr->rows_in_array - thisrow); if (rows <= 0) /* this chunk might be past end of file! 
*/ break; byte_count = rows * bytesperrow; if (writing) - (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info, - (void *) ptr->mem_buffer[i], + (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info, + (void *)ptr->mem_buffer[i], file_offset, byte_count); else - (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info, - (void *) ptr->mem_buffer[i], + (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info, + (void *)ptr->mem_buffer[i], file_offset, byte_count); file_offset += byte_count; } @@ -818,9 +816,8 @@ do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing) METHODDEF(JSAMPARRAY) -access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr, - JDIMENSION start_row, JDIMENSION num_rows, - boolean writable) +access_virt_sarray(j_common_ptr cinfo, jvirt_sarray_ptr ptr, + JDIMENSION start_row, JDIMENSION num_rows, boolean writable) /* Access the part of a virtual sample array starting at start_row */ /* and extending for num_rows rows. writable is true if */ /* caller intends to modify the accessed area. */ @@ -835,8 +832,8 @@ access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr, /* Make the desired part of the virtual array accessible */ if (start_row < ptr->cur_start_row || - end_row > ptr->cur_start_row+ptr->rows_in_mem) { - if (! ptr->b_s_open) + end_row > ptr->cur_start_row + ptr->rows_in_mem) { + if (!ptr->b_s_open) ERREXIT(cinfo, JERR_VIRTUAL_BUG); /* Flush old buffer contents if necessary */ if (ptr->dirty) { @@ -856,10 +853,10 @@ access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr, /* use long arithmetic here to avoid overflow & unsigned problems */ long ltemp; - ltemp = (long) end_row - (long) ptr->rows_in_mem; + ltemp = (long)end_row - (long)ptr->rows_in_mem; if (ltemp < 0) ltemp = 0; /* don't fall off front end of file */ - ptr->cur_start_row = (JDIMENSION) ltemp; + ptr->cur_start_row = (JDIMENSION)ltemp; } /* Read in the selected part of the array. 
* During the initial write pass, we will do no actual read @@ -882,15 +879,15 @@ access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr, if (writable) ptr->first_undef_row = end_row; if (ptr->pre_zero) { - size_t bytesperrow = (size_t) ptr->samplesperrow * sizeof(JSAMPLE); + size_t bytesperrow = (size_t)ptr->samplesperrow * sizeof(JSAMPLE); undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */ end_row -= ptr->cur_start_row; while (undef_row < end_row) { - jzero_far((void *) ptr->mem_buffer[undef_row], bytesperrow); + jzero_far((void *)ptr->mem_buffer[undef_row], bytesperrow); undef_row++; } } else { - if (! writable) /* reader looking at undefined data */ + if (!writable) /* reader looking at undefined data */ ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); } } @@ -903,9 +900,8 @@ access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr, METHODDEF(JBLOCKARRAY) -access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr, - JDIMENSION start_row, JDIMENSION num_rows, - boolean writable) +access_virt_barray(j_common_ptr cinfo, jvirt_barray_ptr ptr, + JDIMENSION start_row, JDIMENSION num_rows, boolean writable) /* Access the part of a virtual block array starting at start_row */ /* and extending for num_rows rows. writable is true if */ /* caller intends to modify the accessed area. */ @@ -920,8 +916,8 @@ access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr, /* Make the desired part of the virtual array accessible */ if (start_row < ptr->cur_start_row || - end_row > ptr->cur_start_row+ptr->rows_in_mem) { - if (! 
ptr->b_s_open) + end_row > ptr->cur_start_row + ptr->rows_in_mem) { + if (!ptr->b_s_open) ERREXIT(cinfo, JERR_VIRTUAL_BUG); /* Flush old buffer contents if necessary */ if (ptr->dirty) { @@ -941,10 +937,10 @@ access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr, /* use long arithmetic here to avoid overflow & unsigned problems */ long ltemp; - ltemp = (long) end_row - (long) ptr->rows_in_mem; + ltemp = (long)end_row - (long)ptr->rows_in_mem; if (ltemp < 0) ltemp = 0; /* don't fall off front end of file */ - ptr->cur_start_row = (JDIMENSION) ltemp; + ptr->cur_start_row = (JDIMENSION)ltemp; } /* Read in the selected part of the array. * During the initial write pass, we will do no actual read @@ -967,15 +963,15 @@ access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr, if (writable) ptr->first_undef_row = end_row; if (ptr->pre_zero) { - size_t bytesperrow = (size_t) ptr->blocksperrow * sizeof(JBLOCK); + size_t bytesperrow = (size_t)ptr->blocksperrow * sizeof(JBLOCK); undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */ end_row -= ptr->cur_start_row; while (undef_row < end_row) { - jzero_far((void *) ptr->mem_buffer[undef_row], bytesperrow); + jzero_far((void *)ptr->mem_buffer[undef_row], bytesperrow); undef_row++; } } else { - if (! 
writable) /* reader looking at undefined data */ + if (!writable) /* reader looking at undefined data */ ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS); } } @@ -992,9 +988,9 @@ access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr, */ METHODDEF(void) -free_pool (j_common_ptr cinfo, int pool_id) +free_pool(j_common_ptr cinfo, int pool_id) { - my_mem_ptr mem = (my_mem_ptr) cinfo->mem; + my_mem_ptr mem = (my_mem_ptr)cinfo->mem; small_pool_ptr shdr_ptr; large_pool_ptr lhdr_ptr; size_t space_freed; @@ -1015,14 +1011,14 @@ free_pool (j_common_ptr cinfo, int pool_id) for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) { if (sptr->b_s_open) { /* there may be no backing store */ sptr->b_s_open = FALSE; /* prevent recursive close if error */ - (*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info); + (*sptr->b_s_info.close_backing_store) (cinfo, &sptr->b_s_info); } } mem->virt_sarray_list = NULL; for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) { if (bptr->b_s_open) { /* there may be no backing store */ bptr->b_s_open = FALSE; /* prevent recursive close if error */ - (*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info); + (*bptr->b_s_info.close_backing_store) (cinfo, &bptr->b_s_info); } } mem->virt_barray_list = NULL; @@ -1037,7 +1033,7 @@ free_pool (j_common_ptr cinfo, int pool_id) space_freed = lhdr_ptr->bytes_used + lhdr_ptr->bytes_left + sizeof(large_pool_hdr); - jpeg_free_large(cinfo, (void *) lhdr_ptr, space_freed); + jpeg_free_large(cinfo, (void *)lhdr_ptr, space_freed); mem->total_space_allocated -= space_freed; lhdr_ptr = next_lhdr_ptr; } @@ -1048,10 +1044,9 @@ free_pool (j_common_ptr cinfo, int pool_id) while (shdr_ptr != NULL) { small_pool_ptr next_shdr_ptr = shdr_ptr->next; - space_freed = shdr_ptr->bytes_used + - shdr_ptr->bytes_left + + space_freed = shdr_ptr->bytes_used + shdr_ptr->bytes_left + sizeof(small_pool_hdr); - jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed); + jpeg_free_small(cinfo, (void 
*)shdr_ptr, space_freed); mem->total_space_allocated -= space_freed; shdr_ptr = next_shdr_ptr; } @@ -1064,7 +1059,7 @@ free_pool (j_common_ptr cinfo, int pool_id) */ METHODDEF(void) -self_destruct (j_common_ptr cinfo) +self_destruct(j_common_ptr cinfo) { int pool; @@ -1072,12 +1067,12 @@ self_destruct (j_common_ptr cinfo) * Releasing pools in reverse order might help avoid fragmentation * with some (brain-damaged) malloc libraries. */ - for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) { + for (pool = JPOOL_NUMPOOLS - 1; pool >= JPOOL_PERMANENT; pool--) { free_pool(cinfo, pool); } /* Release the memory manager control block too. */ - jpeg_free_small(cinfo, (void *) cinfo->mem, sizeof(my_memory_mgr)); + jpeg_free_small(cinfo, (void *)cinfo->mem, sizeof(my_memory_mgr)); cinfo->mem = NULL; /* ensures I will be called only once */ jpeg_mem_term(cinfo); /* system-dependent cleanup */ @@ -1090,7 +1085,7 @@ self_destruct (j_common_ptr cinfo) */ GLOBAL(void) -jinit_memory_mgr (j_common_ptr cinfo) +jinit_memory_mgr(j_common_ptr cinfo) { my_mem_ptr mem; long max_to_use; @@ -1106,22 +1101,22 @@ jinit_memory_mgr (j_common_ptr cinfo) * in common if and only if X is a power of 2, ie has only one one-bit. * Some compilers may give an "unreachable code" warning here; ignore it. */ - if ((ALIGN_SIZE & (ALIGN_SIZE-1)) != 0) + if ((ALIGN_SIZE & (ALIGN_SIZE - 1)) != 0) ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE); /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be * a multiple of ALIGN_SIZE. * Again, an "unreachable code" warning may be ignored here. * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK. 
*/ - test_mac = (size_t) MAX_ALLOC_CHUNK; - if ((long) test_mac != MAX_ALLOC_CHUNK || + test_mac = (size_t)MAX_ALLOC_CHUNK; + if ((long)test_mac != MAX_ALLOC_CHUNK || (MAX_ALLOC_CHUNK % ALIGN_SIZE) != 0) ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK); max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */ /* Attempt to allocate memory manager's control block */ - mem = (my_mem_ptr) jpeg_get_small(cinfo, sizeof(my_memory_mgr)); + mem = (my_mem_ptr)jpeg_get_small(cinfo, sizeof(my_memory_mgr)); if (mem == NULL) { jpeg_mem_term(cinfo); /* system-dependent cleanup */ @@ -1147,7 +1142,7 @@ jinit_memory_mgr (j_common_ptr cinfo) /* Initialize working state */ mem->pub.max_memory_to_use = max_to_use; - for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) { + for (pool = JPOOL_NUMPOOLS - 1; pool >= JPOOL_PERMANENT; pool--) { mem->small_list[pool] = NULL; mem->large_list[pool] = NULL; } @@ -1157,7 +1152,7 @@ jinit_memory_mgr (j_common_ptr cinfo) mem->total_space_allocated = sizeof(my_memory_mgr); /* Declare ourselves open for business */ - cinfo->mem = & mem->pub; + cinfo->mem = &mem->pub; /* Check for an environment variable JPEGMEM; if found, override the * default max_memory setting from jpeg_mem_init. Note that the @@ -1166,7 +1161,8 @@ jinit_memory_mgr (j_common_ptr cinfo) * this feature. */ #ifndef NO_GETENV - { char *memenv; + { + char *memenv; if ((memenv = getenv("JPEGMEM")) != NULL) { char ch = 'x'; diff --git a/jmemnobs.c b/jmemnobs.c index ac12afa..089be8f 100644 --- a/jmemnobs.c +++ b/jmemnobs.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1992-1996, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2017, D. R. Commander. + * Copyright (C) 2017-2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -23,8 +23,8 @@ #include "jmemsys.h" /* import the system-dependent declarations */ #ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ -extern void *malloc (size_t size); -extern void free (void *ptr); +extern void *malloc(size_t size); +extern void free(void *ptr); #endif @@ -34,13 +34,13 @@ extern void free (void *ptr); */ GLOBAL(void *) -jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject) +jpeg_get_small(j_common_ptr cinfo, size_t sizeofobject) { - return (void *) malloc(sizeofobject); + return (void *)malloc(sizeofobject); } GLOBAL(void) -jpeg_free_small (j_common_ptr cinfo, void *object, size_t sizeofobject) +jpeg_free_small(j_common_ptr cinfo, void *object, size_t sizeofobject) { free(object); } @@ -51,13 +51,13 @@ jpeg_free_small (j_common_ptr cinfo, void *object, size_t sizeofobject) */ GLOBAL(void *) -jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject) +jpeg_get_large(j_common_ptr cinfo, size_t sizeofobject) { - return (void *) malloc(sizeofobject); + return (void *)malloc(sizeofobject); } GLOBAL(void) -jpeg_free_large (j_common_ptr cinfo, void *object, size_t sizeofobject) +jpeg_free_large(j_common_ptr cinfo, void *object, size_t sizeofobject) { free(object); } @@ -68,11 +68,11 @@ jpeg_free_large (j_common_ptr cinfo, void *object, size_t sizeofobject) */ GLOBAL(size_t) -jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed, - size_t max_bytes_needed, size_t already_allocated) +jpeg_mem_available(j_common_ptr cinfo, size_t min_bytes_needed, + size_t max_bytes_needed, size_t already_allocated) { if (cinfo->mem->max_memory_to_use) { - if (cinfo->mem->max_memory_to_use > already_allocated) + if ((size_t)cinfo->mem->max_memory_to_use > already_allocated) return cinfo->mem->max_memory_to_use - already_allocated; else return 0; @@ -90,8 +90,8 @@ jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed, */ GLOBAL(void) -jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info, - long total_bytes_needed) 
+jpeg_open_backing_store(j_common_ptr cinfo, backing_store_ptr info, + long total_bytes_needed) { ERREXIT(cinfo, JERR_NO_BACKING_STORE); } @@ -103,13 +103,13 @@ jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info, */ GLOBAL(long) -jpeg_mem_init (j_common_ptr cinfo) +jpeg_mem_init(j_common_ptr cinfo) { return 0; /* just set max_memory_to_use to 0 */ } GLOBAL(void) -jpeg_mem_term (j_common_ptr cinfo) +jpeg_mem_term(j_common_ptr cinfo) { /* no work */ } diff --git a/jmemsys.h b/jmemsys.h index f7dfe87..9229550 100644 --- a/jmemsys.h +++ b/jmemsys.h @@ -31,9 +31,9 @@ * size of the object being freed, just in case it's needed. */ -EXTERN(void *) jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject); -EXTERN(void) jpeg_free_small (j_common_ptr cinfo, void *object, - size_t sizeofobject); +EXTERN(void *) jpeg_get_small(j_common_ptr cinfo, size_t sizeofobject); +EXTERN(void) jpeg_free_small(j_common_ptr cinfo, void *object, + size_t sizeofobject); /* * These two functions are used to allocate and release large chunks of @@ -43,9 +43,9 @@ EXTERN(void) jpeg_free_small (j_common_ptr cinfo, void *object, * large chunks. */ -EXTERN(void *) jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject); -EXTERN(void) jpeg_free_large (j_common_ptr cinfo, void *object, - size_t sizeofobject); +EXTERN(void *) jpeg_get_large(j_common_ptr cinfo, size_t sizeofobject); +EXTERN(void) jpeg_free_large(j_common_ptr cinfo, void *object, + size_t sizeofobject); /* * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may @@ -84,9 +84,9 @@ EXTERN(void) jpeg_free_large (j_common_ptr cinfo, void *object, * Conversely, zero may be returned to always use the minimum amount of memory. 
*/ -EXTERN(size_t) jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed, - size_t max_bytes_needed, - size_t already_allocated); +EXTERN(size_t) jpeg_mem_available(j_common_ptr cinfo, size_t min_bytes_needed, + size_t max_bytes_needed, + size_t already_allocated); /* @@ -157,9 +157,9 @@ typedef struct backing_store_struct { * just take an error exit.) */ -EXTERN(void) jpeg_open_backing_store (j_common_ptr cinfo, - backing_store_ptr info, - long total_bytes_needed); +EXTERN(void) jpeg_open_backing_store(j_common_ptr cinfo, + backing_store_ptr info, + long total_bytes_needed); /* @@ -174,5 +174,5 @@ EXTERN(void) jpeg_open_backing_store (j_common_ptr cinfo, * all opened backing-store objects have been closed. */ -EXTERN(long) jpeg_mem_init (j_common_ptr cinfo); -EXTERN(void) jpeg_mem_term (j_common_ptr cinfo); +EXTERN(long) jpeg_mem_init(j_common_ptr cinfo); +EXTERN(void) jpeg_mem_term(j_common_ptr cinfo); diff --git a/jmorecfg.h b/jmorecfg.h index 1d96786..d0b9300 100644 --- a/jmorecfg.h +++ b/jmorecfg.h @@ -5,7 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 1997-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2009, 2011, 2014-2015, D. R. Commander. + * Copyright (C) 2009, 2011, 2014-2015, 2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -17,9 +17,9 @@ /* * Maximum number of components (color channels) allowed in JPEG image. - * To meet the letter of the JPEG spec, set this to 255. However, darn - * few applications need more than 4 channels (maybe 5 for CMYK + alpha - * mask). We recommend 10 as a reasonable compromise; use 4 if you are + * To meet the letter of Rec. ITU-T T.81 | ISO/IEC 10918-1, set this to 255. + * However, darn few applications need more than 4 channels (maybe 5 for CMYK + + * alpha mask). We recommend 10 as a reasonable compromise; use 4 if you are * really short on memory. 
(Each allowed component costs a hundred or so * bytes of storage, whether actually used in an image or not.) */ @@ -49,15 +49,15 @@ #ifdef HAVE_UNSIGNED_CHAR typedef unsigned char JSAMPLE; -#define GETJSAMPLE(value) ((int) (value)) +#define GETJSAMPLE(value) ((int)(value)) #else /* not HAVE_UNSIGNED_CHAR */ typedef char JSAMPLE; #ifdef __CHAR_UNSIGNED__ -#define GETJSAMPLE(value) ((int) (value)) +#define GETJSAMPLE(value) ((int)(value)) #else -#define GETJSAMPLE(value) ((int) (value) & 0xFF) +#define GETJSAMPLE(value) ((int)(value) & 0xFF) #endif /* __CHAR_UNSIGNED__ */ #endif /* HAVE_UNSIGNED_CHAR */ @@ -74,7 +74,7 @@ typedef char JSAMPLE; */ typedef short JSAMPLE; -#define GETJSAMPLE(value) ((int) (value)) +#define GETJSAMPLE(value) ((int)(value)) #define MAXJSAMPLE 4095 #define CENTERJSAMPLE 2048 @@ -173,9 +173,9 @@ typedef short INT16; */ #ifndef XMD_H /* X11/xmd.h correctly defines INT32 */ -#ifndef _BASETSD_H_ /* Microsoft defines it in basetsd.h */ -#ifndef _BASETSD_H /* MinGW is slightly different */ -#ifndef QGLOBAL_H /* Qt defines it in qglobal.h */ +#ifndef _BASETSD_H_ /* Microsoft defines it in basetsd.h */ +#ifndef _BASETSD_H /* MinGW is slightly different */ +#ifndef QGLOBAL_H /* Qt defines it in qglobal.h */ typedef long INT32; #endif #endif @@ -220,7 +220,7 @@ typedef unsigned int JDIMENSION; * software out there that uses it. */ -#define JMETHOD(type,methodname,arglist) type (*methodname) arglist +#define JMETHOD(type, methodname, arglist) type (*methodname) arglist /* libjpeg-turbo no longer supports platforms that have far symbols (MS-DOS), @@ -315,10 +315,10 @@ typedef int boolean; * with it. In reality, few people ever did this, because there were some * severe restrictions involved (cjpeg and djpeg no longer worked properly, * compressing/decompressing RGB JPEGs no longer worked properly, and the color - * quantizer wouldn't work with pixel sizes other than 3.) 
Further, since all - * of the O/S-supplied versions of libjpeg were built with the default values - * of RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE, many applications have - * come to regard these values as immutable. + * quantizer wouldn't work with pixel sizes other than 3.) Furthermore, since + * all of the O/S-supplied versions of libjpeg were built with the default + * values of RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE, many applications + * have come to regard these values as immutable. * * The libjpeg-turbo colorspace extensions provide a much cleaner way of * compressing from/decompressing to buffers with arbitrary component orders @@ -333,37 +333,37 @@ typedef int boolean; #define RGB_BLUE 2 /* Offset of Blue */ #define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */ -#define JPEG_NUMCS 17 +#define JPEG_NUMCS 17 -#define EXT_RGB_RED 0 -#define EXT_RGB_GREEN 1 -#define EXT_RGB_BLUE 2 -#define EXT_RGB_PIXELSIZE 3 +#define EXT_RGB_RED 0 +#define EXT_RGB_GREEN 1 +#define EXT_RGB_BLUE 2 +#define EXT_RGB_PIXELSIZE 3 -#define EXT_RGBX_RED 0 -#define EXT_RGBX_GREEN 1 -#define EXT_RGBX_BLUE 2 -#define EXT_RGBX_PIXELSIZE 4 +#define EXT_RGBX_RED 0 +#define EXT_RGBX_GREEN 1 +#define EXT_RGBX_BLUE 2 +#define EXT_RGBX_PIXELSIZE 4 -#define EXT_BGR_RED 2 -#define EXT_BGR_GREEN 1 -#define EXT_BGR_BLUE 0 -#define EXT_BGR_PIXELSIZE 3 +#define EXT_BGR_RED 2 +#define EXT_BGR_GREEN 1 +#define EXT_BGR_BLUE 0 +#define EXT_BGR_PIXELSIZE 3 -#define EXT_BGRX_RED 2 -#define EXT_BGRX_GREEN 1 -#define EXT_BGRX_BLUE 0 -#define EXT_BGRX_PIXELSIZE 4 +#define EXT_BGRX_RED 2 +#define EXT_BGRX_GREEN 1 +#define EXT_BGRX_BLUE 0 +#define EXT_BGRX_PIXELSIZE 4 -#define EXT_XBGR_RED 3 -#define EXT_XBGR_GREEN 2 -#define EXT_XBGR_BLUE 1 -#define EXT_XBGR_PIXELSIZE 4 +#define EXT_XBGR_RED 3 +#define EXT_XBGR_GREEN 2 +#define EXT_XBGR_BLUE 1 +#define EXT_XBGR_PIXELSIZE 4 -#define EXT_XRGB_RED 1 -#define EXT_XRGB_GREEN 2 -#define EXT_XRGB_BLUE 3 -#define EXT_XRGB_PIXELSIZE 4 
+#define EXT_XRGB_RED 1 +#define EXT_XRGB_GREEN 2 +#define EXT_XRGB_BLUE 3 +#define EXT_XRGB_PIXELSIZE 4 static const int rgb_red[JPEG_NUMCS] = { -1, -1, RGB_RED, -1, -1, -1, EXT_RGB_RED, EXT_RGBX_RED, @@ -404,7 +404,7 @@ static const int rgb_pixelsize[JPEG_NUMCS] = { #ifndef WITH_SIMD #define MULTIPLIER int /* type for fastest integer multiply */ #else -#define MULTIPLIER short /* prefer 16-bit with SIMD for parellelism */ +#define MULTIPLIER short /* prefer 16-bit with SIMD for parellelism */ #endif #endif diff --git a/jpegcomp.h b/jpegcomp.h index ade0d1e..b32d544 100644 --- a/jpegcomp.h +++ b/jpegcomp.h @@ -11,21 +11,21 @@ */ #if JPEG_LIB_VERSION >= 70 -#define _DCT_scaled_size DCT_h_scaled_size -#define _DCT_h_scaled_size DCT_h_scaled_size -#define _DCT_v_scaled_size DCT_v_scaled_size -#define _min_DCT_scaled_size min_DCT_h_scaled_size -#define _min_DCT_h_scaled_size min_DCT_h_scaled_size -#define _min_DCT_v_scaled_size min_DCT_v_scaled_size -#define _jpeg_width jpeg_width -#define _jpeg_height jpeg_height +#define _DCT_scaled_size DCT_h_scaled_size +#define _DCT_h_scaled_size DCT_h_scaled_size +#define _DCT_v_scaled_size DCT_v_scaled_size +#define _min_DCT_scaled_size min_DCT_h_scaled_size +#define _min_DCT_h_scaled_size min_DCT_h_scaled_size +#define _min_DCT_v_scaled_size min_DCT_v_scaled_size +#define _jpeg_width jpeg_width +#define _jpeg_height jpeg_height #else -#define _DCT_scaled_size DCT_scaled_size -#define _DCT_h_scaled_size DCT_scaled_size -#define _DCT_v_scaled_size DCT_scaled_size -#define _min_DCT_scaled_size min_DCT_scaled_size -#define _min_DCT_h_scaled_size min_DCT_scaled_size -#define _min_DCT_v_scaled_size min_DCT_scaled_size -#define _jpeg_width image_width -#define _jpeg_height image_height +#define _DCT_scaled_size DCT_scaled_size +#define _DCT_h_scaled_size DCT_scaled_size +#define _DCT_v_scaled_size DCT_scaled_size +#define _min_DCT_scaled_size min_DCT_scaled_size +#define _min_DCT_h_scaled_size min_DCT_scaled_size +#define 
_min_DCT_v_scaled_size min_DCT_scaled_size +#define _jpeg_width image_width +#define _jpeg_height image_height #endif diff --git a/jpegint.h b/jpegint.h index 9979a91..ad36ca8 100644 --- a/jpegint.h +++ b/jpegint.h @@ -27,21 +27,21 @@ typedef enum { /* Operating modes for buffer controllers */ } J_BUF_MODE; /* Values of global_state field (jdapi.c has some dependencies on ordering!) */ -#define CSTATE_START 100 /* after create_compress */ -#define CSTATE_SCANNING 101 /* start_compress done, write_scanlines OK */ -#define CSTATE_RAW_OK 102 /* start_compress done, write_raw_data OK */ -#define CSTATE_WRCOEFS 103 /* jpeg_write_coefficients done */ -#define DSTATE_START 200 /* after create_decompress */ -#define DSTATE_INHEADER 201 /* reading header markers, no SOS yet */ -#define DSTATE_READY 202 /* found SOS, ready for start_decompress */ -#define DSTATE_PRELOAD 203 /* reading multiscan file in start_decompress*/ -#define DSTATE_PRESCAN 204 /* performing dummy pass for 2-pass quant */ -#define DSTATE_SCANNING 205 /* start_decompress done, read_scanlines OK */ -#define DSTATE_RAW_OK 206 /* start_decompress done, read_raw_data OK */ -#define DSTATE_BUFIMAGE 207 /* expecting jpeg_start_output */ -#define DSTATE_BUFPOST 208 /* looking for SOS/EOI in jpeg_finish_output */ -#define DSTATE_RDCOEFS 209 /* reading file in jpeg_read_coefficients */ -#define DSTATE_STOPPING 210 /* looking for EOI in jpeg_finish_decompress */ +#define CSTATE_START 100 /* after create_compress */ +#define CSTATE_SCANNING 101 /* start_compress done, write_scanlines OK */ +#define CSTATE_RAW_OK 102 /* start_compress done, write_raw_data OK */ +#define CSTATE_WRCOEFS 103 /* jpeg_write_coefficients done */ +#define DSTATE_START 200 /* after create_decompress */ +#define DSTATE_INHEADER 201 /* reading header markers, no SOS yet */ +#define DSTATE_READY 202 /* found SOS, ready for start_decompress */ +#define DSTATE_PRELOAD 203 /* reading multiscan file in start_decompress*/ +#define DSTATE_PRESCAN 204 
/* performing dummy pass for 2-pass quant */ +#define DSTATE_SCANNING 205 /* start_decompress done, read_scanlines OK */ +#define DSTATE_RAW_OK 206 /* start_decompress done, read_raw_data OK */ +#define DSTATE_BUFIMAGE 207 /* expecting jpeg_start_output */ +#define DSTATE_BUFPOST 208 /* looking for SOS/EOI in jpeg_finish_output */ +#define DSTATE_RDCOEFS 209 /* reading file in jpeg_read_coefficients */ +#define DSTATE_STOPPING 210 /* looking for EOI in jpeg_finish_decompress */ /* JLONG must hold at least signed 32-bit values. */ @@ -53,7 +53,7 @@ typedef long JLONG; * sanitizer warnings */ -#define LEFT_SHIFT(a, b) ((JLONG)((unsigned long)(a) << (b))) +#define LEFT_SHIFT(a, b) ((JLONG)((unsigned long)(a) << (b))) /* Declarations for compression modules */ @@ -274,9 +274,9 @@ struct jpeg_color_quantizer { /* Miscellaneous useful macros */ #undef MAX -#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) #undef MIN -#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) /* We assume that right shift corresponds to signed division by 2 with @@ -291,64 +291,64 @@ struct jpeg_color_quantizer { #ifdef RIGHT_SHIFT_IS_UNSIGNED #define SHIFT_TEMPS JLONG shift_temp; -#define RIGHT_SHIFT(x,shft) \ - ((shift_temp = (x)) < 0 ? \ - (shift_temp >> (shft)) | ((~((JLONG) 0)) << (32-(shft))) : \ - (shift_temp >> (shft))) +#define RIGHT_SHIFT(x, shft) \ + ((shift_temp = (x)) < 0 ? 
\ + (shift_temp >> (shft)) | ((~((JLONG)0)) << (32 - (shft))) : \ + (shift_temp >> (shft))) #else #define SHIFT_TEMPS -#define RIGHT_SHIFT(x,shft) ((x) >> (shft)) +#define RIGHT_SHIFT(x, shft) ((x) >> (shft)) #endif /* Compression module initialization routines */ -EXTERN(void) jinit_compress_master (j_compress_ptr cinfo); -EXTERN(void) jinit_c_master_control (j_compress_ptr cinfo, - boolean transcode_only); -EXTERN(void) jinit_c_main_controller (j_compress_ptr cinfo, - boolean need_full_buffer); -EXTERN(void) jinit_c_prep_controller (j_compress_ptr cinfo, - boolean need_full_buffer); -EXTERN(void) jinit_c_coef_controller (j_compress_ptr cinfo, - boolean need_full_buffer); -EXTERN(void) jinit_color_converter (j_compress_ptr cinfo); -EXTERN(void) jinit_downsampler (j_compress_ptr cinfo); -EXTERN(void) jinit_forward_dct (j_compress_ptr cinfo); -EXTERN(void) jinit_huff_encoder (j_compress_ptr cinfo); -EXTERN(void) jinit_phuff_encoder (j_compress_ptr cinfo); -EXTERN(void) jinit_arith_encoder (j_compress_ptr cinfo); -EXTERN(void) jinit_marker_writer (j_compress_ptr cinfo); +EXTERN(void) jinit_compress_master(j_compress_ptr cinfo); +EXTERN(void) jinit_c_master_control(j_compress_ptr cinfo, + boolean transcode_only); +EXTERN(void) jinit_c_main_controller(j_compress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_c_prep_controller(j_compress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_c_coef_controller(j_compress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_color_converter(j_compress_ptr cinfo); +EXTERN(void) jinit_downsampler(j_compress_ptr cinfo); +EXTERN(void) jinit_forward_dct(j_compress_ptr cinfo); +EXTERN(void) jinit_huff_encoder(j_compress_ptr cinfo); +EXTERN(void) jinit_phuff_encoder(j_compress_ptr cinfo); +EXTERN(void) jinit_arith_encoder(j_compress_ptr cinfo); +EXTERN(void) jinit_marker_writer(j_compress_ptr cinfo); /* Decompression module initialization routines */ -EXTERN(void) jinit_master_decompress (j_decompress_ptr 
cinfo); -EXTERN(void) jinit_d_main_controller (j_decompress_ptr cinfo, - boolean need_full_buffer); -EXTERN(void) jinit_d_coef_controller (j_decompress_ptr cinfo, - boolean need_full_buffer); -EXTERN(void) jinit_d_post_controller (j_decompress_ptr cinfo, - boolean need_full_buffer); -EXTERN(void) jinit_input_controller (j_decompress_ptr cinfo); -EXTERN(void) jinit_marker_reader (j_decompress_ptr cinfo); -EXTERN(void) jinit_huff_decoder (j_decompress_ptr cinfo); -EXTERN(void) jinit_phuff_decoder (j_decompress_ptr cinfo); -EXTERN(void) jinit_arith_decoder (j_decompress_ptr cinfo); -EXTERN(void) jinit_inverse_dct (j_decompress_ptr cinfo); -EXTERN(void) jinit_upsampler (j_decompress_ptr cinfo); -EXTERN(void) jinit_color_deconverter (j_decompress_ptr cinfo); -EXTERN(void) jinit_1pass_quantizer (j_decompress_ptr cinfo); -EXTERN(void) jinit_2pass_quantizer (j_decompress_ptr cinfo); -EXTERN(void) jinit_merged_upsampler (j_decompress_ptr cinfo); +EXTERN(void) jinit_master_decompress(j_decompress_ptr cinfo); +EXTERN(void) jinit_d_main_controller(j_decompress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_d_coef_controller(j_decompress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_d_post_controller(j_decompress_ptr cinfo, + boolean need_full_buffer); +EXTERN(void) jinit_input_controller(j_decompress_ptr cinfo); +EXTERN(void) jinit_marker_reader(j_decompress_ptr cinfo); +EXTERN(void) jinit_huff_decoder(j_decompress_ptr cinfo); +EXTERN(void) jinit_phuff_decoder(j_decompress_ptr cinfo); +EXTERN(void) jinit_arith_decoder(j_decompress_ptr cinfo); +EXTERN(void) jinit_inverse_dct(j_decompress_ptr cinfo); +EXTERN(void) jinit_upsampler(j_decompress_ptr cinfo); +EXTERN(void) jinit_color_deconverter(j_decompress_ptr cinfo); +EXTERN(void) jinit_1pass_quantizer(j_decompress_ptr cinfo); +EXTERN(void) jinit_2pass_quantizer(j_decompress_ptr cinfo); +EXTERN(void) jinit_merged_upsampler(j_decompress_ptr cinfo); /* Memory manager initialization */ -EXTERN(void) 
jinit_memory_mgr (j_common_ptr cinfo); +EXTERN(void) jinit_memory_mgr(j_common_ptr cinfo); /* Utility routines in jutils.c */ -EXTERN(long) jdiv_round_up (long a, long b); -EXTERN(long) jround_up (long a, long b); -EXTERN(void) jcopy_sample_rows (JSAMPARRAY input_array, int source_row, - JSAMPARRAY output_array, int dest_row, - int num_rows, JDIMENSION num_cols); -EXTERN(void) jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row, - JDIMENSION num_blocks); -EXTERN(void) jzero_far (void *target, size_t bytestozero); +EXTERN(long) jdiv_round_up(long a, long b); +EXTERN(long) jround_up(long a, long b); +EXTERN(void) jcopy_sample_rows(JSAMPARRAY input_array, int source_row, + JSAMPARRAY output_array, int dest_row, + int num_rows, JDIMENSION num_cols); +EXTERN(void) jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row, + JDIMENSION num_blocks); +EXTERN(void) jzero_far(void *target, size_t bytestozero); /* Constant tables in jutils.c */ #if 0 /* This table is not actually needed in v6a */ extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */ diff --git a/jpeglib.h b/jpeglib.h index 6c63f58..33f8ad2 100644 --- a/jpeglib.h +++ b/jpeglib.h @@ -5,7 +5,7 @@ * Copyright (C) 1991-1998, Thomas G. Lane. * Modified 2002-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander. + * Copyright (C) 2009-2011, 2013-2014, 2016-2017, D. R. Commander. * Copyright (C) 2015, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. @@ -211,8 +211,8 @@ struct jpeg_marker_struct { /* Known color spaces. */ -#define JCS_EXTENSIONS 1 -#define JCS_ALPHA_EXTENSIONS 1 +#define JCS_EXTENSIONS 1 +#define JCS_ALPHA_EXTENSIONS 1 typedef enum { JCS_UNKNOWN, /* error/unspecified */ @@ -268,11 +268,11 @@ typedef enum { /* Common fields between JPEG compression and decompression master structs. 
*/ #define jpeg_common_fields \ - struct jpeg_error_mgr *err; /* Error handler module */\ - struct jpeg_memory_mgr *mem; /* Memory manager module */\ - struct jpeg_progress_mgr *progress; /* Progress monitor, or NULL if none */\ - void *client_data; /* Available for use by application */\ - boolean is_decompressor; /* So common code can tell which is which */\ + struct jpeg_error_mgr *err; /* Error handler module */ \ + struct jpeg_memory_mgr *mem; /* Memory manager module */ \ + struct jpeg_progress_mgr *progress; /* Progress monitor, or NULL if none */ \ + void *client_data; /* Available for use by application */ \ + boolean is_decompressor; /* So common code can tell which is which */ \ int global_state /* For checking call sequence validity */ /* Routines that are to be used by both halves of the library are declared @@ -822,9 +822,9 @@ struct jpeg_source_mgr { * successful. */ -#define JPOOL_PERMANENT 0 /* lasts until master record is destroyed */ -#define JPOOL_IMAGE 1 /* lasts until done with image/datastream */ -#define JPOOL_NUMPOOLS 2 +#define JPOOL_PERMANENT 0 /* lasts until master record is destroyed */ +#define JPOOL_IMAGE 1 /* lasts until done with image/datastream */ +#define JPOOL_NUMPOOLS 2 typedef struct jvirt_sarray_control *jvirt_sarray_ptr; typedef struct jvirt_barray_control *jvirt_barray_ptr; @@ -888,7 +888,7 @@ typedef boolean (*jpeg_marker_parser_method) (j_decompress_ptr cinfo); /* Default error-management setup */ -EXTERN(struct jpeg_error_mgr *) jpeg_std_error (struct jpeg_error_mgr *err); +EXTERN(struct jpeg_error_mgr *) jpeg_std_error(struct jpeg_error_mgr *err); /* Initialization of JPEG compression objects. * jpeg_create_compress() and jpeg_create_decompress() are the exported @@ -898,90 +898,95 @@ EXTERN(struct jpeg_error_mgr *) jpeg_std_error (struct jpeg_error_mgr *err); * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx. 
*/ #define jpeg_create_compress(cinfo) \ - jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \ - (size_t) sizeof(struct jpeg_compress_struct)) + jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \ + (size_t)sizeof(struct jpeg_compress_struct)) #define jpeg_create_decompress(cinfo) \ - jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \ - (size_t) sizeof(struct jpeg_decompress_struct)) -EXTERN(void) jpeg_CreateCompress (j_compress_ptr cinfo, int version, - size_t structsize); -EXTERN(void) jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, - size_t structsize); + jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \ + (size_t)sizeof(struct jpeg_decompress_struct)) +EXTERN(void) jpeg_CreateCompress(j_compress_ptr cinfo, int version, + size_t structsize); +EXTERN(void) jpeg_CreateDecompress(j_decompress_ptr cinfo, int version, + size_t structsize); /* Destruction of JPEG compression objects */ -EXTERN(void) jpeg_destroy_compress (j_compress_ptr cinfo); -EXTERN(void) jpeg_destroy_decompress (j_decompress_ptr cinfo); +EXTERN(void) jpeg_destroy_compress(j_compress_ptr cinfo); +EXTERN(void) jpeg_destroy_decompress(j_decompress_ptr cinfo); /* Standard data source and destination managers: stdio streams. */ /* Caller is responsible for opening the file before and closing after. */ -EXTERN(void) jpeg_stdio_dest (j_compress_ptr cinfo, FILE *outfile); -EXTERN(void) jpeg_stdio_src (j_decompress_ptr cinfo, FILE *infile); +EXTERN(void) jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile); +EXTERN(void) jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile); #if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) /* Data source and destination managers: memory buffers. 
*/ -EXTERN(void) jpeg_mem_dest (j_compress_ptr cinfo, unsigned char **outbuffer, - unsigned long *outsize); -EXTERN(void) jpeg_mem_src (j_decompress_ptr cinfo, - const unsigned char *inbuffer, - unsigned long insize); +EXTERN(void) jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer, + unsigned long *outsize); +EXTERN(void) jpeg_mem_src(j_decompress_ptr cinfo, + const unsigned char *inbuffer, unsigned long insize); #endif /* Default parameter setup for compression */ -EXTERN(void) jpeg_set_defaults (j_compress_ptr cinfo); +EXTERN(void) jpeg_set_defaults(j_compress_ptr cinfo); /* Compression parameter setup aids */ -EXTERN(void) jpeg_set_colorspace (j_compress_ptr cinfo, - J_COLOR_SPACE colorspace); -EXTERN(void) jpeg_default_colorspace (j_compress_ptr cinfo); -EXTERN(void) jpeg_set_quality (j_compress_ptr cinfo, int quality, - boolean force_baseline); -EXTERN(void) jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, - boolean force_baseline); +EXTERN(void) jpeg_set_colorspace(j_compress_ptr cinfo, + J_COLOR_SPACE colorspace); +EXTERN(void) jpeg_default_colorspace(j_compress_ptr cinfo); +EXTERN(void) jpeg_set_quality(j_compress_ptr cinfo, int quality, + boolean force_baseline); +EXTERN(void) jpeg_set_linear_quality(j_compress_ptr cinfo, int scale_factor, + boolean force_baseline); #if JPEG_LIB_VERSION >= 70 -EXTERN(void) jpeg_default_qtables (j_compress_ptr cinfo, - boolean force_baseline); +EXTERN(void) jpeg_default_qtables(j_compress_ptr cinfo, + boolean force_baseline); #endif -EXTERN(void) jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, - const unsigned int *basic_table, - int scale_factor, boolean force_baseline); -EXTERN(int) jpeg_quality_scaling (int quality); -EXTERN(void) jpeg_simple_progression (j_compress_ptr cinfo); -EXTERN(void) jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress); -EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table (j_common_ptr cinfo); -EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table (j_common_ptr cinfo); 
+EXTERN(void) jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl, + const unsigned int *basic_table, + int scale_factor, boolean force_baseline); +EXTERN(int) jpeg_quality_scaling(int quality); +EXTERN(void) jpeg_simple_progression(j_compress_ptr cinfo); +EXTERN(void) jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress); +EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table(j_common_ptr cinfo); +EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table(j_common_ptr cinfo); /* Main entry points for compression */ -EXTERN(void) jpeg_start_compress (j_compress_ptr cinfo, - boolean write_all_tables); -EXTERN(JDIMENSION) jpeg_write_scanlines (j_compress_ptr cinfo, - JSAMPARRAY scanlines, - JDIMENSION num_lines); -EXTERN(void) jpeg_finish_compress (j_compress_ptr cinfo); +EXTERN(void) jpeg_start_compress(j_compress_ptr cinfo, + boolean write_all_tables); +EXTERN(JDIMENSION) jpeg_write_scanlines(j_compress_ptr cinfo, + JSAMPARRAY scanlines, + JDIMENSION num_lines); +EXTERN(void) jpeg_finish_compress(j_compress_ptr cinfo); #if JPEG_LIB_VERSION >= 70 /* Precalculate JPEG dimensions for current compression parameters. */ -EXTERN(void) jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo); +EXTERN(void) jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo); #endif /* Replaces jpeg_write_scanlines when writing raw downsampled data. */ -EXTERN(JDIMENSION) jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data, - JDIMENSION num_lines); +EXTERN(JDIMENSION) jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION num_lines); /* Write a special marker. See libjpeg.txt concerning safe usage. */ -EXTERN(void) jpeg_write_marker (j_compress_ptr cinfo, int marker, - const JOCTET *dataptr, unsigned int datalen); +EXTERN(void) jpeg_write_marker(j_compress_ptr cinfo, int marker, + const JOCTET *dataptr, unsigned int datalen); /* Same, but piecemeal. 
*/ -EXTERN(void) jpeg_write_m_header (j_compress_ptr cinfo, int marker, - unsigned int datalen); -EXTERN(void) jpeg_write_m_byte (j_compress_ptr cinfo, int val); +EXTERN(void) jpeg_write_m_header(j_compress_ptr cinfo, int marker, + unsigned int datalen); +EXTERN(void) jpeg_write_m_byte(j_compress_ptr cinfo, int val); /* Alternate compression function: just write an abbreviated table file */ -EXTERN(void) jpeg_write_tables (j_compress_ptr cinfo); +EXTERN(void) jpeg_write_tables(j_compress_ptr cinfo); + +/* Write ICC profile. See libjpeg.txt for usage information. */ +EXTERN(void) jpeg_write_icc_profile(j_compress_ptr cinfo, + const JOCTET *icc_data_ptr, + unsigned int icc_data_len); + /* Decompression startup: read start of JPEG datastream to see what's there */ -EXTERN(int) jpeg_read_header (j_decompress_ptr cinfo, boolean require_image); +EXTERN(int) jpeg_read_header(j_decompress_ptr cinfo, boolean require_image); /* Return value is one of: */ -#define JPEG_SUSPENDED 0 /* Suspended due to lack of input data */ -#define JPEG_HEADER_OK 1 /* Found valid image datastream */ -#define JPEG_HEADER_TABLES_ONLY 2 /* Found valid table-specs-only datastream */ +#define JPEG_SUSPENDED 0 /* Suspended due to lack of input data */ +#define JPEG_HEADER_OK 1 /* Found valid image datastream */ +#define JPEG_HEADER_TABLES_ONLY 2 /* Found valid table-specs-only datastream */ /* If you pass require_image = TRUE (normal case), you need not check for * a TABLES_ONLY return code; an abbreviated file will cause an error exit. 
* JPEG_SUSPENDED is only possible if you use a data source module that can @@ -989,27 +994,27 @@ EXTERN(int) jpeg_read_header (j_decompress_ptr cinfo, boolean require_image); */ /* Main entry points for decompression */ -EXTERN(boolean) jpeg_start_decompress (j_decompress_ptr cinfo); -EXTERN(JDIMENSION) jpeg_read_scanlines (j_decompress_ptr cinfo, - JSAMPARRAY scanlines, - JDIMENSION max_lines); -EXTERN(JDIMENSION) jpeg_skip_scanlines (j_decompress_ptr cinfo, - JDIMENSION num_lines); -EXTERN(void) jpeg_crop_scanline (j_decompress_ptr cinfo, JDIMENSION *xoffset, - JDIMENSION *width); -EXTERN(boolean) jpeg_finish_decompress (j_decompress_ptr cinfo); +EXTERN(boolean) jpeg_start_decompress(j_decompress_ptr cinfo); +EXTERN(JDIMENSION) jpeg_read_scanlines(j_decompress_ptr cinfo, + JSAMPARRAY scanlines, + JDIMENSION max_lines); +EXTERN(JDIMENSION) jpeg_skip_scanlines(j_decompress_ptr cinfo, + JDIMENSION num_lines); +EXTERN(void) jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width); +EXTERN(boolean) jpeg_finish_decompress(j_decompress_ptr cinfo); /* Replaces jpeg_read_scanlines when reading raw downsampled data. */ -EXTERN(JDIMENSION) jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data, - JDIMENSION max_lines); +EXTERN(JDIMENSION) jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION max_lines); /* Additional entry points for buffered-image mode. 
*/ -EXTERN(boolean) jpeg_has_multiple_scans (j_decompress_ptr cinfo); -EXTERN(boolean) jpeg_start_output (j_decompress_ptr cinfo, int scan_number); -EXTERN(boolean) jpeg_finish_output (j_decompress_ptr cinfo); -EXTERN(boolean) jpeg_input_complete (j_decompress_ptr cinfo); -EXTERN(void) jpeg_new_colormap (j_decompress_ptr cinfo); -EXTERN(int) jpeg_consume_input (j_decompress_ptr cinfo); +EXTERN(boolean) jpeg_has_multiple_scans(j_decompress_ptr cinfo); +EXTERN(boolean) jpeg_start_output(j_decompress_ptr cinfo, int scan_number); +EXTERN(boolean) jpeg_finish_output(j_decompress_ptr cinfo); +EXTERN(boolean) jpeg_input_complete(j_decompress_ptr cinfo); +EXTERN(void) jpeg_new_colormap(j_decompress_ptr cinfo); +EXTERN(int) jpeg_consume_input(j_decompress_ptr cinfo); /* Return value is one of: */ /* #define JPEG_SUSPENDED 0 Suspended due to lack of input data */ #define JPEG_REACHED_SOS 1 /* Reached start of new scan */ @@ -1019,25 +1024,25 @@ EXTERN(int) jpeg_consume_input (j_decompress_ptr cinfo); /* Precalculate output dimensions for current decompression parameters. */ #if JPEG_LIB_VERSION >= 80 -EXTERN(void) jpeg_core_output_dimensions (j_decompress_ptr cinfo); +EXTERN(void) jpeg_core_output_dimensions(j_decompress_ptr cinfo); #endif -EXTERN(void) jpeg_calc_output_dimensions (j_decompress_ptr cinfo); +EXTERN(void) jpeg_calc_output_dimensions(j_decompress_ptr cinfo); /* Control saving of COM and APPn markers into marker_list. */ -EXTERN(void) jpeg_save_markers (j_decompress_ptr cinfo, int marker_code, - unsigned int length_limit); +EXTERN(void) jpeg_save_markers(j_decompress_ptr cinfo, int marker_code, + unsigned int length_limit); /* Install a special processing method for COM or APPn markers. 
*/ -EXTERN(void) jpeg_set_marker_processor (j_decompress_ptr cinfo, - int marker_code, - jpeg_marker_parser_method routine); +EXTERN(void) jpeg_set_marker_processor(j_decompress_ptr cinfo, + int marker_code, + jpeg_marker_parser_method routine); /* Read or write raw DCT coefficients --- useful for lossless transcoding. */ -EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients (j_decompress_ptr cinfo); -EXTERN(void) jpeg_write_coefficients (j_compress_ptr cinfo, - jvirt_barray_ptr *coef_arrays); -EXTERN(void) jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, - j_compress_ptr dstinfo); +EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients(j_decompress_ptr cinfo); +EXTERN(void) jpeg_write_coefficients(j_compress_ptr cinfo, + jvirt_barray_ptr *coef_arrays); +EXTERN(void) jpeg_copy_critical_parameters(j_decompress_ptr srcinfo, + j_compress_ptr dstinfo); /* If you choose to abort compression or decompression before completing * jpeg_finish_(de)compress, then you need to clean up to release memory, @@ -1045,17 +1050,22 @@ EXTERN(void) jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, * if you're done with the JPEG object, but if you want to clean it up and * reuse it, call this: */ -EXTERN(void) jpeg_abort_compress (j_compress_ptr cinfo); -EXTERN(void) jpeg_abort_decompress (j_decompress_ptr cinfo); +EXTERN(void) jpeg_abort_compress(j_compress_ptr cinfo); +EXTERN(void) jpeg_abort_decompress(j_decompress_ptr cinfo); /* Generic versions of jpeg_abort and jpeg_destroy that work on either * flavor of JPEG object. These may be more convenient in some places. 
*/ -EXTERN(void) jpeg_abort (j_common_ptr cinfo); -EXTERN(void) jpeg_destroy (j_common_ptr cinfo); +EXTERN(void) jpeg_abort(j_common_ptr cinfo); +EXTERN(void) jpeg_destroy(j_common_ptr cinfo); /* Default restart-marker-resync procedure for use by data source modules */ -EXTERN(boolean) jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired); +EXTERN(boolean) jpeg_resync_to_restart(j_decompress_ptr cinfo, int desired); + +/* Read ICC profile. See libjpeg.txt for usage information. */ +EXTERN(boolean) jpeg_read_icc_profile(j_decompress_ptr cinfo, + JOCTET **icc_data_ptr, + unsigned int *icc_data_len); /* These marker codes are exported since applications and data source modules diff --git a/jpegtran.1 b/jpegtran.1 index 631455b..2efb264 100644 --- a/jpegtran.1 +++ b/jpegtran.1 @@ -217,6 +217,11 @@ v6a, \fBjpegtran\fR always did the equivalent of \fB-copy none\fR.) .PP Additional switches recognized by jpegtran are: .TP +.BI \-icc " file" +Embed ICC color management profile contained in the specified file. Note that +this will cause \fBjpegtran\fR to ignore any APP2 markers in the input file, +even if \fB-copy all\fR is specified. +.TP .BI \-maxmemory " N" Set limit for amount of memory to use in processing large images. 
Value is in thousands of bytes, or millions of bytes if "M" is attached to the diff --git a/jpegtran.c b/jpegtran.c index 6f8fd5b..058e844 100644 --- a/jpegtran.c +++ b/jpegtran.c @@ -40,13 +40,14 @@ static const char *progname; /* program name for error messages */ +static char *icc_filename; /* for -icc switch */ static char *outfilename; /* for -outfile switch */ static JCOPY_OPTION copyoption; /* -copy switch */ static jpeg_transform_info transformoption; /* image transformation options */ LOCAL(void) -usage (void) +usage(void) /* complain about bad command line */ { fprintf(stderr, "usage: %s [switches] ", progname); @@ -83,6 +84,7 @@ usage (void) #ifdef C_ARITH_CODING_SUPPORTED fprintf(stderr, " -arithmetic Use arithmetic coding\n"); #endif + fprintf(stderr, " -icc FILE Embed ICC profile contained in FILE\n"); fprintf(stderr, " -restart N Set restart interval in rows, or in blocks with B\n"); fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); fprintf(stderr, " -outfile name Specify name for output file\n"); @@ -97,7 +99,7 @@ usage (void) LOCAL(void) -select_transform (JXFORM_CODE transform) +select_transform(JXFORM_CODE transform) /* Silly little routine to detect multiple transform options, * which we can't handle. */ @@ -120,8 +122,8 @@ select_transform (JXFORM_CODE transform) LOCAL(int) -parse_switches (j_compress_ptr cinfo, int argc, char **argv, - int last_file_arg_seen, boolean for_real) +parse_switches(j_compress_ptr cinfo, int argc, char **argv, + int last_file_arg_seen, boolean for_real) /* Parse optional switches. * Returns argv[] index of first file-name argument (== argc if none). * Any file names with indexes <= last_file_arg_seen are ignored; @@ -138,6 +140,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, /* Set up default JPEG parameters. 
*/ simple_progressive = FALSE; + icc_filename = NULL; outfilename = NULL; copyoption = JCOPYOPT_DEFAULT; transformoption.transform = JXFORM_NONE; @@ -190,7 +193,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, #if TRANSFORMS_SUPPORTED if (++argn >= argc) /* advance to next argument */ usage(); - if (! jtransform_parse_crop_spec(&transformoption, argv[argn])) { + if (!jtransform_parse_crop_spec(&transformoption, argv[argn])) { fprintf(stderr, "%s: bogus -crop argument '%s'\n", progname, argv[argn]); exit(EXIT_FAILURE); @@ -204,7 +207,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, /* On first -d, print version identification */ static boolean printed_version = FALSE; - if (! printed_version) { + if (!printed_version) { fprintf(stderr, "%s version %s (build %s)\n", PACKAGE_NAME, VERSION, BUILD); fprintf(stderr, "%s\n\n", JCOPYRIGHT); @@ -230,7 +233,8 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, else usage(); - } else if (keymatch(arg, "grayscale", 1) || keymatch(arg, "greyscale",1)) { + } else if (keymatch(arg, "grayscale", 1) || + keymatch(arg, "greyscale", 1)) { /* Force to grayscale. */ #if TRANSFORMS_SUPPORTED transformoption.force_grayscale = TRUE; @@ -238,6 +242,12 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, select_transform(JXFORM_NONE); /* force an error */ #endif + } else if (keymatch(arg, "icc", 1)) { + /* Set ICC filename. */ + if (++argn >= argc) /* advance to next argument */ + usage(); + icc_filename = argv[argn]; + } else if (keymatch(arg, "maxmemory", 3)) { /* Maximum memory in Kb (or Mb with 'm'). 
*/ long lval; @@ -295,10 +305,10 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, if (lval < 0 || lval > 65535L) usage(); if (ch == 'b' || ch == 'B') { - cinfo->restart_interval = (unsigned int) lval; + cinfo->restart_interval = (unsigned int)lval; cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */ } else { - cinfo->restart_in_rows = (int) lval; + cinfo->restart_in_rows = (int)lval; /* restart_interval will be computed during startup */ } @@ -356,7 +366,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, #ifdef C_MULTISCAN_FILES_SUPPORTED if (scansarg != NULL) /* process -scans if it was present */ - if (! read_scan_script(cinfo, scansarg)) + if (!read_scan_script(cinfo, scansarg)) usage(); #endif } @@ -370,7 +380,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv, */ int -main (int argc, char **argv) +main(int argc, char **argv) { struct jpeg_decompress_struct srcinfo; struct jpeg_compress_struct dstinfo; @@ -385,6 +395,9 @@ main (int argc, char **argv) * single file pointer for sequential input and output operation. */ FILE *fp; + FILE *icc_file; + JOCTET *icc_profile = NULL; + long icc_len = 0; /* On Mac, fetch a command line. 
*/ #ifdef USE_CCOMMAND @@ -417,14 +430,14 @@ main (int argc, char **argv) #ifdef TWO_FILE_COMMANDLINE /* Must have either -outfile switch or explicit output file name */ if (outfilename == NULL) { - if (file_index != argc-2) { + if (file_index != argc - 2) { fprintf(stderr, "%s: must name one input and one output file\n", progname); usage(); } - outfilename = argv[file_index+1]; + outfilename = argv[file_index + 1]; } else { - if (file_index != argc-1) { + if (file_index != argc - 1) { fprintf(stderr, "%s: must name one input and one output file\n", progname); usage(); @@ -432,7 +445,7 @@ main (int argc, char **argv) } #else /* Unix style: expect zero or one file name */ - if (file_index < argc-1) { + if (file_index < argc - 1) { fprintf(stderr, "%s: only one input file\n", progname); usage(); } @@ -441,7 +454,8 @@ main (int argc, char **argv) /* Open the input file. */ if (file_index < argc) { if ((fp = fopen(argv[file_index], READ_BINARY)) == NULL) { - fprintf(stderr, "%s: can't open %s for reading\n", progname, argv[file_index]); + fprintf(stderr, "%s: can't open %s for reading\n", progname, + argv[file_index]); exit(EXIT_FAILURE); } } else { @@ -449,8 +463,37 @@ main (int argc, char **argv) fp = read_stdin(); } + if (icc_filename != NULL) { + if ((icc_file = fopen(icc_filename, READ_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, icc_filename); + exit(EXIT_FAILURE); + } + if (fseek(icc_file, 0, SEEK_END) < 0 || + (icc_len = ftell(icc_file)) < 1 || + fseek(icc_file, 0, SEEK_SET) < 0) { + fprintf(stderr, "%s: can't determine size of %s\n", progname, + icc_filename); + exit(EXIT_FAILURE); + } + if ((icc_profile = (JOCTET *)malloc(icc_len)) == NULL) { + fprintf(stderr, "%s: can't allocate memory for ICC profile\n", progname); + fclose(icc_file); + exit(EXIT_FAILURE); + } + if (fread(icc_profile, icc_len, 1, icc_file) < 1) { + fprintf(stderr, "%s: can't read ICC profile from %s\n", progname, + icc_filename); + free(icc_profile); + 
fclose(icc_file); + exit(EXIT_FAILURE); + } + fclose(icc_file); + if (copyoption == JCOPYOPT_ALL) + copyoption = JCOPYOPT_ALL_EXCEPT_ICC; + } + #ifdef PROGRESS_REPORT - start_progress_monitor((j_common_ptr) &dstinfo, &progress); + start_progress_monitor((j_common_ptr)&dstinfo, &progress); #endif /* Specify data source for decompression */ @@ -460,7 +503,7 @@ main (int argc, char **argv) jcopy_markers_setup(&srcinfo, copyoption); /* Read file header */ - (void) jpeg_read_header(&srcinfo, TRUE); + (void)jpeg_read_header(&srcinfo, TRUE); /* Any space needed by a transform option must be requested before * jpeg_read_coefficients so that memory allocation will be done right. @@ -494,7 +537,7 @@ main (int argc, char **argv) /* Close input file, if we opened it. * Note: we assume that jpeg_read_coefficients consumed all input * until JPEG_REACHED_EOI, and that jpeg_finish_decompress will - * only consume more while (! cinfo->inputctl->eoi_reached). + * only consume more while (!cinfo->inputctl->eoi_reached). * We cannot call jpeg_finish_decompress here since we still need the * virtual arrays allocated from the source object for processing. */ @@ -504,7 +547,8 @@ main (int argc, char **argv) /* Open the output file. 
*/ if (outfilename != NULL) { if ((fp = fopen(outfilename, WRITE_BINARY)) == NULL) { - fprintf(stderr, "%s: can't open %s for writing\n", progname, outfilename); + fprintf(stderr, "%s: can't open %s for writing\n", progname, + outfilename); exit(EXIT_FAILURE); } } else { @@ -524,17 +568,19 @@ main (int argc, char **argv) /* Copy to the output file any extra markers that we want to preserve */ jcopy_markers_execute(&srcinfo, &dstinfo, copyoption); + if (icc_profile != NULL) + jpeg_write_icc_profile(&dstinfo, icc_profile, (unsigned int)icc_len); + /* Execute image transformation, if any */ #if TRANSFORMS_SUPPORTED - jtransform_execute_transformation(&srcinfo, &dstinfo, - src_coef_arrays, + jtransform_execute_transformation(&srcinfo, &dstinfo, src_coef_arrays, &transformoption); #endif /* Finish compression and release memory */ jpeg_finish_compress(&dstinfo); jpeg_destroy_compress(&dstinfo); - (void) jpeg_finish_decompress(&srcinfo); + (void)jpeg_finish_decompress(&srcinfo); jpeg_destroy_decompress(&srcinfo); /* Close output file, if we opened it */ @@ -542,10 +588,14 @@ main (int argc, char **argv) fclose(fp); #ifdef PROGRESS_REPORT - end_progress_monitor((j_common_ptr) &dstinfo); + end_progress_monitor((j_common_ptr)&dstinfo); #endif + if (icc_profile != NULL) + free(icc_profile); + /* All done. */ - exit(jsrcerr.num_warnings + jdsterr.num_warnings ?EXIT_WARNING:EXIT_SUCCESS); + exit(jsrcerr.num_warnings + jdsterr.num_warnings ? 
+ EXIT_WARNING : EXIT_SUCCESS); return 0; /* suppress no-return-value warnings */ } diff --git a/jquant1.c b/jquant1.c index e781481..40bbb28 100644 --- a/jquant1.c +++ b/jquant1.c @@ -73,8 +73,9 @@ #define ODITHER_SIZE 16 /* dimension of dither matrix */ /* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */ -#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE) /* # cells in matrix */ -#define ODITHER_MASK (ODITHER_SIZE-1) /* mask for wrapping around counters */ +#define ODITHER_CELLS (ODITHER_SIZE * ODITHER_SIZE) /* # cells in matrix */ +#define ODITHER_MASK (ODITHER_SIZE - 1) /* mask for wrapping around + counters */ typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE]; typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE]; @@ -132,12 +133,12 @@ typedef JLONG FSERROR; /* may need more than 16 bits */ typedef JLONG LOCFSERROR; /* be sure calculation temps are big enough */ #endif -typedef FSERROR *FSERRPTR; /* pointer to error array */ +typedef FSERROR *FSERRPTR; /* pointer to error array */ /* Private subobject */ -#define MAX_Q_COMPS 4 /* max components I can handle */ +#define MAX_Q_COMPS 4 /* max components I can handle */ typedef struct { struct jpeg_color_quantizer pub; /* public fields */ @@ -153,7 +154,7 @@ typedef struct { */ boolean is_padded; /* is the colorindex padded for odither? */ - int Ncolors[MAX_Q_COMPS]; /* # of values alloced to each component */ + int Ncolors[MAX_Q_COMPS]; /* # of values allocated to each component */ /* Variables for ordered dithering */ int row_index; /* cur row's vertical index in dither matrix */ @@ -183,7 +184,7 @@ typedef my_cquantizer *my_cquantize_ptr; LOCAL(int) -select_ncolors (j_decompress_ptr cinfo, int Ncolors[]) +select_ncolors(j_decompress_ptr cinfo, int Ncolors[]) /* Determine allocation of desired colors to components, */ /* and fill in Ncolors[] array to indicate choice. */ /* Return value is total number of colors (product of Ncolors[] values). 
*/ @@ -206,12 +207,12 @@ select_ncolors (j_decompress_ptr cinfo, int Ncolors[]) temp = iroot; /* set temp = iroot ** nc */ for (i = 1; i < nc; i++) temp *= iroot; - } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */ + } while (temp <= (long)max_colors); /* repeat till iroot exceeds root */ iroot--; /* now iroot = floor(root) */ /* Must have at least 2 color values per component */ if (iroot < 2) - ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int) temp); + ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int)temp); /* Initialize to iroot color values for each component */ total_colors = 1; @@ -231,11 +232,11 @@ select_ncolors (j_decompress_ptr cinfo, int Ncolors[]) j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i); /* calculate new total_colors if Ncolors[j] is incremented */ temp = total_colors / Ncolors[j]; - temp *= Ncolors[j]+1; /* done in long arith to avoid oflo */ - if (temp > (long) max_colors) + temp *= Ncolors[j] + 1; /* done in long arith to avoid oflo */ + if (temp > (long)max_colors) break; /* won't fit, done with this pass */ Ncolors[j]++; /* OK, apply the increment */ - total_colors = (int) temp; + total_colors = (int)temp; changed = TRUE; } } while (changed); @@ -245,7 +246,7 @@ select_ncolors (j_decompress_ptr cinfo, int Ncolors[]) LOCAL(int) -output_value (j_decompress_ptr cinfo, int ci, int j, int maxj) +output_value(j_decompress_ptr cinfo, int ci, int j, int maxj) /* Return j'th output value, where j will range from 0 to maxj */ /* The output values must fall in 0..MAXJSAMPLE in increasing order */ { @@ -254,17 +255,17 @@ output_value (j_decompress_ptr cinfo, int ci, int j, int maxj) * (Forcing the upper and lower values to the limits ensures that * dithering can't produce a color outside the selected gamut.) 
*/ - return (int) (((JLONG) j * MAXJSAMPLE + maxj/2) / maxj); + return (int)(((JLONG)j * MAXJSAMPLE + maxj / 2) / maxj); } LOCAL(int) -largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj) +largest_input_value(j_decompress_ptr cinfo, int ci, int j, int maxj) /* Return largest input value that should map to j'th output value */ /* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */ { /* Breakpoints are halfway between values returned by output_value */ - return (int) (((JLONG) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj)); + return (int)(((JLONG)(2 * j + 1) * MAXJSAMPLE + maxj) / (2 * maxj)); } @@ -273,21 +274,21 @@ largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj) */ LOCAL(void) -create_colormap (j_decompress_ptr cinfo) +create_colormap(j_decompress_ptr cinfo) { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; JSAMPARRAY colormap; /* Created colormap */ int total_colors; /* Number of distinct output colors */ - int i,j,k, nci, blksize, blkdist, ptr, val; + int i, j, k, nci, blksize, blkdist, ptr, val; /* Select number of colors for each component */ total_colors = select_ncolors(cinfo, cquantize->Ncolors); /* Report selected color counts */ if (cinfo->out_color_components == 3) - TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS, - total_colors, cquantize->Ncolors[0], - cquantize->Ncolors[1], cquantize->Ncolors[2]); + TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS, total_colors, + cquantize->Ncolors[0], cquantize->Ncolors[1], + cquantize->Ncolors[2]); else TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors); @@ -296,8 +297,8 @@ create_colormap (j_decompress_ptr cinfo) /* i.e. rightmost (highest-indexed) color changes most rapidly. 
*/ colormap = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) total_colors, (JDIMENSION) cinfo->out_color_components); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (JDIMENSION)total_colors, (JDIMENSION)cinfo->out_color_components); /* blksize is number of adjacent repeated entries for a component */ /* blkdist is distance between groups of identical entries for a component */ @@ -309,12 +310,12 @@ create_colormap (j_decompress_ptr cinfo) blksize = blkdist / nci; for (j = 0; j < nci; j++) { /* Compute j'th output value (out of nci) for component */ - val = output_value(cinfo, i, j, nci-1); + val = output_value(cinfo, i, j, nci - 1); /* Fill in all colormap entries that have this value of this component */ for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) { /* fill in blksize entries beginning at ptr */ for (k = 0; k < blksize; k++) - colormap[i][ptr+k] = (JSAMPLE) val; + colormap[i][ptr + k] = (JSAMPLE)val; } } blkdist = blksize; /* blksize of this color is blkdist of next */ @@ -333,11 +334,11 @@ create_colormap (j_decompress_ptr cinfo) */ LOCAL(void) -create_colorindex (j_decompress_ptr cinfo) +create_colorindex(j_decompress_ptr cinfo) { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; JSAMPROW indexptr; - int i,j,k, nci, blksize, val, pad; + int i, j, k, nci, blksize, val, pad; /* For ordered dither, we pad the color index tables by MAXJSAMPLE in * each direction (input index values can be -MAXJSAMPLE .. 2*MAXJSAMPLE). @@ -345,7 +346,7 @@ create_colorindex (j_decompress_ptr cinfo) * flag whether it was done in case user changes dithering mode. 
*/ if (cinfo->dither_mode == JDITHER_ORDERED) { - pad = MAXJSAMPLE*2; + pad = MAXJSAMPLE * 2; cquantize->is_padded = TRUE; } else { pad = 0; @@ -353,9 +354,9 @@ create_colorindex (j_decompress_ptr cinfo) } cquantize->colorindex = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) (MAXJSAMPLE+1 + pad), - (JDIMENSION) cinfo->out_color_components); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (JDIMENSION)(MAXJSAMPLE + 1 + pad), + (JDIMENSION)cinfo->out_color_components); /* blksize is number of adjacent repeated entries for a component */ blksize = cquantize->sv_actual; @@ -373,18 +374,18 @@ create_colorindex (j_decompress_ptr cinfo) /* and k = largest j that maps to current val */ indexptr = cquantize->colorindex[i]; val = 0; - k = largest_input_value(cinfo, i, 0, nci-1); + k = largest_input_value(cinfo, i, 0, nci - 1); for (j = 0; j <= MAXJSAMPLE; j++) { while (j > k) /* advance val if past boundary */ - k = largest_input_value(cinfo, i, ++val, nci-1); + k = largest_input_value(cinfo, i, ++val, nci - 1); /* premultiply so that no multiplication needed in main processing */ - indexptr[j] = (JSAMPLE) (val * blksize); + indexptr[j] = (JSAMPLE)(val * blksize); } /* Pad at both ends if necessary */ if (pad) for (j = 1; j <= MAXJSAMPLE; j++) { indexptr[-j] = indexptr[0]; - indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE]; + indexptr[MAXJSAMPLE + j] = indexptr[MAXJSAMPLE]; } } } @@ -396,29 +397,29 @@ create_colorindex (j_decompress_ptr cinfo) */ LOCAL(ODITHER_MATRIX_PTR) -make_odither_array (j_decompress_ptr cinfo, int ncolors) +make_odither_array(j_decompress_ptr cinfo, int ncolors) { ODITHER_MATRIX_PTR odither; - int j,k; - JLONG num,den; + int j, k; + JLONG num, den; odither = (ODITHER_MATRIX_PTR) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(ODITHER_MATRIX)); /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1). 
* Hence the dither value for the matrix cell with fill order f * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1). * On 16-bit-int machine, be careful to avoid overflow. */ - den = 2 * ODITHER_CELLS * ((JLONG) (ncolors - 1)); + den = 2 * ODITHER_CELLS * ((JLONG)(ncolors - 1)); for (j = 0; j < ODITHER_SIZE; j++) { for (k = 0; k < ODITHER_SIZE; k++) { - num = ((JLONG) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k]))) - * MAXJSAMPLE; + num = ((JLONG)(ODITHER_CELLS - 1 - + 2 * ((int)base_dither_matrix[j][k]))) * MAXJSAMPLE; /* Ensure round towards zero despite C's lack of consistency * about rounding negative values in integer division... */ - odither[j][k] = (int) (num<0 ? -((-num)/den) : num/den); + odither[j][k] = (int)(num < 0 ? -((-num) / den) : num / den); } } return odither; @@ -432,9 +433,9 @@ make_odither_array (j_decompress_ptr cinfo, int ncolors) */ LOCAL(void) -create_odither_tables (j_decompress_ptr cinfo) +create_odither_tables(j_decompress_ptr cinfo) { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; ODITHER_MATRIX_PTR odither; int i, j, nci; @@ -459,11 +460,11 @@ create_odither_tables (j_decompress_ptr cinfo) */ METHODDEF(void) -color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) +color_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPARRAY output_buf, int num_rows) /* General case, no dithering */ { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; JSAMPARRAY colorindex = cquantize->colorindex; register int pixcode, ci; register JSAMPROW ptrin, ptrout; @@ -480,18 +481,18 @@ color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf, for (ci = 0; ci < nc; ci++) { pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]); } - *ptrout++ = (JSAMPLE) pixcode; + *ptrout++ = (JSAMPLE)pixcode; } } } 
METHODDEF(void) -color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) +color_quantize3(j_decompress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPARRAY output_buf, int num_rows) /* Fast path for out_color_components==3, no dithering */ { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; register int pixcode; register JSAMPROW ptrin, ptrout; JSAMPROW colorindex0 = cquantize->colorindex[0]; @@ -508,18 +509,18 @@ color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf, pixcode = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]); pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]); pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]); - *ptrout++ = (JSAMPLE) pixcode; + *ptrout++ = (JSAMPLE)pixcode; } } } METHODDEF(void) -quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) +quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPARRAY output_buf, int num_rows) /* General case, with ordered dithering */ { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; register JSAMPROW input_ptr; register JSAMPROW output_ptr; JSAMPROW colorindex_ci; @@ -533,7 +534,7 @@ quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, for (row = 0; row < num_rows; row++) { /* Initialize output values to 0 so can process components separately */ - jzero_far((void *) output_buf[row], (size_t) (width * sizeof(JSAMPLE))); + jzero_far((void *)output_buf[row], (size_t)(width * sizeof(JSAMPLE))); row_index = cquantize->row_index; for (ci = 0; ci < nc; ci++) { input_ptr = input_buf[row] + ci; @@ -550,7 +551,8 @@ quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, * inputs. The maximum dither is +- MAXJSAMPLE; this sets the * required amount of padding. 
*/ - *output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]]; + *output_ptr += + colorindex_ci[GETJSAMPLE(*input_ptr) + dither[col_index]]; input_ptr += nc; output_ptr++; col_index = (col_index + 1) & ODITHER_MASK; @@ -564,11 +566,11 @@ quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, METHODDEF(void) -quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) +quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPARRAY output_buf, int num_rows) /* Fast path for out_color_components==3, with ordered dithering */ { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; register int pixcode; register JSAMPROW input_ptr; register JSAMPROW output_ptr; @@ -593,13 +595,13 @@ quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, col_index = 0; for (col = width; col > 0; col--) { - pixcode = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) + - dither0[col_index]]); - pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) + - dither1[col_index]]); - pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) + - dither2[col_index]]); - *output_ptr++ = (JSAMPLE) pixcode; + pixcode = + GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) + dither0[col_index]]); + pixcode += + GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) + dither1[col_index]]); + pixcode += + GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) + dither2[col_index]]); + *output_ptr++ = (JSAMPLE)pixcode; col_index = (col_index + 1) & ODITHER_MASK; } row_index = (row_index + 1) & ODITHER_MASK; @@ -609,11 +611,11 @@ quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, METHODDEF(void) -quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, - JSAMPARRAY output_buf, int num_rows) +quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPARRAY output_buf, int num_rows) /* General case, with 
Floyd-Steinberg dithering */ { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; register LOCFSERROR cur; /* current error or pixel value */ LOCFSERROR belowerr; /* error for pixel below cur */ LOCFSERROR bpreverr; /* error for below/prev col */ @@ -637,17 +639,17 @@ quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, for (row = 0; row < num_rows; row++) { /* Initialize output values to 0 so can process components separately */ - jzero_far((void *) output_buf[row], (size_t) (width * sizeof(JSAMPLE))); + jzero_far((void *)output_buf[row], (size_t)(width * sizeof(JSAMPLE))); for (ci = 0; ci < nc; ci++) { input_ptr = input_buf[row] + ci; output_ptr = output_buf[row]; if (cquantize->on_odd_row) { /* work right to left in this row */ - input_ptr += (width-1) * nc; /* so point to rightmost pixel */ - output_ptr += width-1; + input_ptr += (width - 1) * nc; /* so point to rightmost pixel */ + output_ptr += width - 1; dir = -1; dirnc = -nc; - errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */ + errorptr = cquantize->fserrors[ci] + (width + 1); /* => entry after last column */ } else { /* work left to right in this row */ dir = 1; @@ -679,7 +681,7 @@ quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, cur = GETJSAMPLE(range_limit[cur]); /* Select output value, accumulate into output code for this pixel */ pixcode = GETJSAMPLE(colorindex_ci[cur]); - *output_ptr += (JSAMPLE) pixcode; + *output_ptr += (JSAMPLE)pixcode; /* Compute actual representation error at this pixel */ /* Note: we can do this even though we don't have the final */ /* pixel code, because the colormap is orthogonal. 
*/ @@ -691,7 +693,7 @@ quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, bnexterr = cur; delta = cur * 2; cur += delta; /* form error * 3 */ - errorptr[0] = (FSERROR) (bpreverr + cur); + errorptr[0] = (FSERROR)(bpreverr + cur); cur += delta; /* form error * 5 */ bpreverr = belowerr + cur; belowerr = bnexterr; @@ -708,7 +710,7 @@ quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, * final fserrors[] entry. Note we need not unload belowerr because * it is for the dummy column before or after the actual array. */ - errorptr[0] = (FSERROR) bpreverr; /* unload prev err into array */ + errorptr[0] = (FSERROR)bpreverr; /* unload prev err into array */ } cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE); } @@ -720,16 +722,16 @@ quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf, */ LOCAL(void) -alloc_fs_workspace (j_decompress_ptr cinfo) +alloc_fs_workspace(j_decompress_ptr cinfo) { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; size_t arraysize; int i; - arraysize = (size_t) ((cinfo->output_width + 2) * sizeof(FSERROR)); + arraysize = (size_t)((cinfo->output_width + 2) * sizeof(FSERROR)); for (i = 0; i < cinfo->out_color_components; i++) { cquantize->fserrors[i] = (FSERRPTR) - (*cinfo->mem->alloc_large)((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize); + (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE, arraysize); } } @@ -739,9 +741,9 @@ alloc_fs_workspace (j_decompress_ptr cinfo) */ METHODDEF(void) -start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan) +start_pass_1_quant(j_decompress_ptr cinfo, boolean is_pre_scan) { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; size_t arraysize; int i; @@ -767,7 +769,7 @@ start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan) * we must recreate the color index table with 
padding. * This will cost extra space, but probably isn't very likely. */ - if (! cquantize->is_padded) + if (!cquantize->is_padded) create_colorindex(cinfo); /* Create ordered-dither tables if we didn't already. */ if (cquantize->odither[0] == NULL) @@ -780,9 +782,9 @@ start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan) if (cquantize->fserrors[0] == NULL) alloc_fs_workspace(cinfo); /* Initialize the propagated errors to zero. */ - arraysize = (size_t) ((cinfo->output_width + 2) * sizeof(FSERROR)); + arraysize = (size_t)((cinfo->output_width + 2) * sizeof(FSERROR)); for (i = 0; i < cinfo->out_color_components; i++) - jzero_far((void *) cquantize->fserrors[i], arraysize); + jzero_far((void *)cquantize->fserrors[i], arraysize); break; default: ERREXIT(cinfo, JERR_NOT_COMPILED); @@ -796,7 +798,7 @@ start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan) */ METHODDEF(void) -finish_pass_1_quant (j_decompress_ptr cinfo) +finish_pass_1_quant(j_decompress_ptr cinfo) { /* no work in 1-pass case */ } @@ -808,7 +810,7 @@ finish_pass_1_quant (j_decompress_ptr cinfo) */ METHODDEF(void) -new_color_map_1_quant (j_decompress_ptr cinfo) +new_color_map_1_quant(j_decompress_ptr cinfo) { ERREXIT(cinfo, JERR_MODE_CHANGE); } @@ -819,14 +821,14 @@ new_color_map_1_quant (j_decompress_ptr cinfo) */ GLOBAL(void) -jinit_1pass_quantizer (j_decompress_ptr cinfo) +jinit_1pass_quantizer(j_decompress_ptr cinfo) { my_cquantize_ptr cquantize; cquantize = (my_cquantize_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_cquantizer)); - cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize; + cinfo->cquantize = (struct jpeg_color_quantizer *)cquantize; cquantize->pub.start_pass = start_pass_1_quant; cquantize->pub.finish_pass = finish_pass_1_quant; cquantize->pub.new_color_map = new_color_map_1_quant; @@ -837,8 +839,8 @@ jinit_1pass_quantizer (j_decompress_ptr cinfo) if 
(cinfo->out_color_components > MAX_Q_COMPS) ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS); /* Make sure colormap indexes can be represented by JSAMPLEs */ - if (cinfo->desired_number_of_colors > (MAXJSAMPLE+1)) - ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE+1); + if (cinfo->desired_number_of_colors > (MAXJSAMPLE + 1)) + ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE + 1); /* Create the colormap and color index table. */ create_colormap(cinfo); diff --git a/jquant2.c b/jquant2.c index cfbd0f1..0ce0ca5 100644 --- a/jquant2.c +++ b/jquant2.c @@ -73,14 +73,14 @@ * probably need to change these scale factors. */ -#define R_SCALE 2 /* scale R distances by this much */ -#define G_SCALE 3 /* scale G distances by this much */ -#define B_SCALE 1 /* and B by this much */ +#define R_SCALE 2 /* scale R distances by this much */ +#define G_SCALE 3 /* scale G distances by this much */ +#define B_SCALE 1 /* and B by this much */ -static const int c_scales[3]={R_SCALE, G_SCALE, B_SCALE}; -#define C0_SCALE c_scales[rgb_red[cinfo->out_color_space]] -#define C1_SCALE c_scales[rgb_green[cinfo->out_color_space]] -#define C2_SCALE c_scales[rgb_blue[cinfo->out_color_space]] +static const int c_scales[3] = { R_SCALE, G_SCALE, B_SCALE }; +#define C0_SCALE c_scales[rgb_red[cinfo->out_color_space]] +#define C1_SCALE c_scales[rgb_green[cinfo->out_color_space]] +#define C2_SCALE c_scales[rgb_blue[cinfo->out_color_space]] /* * First we have the histogram data structure and routines for creating it. @@ -106,7 +106,7 @@ static const int c_scales[3]={R_SCALE, G_SCALE, B_SCALE}; * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries. */ -#define MAXNUMCOLORS (MAXJSAMPLE+1) /* maximum size of colormap */ +#define MAXNUMCOLORS (MAXJSAMPLE + 1) /* maximum size of colormap */ /* These will do the right thing for either R,G,B or B,G,R color order, * but you may not like the results for other color orders. 
@@ -116,19 +116,19 @@ static const int c_scales[3]={R_SCALE, G_SCALE, B_SCALE}; #define HIST_C2_BITS 5 /* bits of precision in B/R histogram */ /* Number of elements along histogram axes. */ -#define HIST_C0_ELEMS (1<cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; register JSAMPROW ptr; register histptr histp; register hist3d histogram = cquantize->histogram; @@ -215,9 +215,9 @@ prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf, ptr = input_buf[row]; for (col = width; col > 0; col--) { /* get pixel value and index into the histogram */ - histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT] - [GETJSAMPLE(ptr[1]) >> C1_SHIFT] - [GETJSAMPLE(ptr[2]) >> C2_SHIFT]; + histp = &histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT] + [GETJSAMPLE(ptr[1]) >> C1_SHIFT] + [GETJSAMPLE(ptr[2]) >> C2_SHIFT]; /* increment, check for overflow and undo increment if so. */ if (++(*histp) <= 0) (*histp)--; @@ -249,7 +249,7 @@ typedef box *boxptr; LOCAL(boxptr) -find_biggest_color_pop (boxptr boxlist, int numboxes) +find_biggest_color_pop(boxptr boxlist, int numboxes) /* Find the splittable box with the largest color population */ /* Returns NULL if no splittable boxes remain */ { @@ -269,7 +269,7 @@ find_biggest_color_pop (boxptr boxlist, int numboxes) LOCAL(boxptr) -find_biggest_volume (boxptr boxlist, int numboxes) +find_biggest_volume(boxptr boxlist, int numboxes) /* Find the splittable box with the largest (scaled) volume */ /* Returns NULL if no splittable boxes remain */ { @@ -289,16 +289,16 @@ find_biggest_volume (boxptr boxlist, int numboxes) LOCAL(void) -update_box (j_decompress_ptr cinfo, boxptr boxp) +update_box(j_decompress_ptr cinfo, boxptr boxp) /* Shrink the min/max bounds of a box to enclose only nonzero elements, */ /* and recompute its volume and population */ { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; hist3d histogram = cquantize->histogram; 
histptr histp; - int c0,c1,c2; - int c0min,c0max,c1min,c1max,c2min,c2max; - JLONG dist0,dist1,dist2; + int c0, c1, c2; + int c0min, c0max, c1min, c1max, c2min, c2max; + JLONG dist0, dist1, dist2; long ccount; c0min = boxp->c0min; c0max = boxp->c0max; @@ -308,69 +308,69 @@ update_box (j_decompress_ptr cinfo, boxptr boxp) if (c0max > c0min) for (c0 = c0min; c0 <= c0max; c0++) for (c1 = c1min; c1 <= c1max; c1++) { - histp = & histogram[c0][c1][c2min]; + histp = &histogram[c0][c1][c2min]; for (c2 = c2min; c2 <= c2max; c2++) if (*histp++ != 0) { boxp->c0min = c0min = c0; goto have_c0min; } } - have_c0min: +have_c0min: if (c0max > c0min) for (c0 = c0max; c0 >= c0min; c0--) for (c1 = c1min; c1 <= c1max; c1++) { - histp = & histogram[c0][c1][c2min]; + histp = &histogram[c0][c1][c2min]; for (c2 = c2min; c2 <= c2max; c2++) if (*histp++ != 0) { boxp->c0max = c0max = c0; goto have_c0max; } } - have_c0max: +have_c0max: if (c1max > c1min) for (c1 = c1min; c1 <= c1max; c1++) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1][c2min]; + histp = &histogram[c0][c1][c2min]; for (c2 = c2min; c2 <= c2max; c2++) if (*histp++ != 0) { boxp->c1min = c1min = c1; goto have_c1min; } } - have_c1min: +have_c1min: if (c1max > c1min) for (c1 = c1max; c1 >= c1min; c1--) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1][c2min]; + histp = &histogram[c0][c1][c2min]; for (c2 = c2min; c2 <= c2max; c2++) if (*histp++ != 0) { boxp->c1max = c1max = c1; goto have_c1max; } } - have_c1max: +have_c1max: if (c2max > c2min) for (c2 = c2min; c2 <= c2max; c2++) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1min][c2]; + histp = &histogram[c0][c1min][c2]; for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS) if (*histp != 0) { boxp->c2min = c2min = c2; goto have_c2min; } } - have_c2min: +have_c2min: if (c2max > c2min) for (c2 = c2max; c2 >= c2min; c2--) for (c0 = c0min; c0 <= c0max; c0++) { - histp = & histogram[c0][c1min][c2]; + histp = 
&histogram[c0][c1min][c2]; for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS) if (*histp != 0) { boxp->c2max = c2max = c2; goto have_c2max; } } - have_c2max: +have_c2max: /* Update box volume. * We use 2-norm rather than real volume here; this biases the method @@ -383,13 +383,13 @@ update_box (j_decompress_ptr cinfo, boxptr boxp) dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE; dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE; dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE; - boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2; + boxp->volume = dist0 * dist0 + dist1 * dist1 + dist2 * dist2; /* Now scan remaining volume of box and compute population */ ccount = 0; for (c0 = c0min; c0 <= c0max; c0++) for (c1 = c1min; c1 <= c1max; c1++) { - histp = & histogram[c0][c1][c2min]; + histp = &histogram[c0][c1][c2min]; for (c2 = c2min; c2 <= c2max; c2++, histp++) if (*histp != 0) { ccount++; @@ -400,19 +400,19 @@ update_box (j_decompress_ptr cinfo, boxptr boxp) LOCAL(int) -median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes, - int desired_colors) +median_cut(j_decompress_ptr cinfo, boxptr boxlist, int numboxes, + int desired_colors) /* Repeatedly select and split the largest box until we have enough boxes */ { - int n,lb; - int c0,c1,c2,cmax; - register boxptr b1,b2; + int n, lb; + int c0, c1, c2, cmax; + register boxptr b1, b2; while (numboxes < desired_colors) { /* Select box to split. * Current algorithm: by population for first half, then by volume. */ - if (numboxes*2 <= desired_colors) { + if (numboxes * 2 <= desired_colors) { b1 = find_biggest_color_pop(boxlist, numboxes); } else { b1 = find_biggest_volume(boxlist, numboxes); @@ -421,8 +421,8 @@ median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes, break; b2 = &boxlist[numboxes]; /* where new box will go */ /* Copy the color bounds to the new box. 
*/ - b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max; - b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min; + b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max; + b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min; /* Choose which axis to split the box on. * Current algorithm: longest scaled axis. * See notes in update_box about scaling distances. @@ -434,13 +434,12 @@ median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes, * This code does the right thing for R,G,B or B,G,R color orders only. */ if (rgb_red[cinfo->out_color_space] == 0) { - cmax = c1; n = 1; - if (c0 > cmax) { cmax = c0; n = 0; } + cmax = c1; n = 1; + if (c0 > cmax) { cmax = c0; n = 0; } if (c2 > cmax) { n = 2; } - } - else { - cmax = c1; n = 1; - if (c2 > cmax) { cmax = c2; n = 2; } + } else { + cmax = c1; n = 1; + if (c2 > cmax) { cmax = c2; n = 2; } if (c0 > cmax) { n = 0; } } /* Choose split point along selected axis, and update box bounds. @@ -453,17 +452,17 @@ median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes, case 0: lb = (b1->c0max + b1->c0min) / 2; b1->c0max = lb; - b2->c0min = lb+1; + b2->c0min = lb + 1; break; case 1: lb = (b1->c1max + b1->c1min) / 2; b1->c1max = lb; - b2->c1min = lb+1; + b2->c1min = lb + 1; break; case 2: lb = (b1->c2max + b1->c2min) / 2; b1->c2max = lb; - b2->c2min = lb+1; + b2->c2min = lb + 1; break; } /* Update stats for boxes */ @@ -476,16 +475,16 @@ median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes, LOCAL(void) -compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor) +compute_color(j_decompress_ptr cinfo, boxptr boxp, int icolor) /* Compute representative color for a box, put it in colormap[icolor] */ { /* Current algorithm: mean weighted by pixels (not colors) */ /* Note it is important to get the rounding correct! 
*/ - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; hist3d histogram = cquantize->histogram; histptr histp; - int c0,c1,c2; - int c0min,c0max,c1min,c1max,c2min,c2max; + int c0, c1, c2; + int c0min, c0max, c1min, c1max, c2min, c2max; long count; long total = 0; long c0total = 0; @@ -498,25 +497,25 @@ compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor) for (c0 = c0min; c0 <= c0max; c0++) for (c1 = c1min; c1 <= c1max; c1++) { - histp = & histogram[c0][c1][c2min]; + histp = &histogram[c0][c1][c2min]; for (c2 = c2min; c2 <= c2max; c2++) { if ((count = *histp++) != 0) { total += count; - c0total += ((c0 << C0_SHIFT) + ((1<>1)) * count; - c1total += ((c1 << C1_SHIFT) + ((1<>1)) * count; - c2total += ((c2 << C2_SHIFT) + ((1<>1)) * count; + c0total += ((c0 << C0_SHIFT) + ((1 << C0_SHIFT) >> 1)) * count; + c1total += ((c1 << C1_SHIFT) + ((1 << C1_SHIFT) >> 1)) * count; + c2total += ((c2 << C2_SHIFT) + ((1 << C2_SHIFT) >> 1)) * count; } } } - cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total); - cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total); - cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total); + cinfo->colormap[0][icolor] = (JSAMPLE)((c0total + (total >> 1)) / total); + cinfo->colormap[1][icolor] = (JSAMPLE)((c1total + (total >> 1)) / total); + cinfo->colormap[2][icolor] = (JSAMPLE)((c2total + (total >> 1)) / total); } LOCAL(void) -select_colors (j_decompress_ptr cinfo, int desired_colors) +select_colors(j_decompress_ptr cinfo, int desired_colors) /* Master routine for color selection */ { boxptr boxlist; @@ -524,8 +523,8 @@ select_colors (j_decompress_ptr cinfo, int desired_colors) int i; /* Allocate workspace for box list */ - boxlist = (boxptr) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * sizeof(box)); + boxlist = (boxptr)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, 
JPOOL_IMAGE, desired_colors * sizeof(box)); /* Initialize one box containing whole space */ numboxes = 1; boxlist[0].c0min = 0; @@ -535,12 +534,12 @@ select_colors (j_decompress_ptr cinfo, int desired_colors) boxlist[0].c2min = 0; boxlist[0].c2max = MAXJSAMPLE >> C2_SHIFT; /* Shrink it to actually-used volume and set its statistics */ - update_box(cinfo, & boxlist[0]); + update_box(cinfo, &boxlist[0]); /* Perform median-cut to produce final box list */ numboxes = median_cut(cinfo, boxlist, numboxes, desired_colors); /* Compute the representative color for each box, fill colormap */ for (i = 0; i < numboxes; i++) - compute_color(cinfo, & boxlist[i], i); + compute_color(cinfo, &boxlist[i], i); cinfo->actual_number_of_colors = numboxes; TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, numboxes); } @@ -601,13 +600,13 @@ select_colors (j_decompress_ptr cinfo, int desired_colors) /* log2(histogram cells in update box) for each axis; this can be adjusted */ -#define BOX_C0_LOG (HIST_C0_BITS-3) -#define BOX_C1_LOG (HIST_C1_BITS-3) -#define BOX_C2_LOG (HIST_C2_BITS-3) +#define BOX_C0_LOG (HIST_C0_BITS - 3) +#define BOX_C1_LOG (HIST_C1_BITS - 3) +#define BOX_C2_LOG (HIST_C2_BITS - 3) -#define BOX_C0_ELEMS (1<colormap[0][i]); if (x < minc0) { tdist = (x - minc0) * C0_SCALE; - min_dist = tdist*tdist; + min_dist = tdist * tdist; tdist = (x - maxc0) * C0_SCALE; - max_dist = tdist*tdist; + max_dist = tdist * tdist; } else if (x > maxc0) { tdist = (x - maxc0) * C0_SCALE; - min_dist = tdist*tdist; + min_dist = tdist * tdist; tdist = (x - minc0) * C0_SCALE; - max_dist = tdist*tdist; + max_dist = tdist * tdist; } else { /* within cell range so no contribution to min_dist */ min_dist = 0; if (x <= centerc0) { tdist = (x - maxc0) * C0_SCALE; - max_dist = tdist*tdist; + max_dist = tdist * tdist; } else { tdist = (x - minc0) * C0_SCALE; - max_dist = tdist*tdist; + max_dist = tdist * tdist; } } x = GETJSAMPLE(cinfo->colormap[1][i]); if (x < minc1) { tdist = (x - minc1) * C1_SCALE; - min_dist += 
tdist*tdist; + min_dist += tdist * tdist; tdist = (x - maxc1) * C1_SCALE; - max_dist += tdist*tdist; + max_dist += tdist * tdist; } else if (x > maxc1) { tdist = (x - maxc1) * C1_SCALE; - min_dist += tdist*tdist; + min_dist += tdist * tdist; tdist = (x - minc1) * C1_SCALE; - max_dist += tdist*tdist; + max_dist += tdist * tdist; } else { /* within cell range so no contribution to min_dist */ if (x <= centerc1) { tdist = (x - maxc1) * C1_SCALE; - max_dist += tdist*tdist; + max_dist += tdist * tdist; } else { tdist = (x - minc1) * C1_SCALE; - max_dist += tdist*tdist; + max_dist += tdist * tdist; } } x = GETJSAMPLE(cinfo->colormap[2][i]); if (x < minc2) { tdist = (x - minc2) * C2_SCALE; - min_dist += tdist*tdist; + min_dist += tdist * tdist; tdist = (x - maxc2) * C2_SCALE; - max_dist += tdist*tdist; + max_dist += tdist * tdist; } else if (x > maxc2) { tdist = (x - maxc2) * C2_SCALE; - min_dist += tdist*tdist; + min_dist += tdist * tdist; tdist = (x - minc2) * C2_SCALE; - max_dist += tdist*tdist; + max_dist += tdist * tdist; } else { /* within cell range so no contribution to min_dist */ if (x <= centerc2) { tdist = (x - maxc2) * C2_SCALE; - max_dist += tdist*tdist; + max_dist += tdist * tdist; } else { tdist = (x - minc2) * C2_SCALE; - max_dist += tdist*tdist; + max_dist += tdist * tdist; } } @@ -745,15 +744,15 @@ find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2, ncolors = 0; for (i = 0; i < numcolors; i++) { if (mindist[i] <= minmaxdist) - colorlist[ncolors++] = (JSAMPLE) i; + colorlist[ncolors++] = (JSAMPLE)i; } return ncolors; } LOCAL(void) -find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2, - int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[]) +find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2, + int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[]) /* Find the closest colormap entry for each cell in the update box, * given the list of candidate colors prepared by find_nearby_colors. 
* Return the indexes of the closest entries in the bestcolor[] array. @@ -775,7 +774,7 @@ find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2, /* Initialize best-distance for each cell of the update box */ bptr = bestdist; - for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--) + for (i = BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS - 1; i >= 0; i--) *bptr++ = 0x7FFFFFFFL; /* For each color selected by find_nearby_colors, @@ -792,11 +791,11 @@ find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2, icolor = GETJSAMPLE(colorlist[i]); /* Compute (square of) distance from minc0/c1/c2 to this color */ inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE; - dist0 = inc0*inc0; + dist0 = inc0 * inc0; inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE; - dist0 += inc1*inc1; + dist0 += inc1 * inc1; inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE; - dist0 += inc2*inc2; + dist0 += inc2 * inc2; /* Form the initial difference increments */ inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0; inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1; @@ -805,16 +804,16 @@ find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2, bptr = bestdist; cptr = bestcolor; xx0 = inc0; - for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) { + for (ic0 = BOX_C0_ELEMS - 1; ic0 >= 0; ic0--) { dist1 = dist0; xx1 = inc1; - for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) { + for (ic1 = BOX_C1_ELEMS - 1; ic1 >= 0; ic1--) { dist2 = dist1; xx2 = inc2; - for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) { + for (ic2 = BOX_C2_ELEMS - 1; ic2 >= 0; ic2--) { if (dist2 < *bptr) { *bptr = dist2; - *cptr = (JSAMPLE) icolor; + *cptr = (JSAMPLE)icolor; } dist2 += xx2; xx2 += 2 * STEP_C2 * STEP_C2; @@ -832,12 +831,12 @@ find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2, LOCAL(void) -fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2) +fill_inverse_cmap(j_decompress_ptr cinfo, int c0, int 
c1, int c2) /* Fill the inverse-colormap entries in the update box that contains */ /* histogram cell c0/c1/c2. (Only that one cell MUST be filled, but */ /* we can fill as many others as we wish.) */ { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; hist3d histogram = cquantize->histogram; int minc0, minc1, minc2; /* lower left corner of update box */ int ic0, ic1, ic2; @@ -878,9 +877,9 @@ fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2) cptr = bestcolor; for (ic0 = 0; ic0 < BOX_C0_ELEMS; ic0++) { for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) { - cachep = & histogram[c0+ic0][c1+ic1][c2]; + cachep = &histogram[c0 + ic0][c1 + ic1][c2]; for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) { - *cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1); + *cachep++ = (histcell)(GETJSAMPLE(*cptr++) + 1); } } } @@ -892,11 +891,11 @@ fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2) */ METHODDEF(void) -pass2_no_dither (j_decompress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) +pass2_no_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPARRAY output_buf, int num_rows) /* This version performs no dithering */ { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; hist3d histogram = cquantize->histogram; register JSAMPROW inptr, outptr; register histptr cachep; @@ -913,24 +912,24 @@ pass2_no_dither (j_decompress_ptr cinfo, c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT; c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT; c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT; - cachep = & histogram[c0][c1][c2]; + cachep = &histogram[c0][c1][c2]; /* If we have not seen this color before, find nearest colormap entry */ /* and update the cache */ if (*cachep == 0) - fill_inverse_cmap(cinfo, c0,c1,c2); + fill_inverse_cmap(cinfo, c0, c1, c2); /* Now emit the colormap index for this cell */ - *outptr++ = 
(JSAMPLE) (*cachep - 1); + *outptr++ = (JSAMPLE)(*cachep - 1); } } } METHODDEF(void) -pass2_fs_dither (j_decompress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows) +pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPARRAY output_buf, int num_rows) /* This version performs Floyd-Steinberg dithering */ { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; hist3d histogram = cquantize->histogram; register LOCFSERROR cur0, cur1, cur2; /* current error or pixel value */ LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */ @@ -956,11 +955,11 @@ pass2_fs_dither (j_decompress_ptr cinfo, outptr = output_buf[row]; if (cquantize->on_odd_row) { /* work right to left in this row */ - inptr += (width-1) * 3; /* so point to rightmost pixel */ - outptr += width-1; + inptr += (width - 1) * 3; /* so point to rightmost pixel */ + outptr += width - 1; dir = -1; dir3 = -3; - errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */ + errorptr = cquantize->fserrors + (width + 1) * 3; /* => entry after last column */ cquantize->on_odd_row = FALSE; /* flip for next time */ } else { /* work left to right in this row */ @@ -984,9 +983,9 @@ pass2_fs_dither (j_decompress_ptr cinfo, * for either sign of the error value. * Note: errorptr points to *previous* column's array entry. */ - cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4); - cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4); - cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4); + cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3 + 0] + 8, 4); + cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3 + 1] + 8, 4); + cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3 + 2] + 8, 4); /* Limit the error using transfer function set by init_error_limit. * See comments with init_error_limit for rationale. 
*/ @@ -1004,14 +1003,17 @@ pass2_fs_dither (j_decompress_ptr cinfo, cur1 = GETJSAMPLE(range_limit[cur1]); cur2 = GETJSAMPLE(range_limit[cur2]); /* Index into the cache with adjusted pixel value */ - cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT]; + cachep = + &histogram[cur0 >> C0_SHIFT][cur1 >> C1_SHIFT][cur2 >> C2_SHIFT]; /* If we have not seen this color before, find nearest colormap */ /* entry and update the cache */ if (*cachep == 0) - fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT); + fill_inverse_cmap(cinfo, cur0 >> C0_SHIFT, cur1 >> C1_SHIFT, + cur2 >> C2_SHIFT); /* Now emit the colormap index for this cell */ - { register int pixcode = *cachep - 1; - *outptr = (JSAMPLE) pixcode; + { + register int pixcode = *cachep - 1; + *outptr = (JSAMPLE)pixcode; /* Compute representation error for this pixel */ cur0 -= GETJSAMPLE(colormap0[pixcode]); cur1 -= GETJSAMPLE(colormap1[pixcode]); @@ -1021,20 +1023,21 @@ pass2_fs_dither (j_decompress_ptr cinfo, * Add these into the running sums, and simultaneously shift the * next-line error sums left by 1 column. */ - { register LOCFSERROR bnexterr; + { + register LOCFSERROR bnexterr; bnexterr = cur0; /* Process component 0 */ - errorptr[0] = (FSERROR) (bpreverr0 + cur0 * 3); + errorptr[0] = (FSERROR)(bpreverr0 + cur0 * 3); bpreverr0 = belowerr0 + cur0 * 5; belowerr0 = bnexterr; cur0 *= 7; bnexterr = cur1; /* Process component 1 */ - errorptr[1] = (FSERROR) (bpreverr1 + cur1 * 3); + errorptr[1] = (FSERROR)(bpreverr1 + cur1 * 3); bpreverr1 = belowerr1 + cur1 * 5; belowerr1 = bnexterr; cur1 *= 7; bnexterr = cur2; /* Process component 2 */ - errorptr[2] = (FSERROR) (bpreverr2 + cur2 * 3); + errorptr[2] = (FSERROR)(bpreverr2 + cur2 * 3); bpreverr2 = belowerr2 + cur2 * 5; belowerr2 = bnexterr; cur2 *= 7; @@ -1051,9 +1054,9 @@ pass2_fs_dither (j_decompress_ptr cinfo, * final fserrors[] entry. 
Note we need not unload belowerrN because * it is for the dummy column before or after the actual array. */ - errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */ - errorptr[1] = (FSERROR) bpreverr1; - errorptr[2] = (FSERROR) bpreverr2; + errorptr[0] = (FSERROR)bpreverr0; /* unload prev errs into array */ + errorptr[1] = (FSERROR)bpreverr1; + errorptr[2] = (FSERROR)bpreverr2; } } @@ -1076,31 +1079,31 @@ pass2_fs_dither (j_decompress_ptr cinfo, */ LOCAL(void) -init_error_limit (j_decompress_ptr cinfo) +init_error_limit(j_decompress_ptr cinfo) /* Allocate and fill in the error_limiter table */ { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; int *table; int in, out; - table = (int *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * sizeof(int)); + table = (int *)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, JPOOL_IMAGE, (MAXJSAMPLE * 2 + 1) * sizeof(int)); table += MAXJSAMPLE; /* so can index -MAXJSAMPLE .. +MAXJSAMPLE */ cquantize->error_limiter = table; -#define STEPSIZE ((MAXJSAMPLE+1)/16) +#define STEPSIZE ((MAXJSAMPLE + 1) / 16) /* Map errors 1:1 up to +- MAXJSAMPLE/16 */ out = 0; for (in = 0; in < STEPSIZE; in++, out++) { - table[in] = out; table[-in] = -out; + table[in] = out; table[-in] = -out; } /* Map errors 1:2 up to +- 3*MAXJSAMPLE/16 */ - for (; in < STEPSIZE*3; in++, out += (in&1) ? 0 : 1) { - table[in] = out; table[-in] = -out; + for (; in < STEPSIZE * 3; in++, out += (in & 1) ? 
0 : 1) { + table[in] = out; table[-in] = -out; } /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */ for (; in <= MAXJSAMPLE; in++) { - table[in] = out; table[-in] = -out; + table[in] = out; table[-in] = -out; } #undef STEPSIZE } @@ -1111,9 +1114,9 @@ init_error_limit (j_decompress_ptr cinfo) */ METHODDEF(void) -finish_pass1 (j_decompress_ptr cinfo) +finish_pass1(j_decompress_ptr cinfo) { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; /* Select the representative colors and fill in cinfo->colormap */ cinfo->colormap = cquantize->sv_colormap; @@ -1124,7 +1127,7 @@ finish_pass1 (j_decompress_ptr cinfo) METHODDEF(void) -finish_pass2 (j_decompress_ptr cinfo) +finish_pass2(j_decompress_ptr cinfo) { /* no work */ } @@ -1135,9 +1138,9 @@ finish_pass2 (j_decompress_ptr cinfo) */ METHODDEF(void) -start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan) +start_pass_2_quant(j_decompress_ptr cinfo, boolean is_pre_scan) { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; hist3d histogram = cquantize->histogram; int i; @@ -1167,14 +1170,14 @@ start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan) ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS); if (cinfo->dither_mode == JDITHER_FS) { - size_t arraysize = (size_t) ((cinfo->output_width + 2) * - (3 * sizeof(FSERROR))); + size_t arraysize = + (size_t)((cinfo->output_width + 2) * (3 * sizeof(FSERROR))); /* Allocate Floyd-Steinberg workspace if we didn't already. */ if (cquantize->fserrors == NULL) - cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large) - ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize); + cquantize->fserrors = (FSERRPTR)(*cinfo->mem->alloc_large) + ((j_common_ptr)cinfo, JPOOL_IMAGE, arraysize); /* Initialize the propagated errors to zero. 
*/ - jzero_far((void *) cquantize->fserrors, arraysize); + jzero_far((void *)cquantize->fserrors, arraysize); /* Make the error-limit table if we didn't already. */ if (cquantize->error_limiter == NULL) init_error_limit(cinfo); @@ -1185,8 +1188,8 @@ start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan) /* Zero the histogram or inverse color map, if necessary */ if (cquantize->needs_zeroed) { for (i = 0; i < HIST_C0_ELEMS; i++) { - jzero_far((void *) histogram[i], - HIST_C1_ELEMS*HIST_C2_ELEMS * sizeof(histcell)); + jzero_far((void *)histogram[i], + HIST_C1_ELEMS * HIST_C2_ELEMS * sizeof(histcell)); } cquantize->needs_zeroed = FALSE; } @@ -1198,9 +1201,9 @@ start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan) */ METHODDEF(void) -new_color_map_2_quant (j_decompress_ptr cinfo) +new_color_map_2_quant(j_decompress_ptr cinfo) { - my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize; + my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize; /* Reset the inverse color map */ cquantize->needs_zeroed = TRUE; @@ -1212,15 +1215,15 @@ new_color_map_2_quant (j_decompress_ptr cinfo) */ GLOBAL(void) -jinit_2pass_quantizer (j_decompress_ptr cinfo) +jinit_2pass_quantizer(j_decompress_ptr cinfo) { my_cquantize_ptr cquantize; int i; cquantize = (my_cquantize_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(my_cquantizer)); - cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize; + cinfo->cquantize = (struct jpeg_color_quantizer *)cquantize; cquantize->pub.start_pass = start_pass_2_quant; cquantize->pub.new_color_map = new_color_map_2_quant; cquantize->fserrors = NULL; /* flag optional arrays not allocated */ @@ -1231,12 +1234,12 @@ jinit_2pass_quantizer (j_decompress_ptr cinfo) ERREXIT(cinfo, JERR_NOTIMPL); /* Allocate the histogram/inverse colormap storage */ - cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, 
JPOOL_IMAGE, HIST_C0_ELEMS * sizeof(hist2d)); + cquantize->histogram = (hist3d)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * sizeof(hist2d)); for (i = 0; i < HIST_C0_ELEMS; i++) { - cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - HIST_C1_ELEMS*HIST_C2_ELEMS * sizeof(histcell)); + cquantize->histogram[i] = (hist2d)(*cinfo->mem->alloc_large) + ((j_common_ptr)cinfo, JPOOL_IMAGE, + HIST_C1_ELEMS * HIST_C2_ELEMS * sizeof(histcell)); } cquantize->needs_zeroed = TRUE; /* histogram is garbage now */ @@ -1254,7 +1257,7 @@ jinit_2pass_quantizer (j_decompress_ptr cinfo) if (desired > MAXNUMCOLORS) ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS); cquantize->sv_colormap = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo,JPOOL_IMAGE, (JDIMENSION) desired, (JDIMENSION) 3); + ((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)desired, (JDIMENSION)3); cquantize->desired = desired; } else cquantize->sv_colormap = NULL; @@ -1271,9 +1274,9 @@ jinit_2pass_quantizer (j_decompress_ptr cinfo) * dither_mode changes. */ if (cinfo->dither_mode == JDITHER_FS) { - cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (size_t) ((cinfo->output_width + 2) * (3 * sizeof(FSERROR)))); + cquantize->fserrors = (FSERRPTR)(*cinfo->mem->alloc_large) + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (size_t)((cinfo->output_width + 2) * (3 * sizeof(FSERROR)))); /* Might as well create the error-limiting table too. */ init_error_limit(cinfo); } diff --git a/jsimd.h b/jsimd.h index 3aa0779..51e2b8c 100644 --- a/jsimd.h +++ b/jsimd.h @@ -3,7 +3,7 @@ * * Copyright 2009 Pierre Ossman for Cendio AB * Copyright (C) 2011, 2014, D. R. Commander. - * Copyright (C) 2015, Matthieu Darbois. + * Copyright (C) 2015-2016, 2018, Matthieu Darbois. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. 
@@ -13,81 +13,105 @@ #include "jchuff.h" /* Declarations shared with jcphuff.c */ -EXTERN(int) jsimd_can_rgb_ycc (void); -EXTERN(int) jsimd_can_rgb_gray (void); -EXTERN(int) jsimd_can_ycc_rgb (void); -EXTERN(int) jsimd_can_ycc_rgb565 (void); -EXTERN(int) jsimd_c_can_null_convert (void); - -EXTERN(void) jsimd_rgb_ycc_convert - (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_rgb_gray_convert - (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_ycc_rgb_convert - (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); -EXTERN(void) jsimd_ycc_rgb565_convert - (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); -EXTERN(void) jsimd_c_null_convert - (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); - -EXTERN(int) jsimd_can_h2v2_downsample (void); -EXTERN(int) jsimd_can_h2v1_downsample (void); - -EXTERN(void) jsimd_h2v2_downsample - (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data); - -EXTERN(int) jsimd_can_h2v2_smooth_downsample (void); - -EXTERN(void) jsimd_h2v2_smooth_downsample - (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data); - -EXTERN(void) jsimd_h2v1_downsample - (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data); - -EXTERN(int) jsimd_can_h2v2_upsample (void); -EXTERN(int) jsimd_can_h2v1_upsample (void); -EXTERN(int) jsimd_can_int_upsample (void); - -EXTERN(void) jsimd_h2v2_upsample - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); -EXTERN(void) jsimd_h2v1_upsample - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - 
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); -EXTERN(void) jsimd_int_upsample - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); - -EXTERN(int) jsimd_can_h2v2_fancy_upsample (void); -EXTERN(int) jsimd_can_h2v1_fancy_upsample (void); - -EXTERN(void) jsimd_h2v2_fancy_upsample - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); -EXTERN(void) jsimd_h2v1_fancy_upsample - (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); - -EXTERN(int) jsimd_can_h2v2_merged_upsample (void); -EXTERN(int) jsimd_can_h2v1_merged_upsample (void); - -EXTERN(void) jsimd_h2v2_merged_upsample - (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); -EXTERN(void) jsimd_h2v1_merged_upsample - (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); - -EXTERN(int) jsimd_can_huff_encode_one_block (void); - -EXTERN(JOCTET*) jsimd_huff_encode_one_block - (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, - c_derived_tbl *dctbl, c_derived_tbl *actbl); +EXTERN(int) jsimd_can_rgb_ycc(void); +EXTERN(int) jsimd_can_rgb_gray(void); +EXTERN(int) jsimd_can_ycc_rgb(void); +EXTERN(int) jsimd_can_ycc_rgb565(void); +EXTERN(int) jsimd_c_can_null_convert(void); + +EXTERN(void) jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION input_row, + JSAMPARRAY output_buf, int 
num_rows); +EXTERN(void) jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows); + +EXTERN(int) jsimd_can_h2v2_downsample(void); +EXTERN(int) jsimd_can_h2v1_downsample(void); + +EXTERN(void) jsimd_h2v2_downsample(j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY output_data); + +EXTERN(int) jsimd_can_h2v2_smooth_downsample(void); + +EXTERN(void) jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v1_downsample(j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY output_data); + +EXTERN(int) jsimd_can_h2v2_upsample(void); +EXTERN(int) jsimd_can_h2v1_upsample(void); +EXTERN(int) jsimd_can_int_upsample(void); + +EXTERN(void) jsimd_h2v2_upsample(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v1_upsample(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_int_upsample(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +EXTERN(int) jsimd_can_h2v2_fancy_upsample(void); +EXTERN(int) jsimd_can_h2v1_fancy_upsample(void); + +EXTERN(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +EXTERN(int) jsimd_can_h2v2_merged_upsample(void); +EXTERN(int) jsimd_can_h2v1_merged_upsample(void); + +EXTERN(void) jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) 
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); + +EXTERN(int) jsimd_can_huff_encode_one_block(void); + +EXTERN(JOCTET *) jsimd_huff_encode_one_block(void *state, JOCTET *buffer, + JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, + c_derived_tbl *actbl); + +EXTERN(int) jsimd_can_encode_mcu_AC_first_prepare(void); + +EXTERN(void) jsimd_encode_mcu_AC_first_prepare + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, + JCOEF *values, size_t *zerobits); + +EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void); + +EXTERN(int) jsimd_encode_mcu_AC_refine_prepare + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, + JCOEF *absvalues, size_t *bits); diff --git a/jsimd_none.c b/jsimd_none.c index f29030c..3cb6c80 100644 --- a/jsimd_none.c +++ b/jsimd_none.c @@ -3,7 +3,7 @@ * * Copyright 2009 Pierre Ossman for Cendio AB * Copyright (C) 2009-2011, 2014, D. R. Commander. - * Copyright (C) 2015, Matthieu Darbois. + * Copyright (C) 2015-2016, 2018, Matthieu Darbois. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. 
@@ -20,385 +20,399 @@ #include "jsimddct.h" GLOBAL(int) -jsimd_can_rgb_ycc (void) +jsimd_can_rgb_ycc(void) { return 0; } GLOBAL(int) -jsimd_can_rgb_gray (void) +jsimd_can_rgb_gray(void) { return 0; } GLOBAL(int) -jsimd_can_ycc_rgb (void) +jsimd_can_ycc_rgb(void) { return 0; } GLOBAL(int) -jsimd_can_ycc_rgb565 (void) +jsimd_can_ycc_rgb565(void) { return 0; } GLOBAL(int) -jsimd_c_can_null_convert (void) +jsimd_c_can_null_convert(void) { return 0; } GLOBAL(void) -jsimd_rgb_ycc_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) { } GLOBAL(void) -jsimd_rgb_gray_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) { } GLOBAL(void) -jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { } GLOBAL(void) -jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { } GLOBAL(void) -jsimd_c_null_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) { } GLOBAL(int) -jsimd_can_h2v2_downsample (void) +jsimd_can_h2v2_downsample(void) { return 0; } GLOBAL(int) -jsimd_can_h2v1_downsample (void) 
+jsimd_can_h2v1_downsample(void) { return 0; } GLOBAL(int) -jsimd_can_h2v2_smooth_downsample (void) +jsimd_can_h2v2_smooth_downsample(void) { return 0; } GLOBAL(void) -jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { } GLOBAL(void) -jsimd_h2v2_smooth_downsample (j_compress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { } GLOBAL(void) -jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { } GLOBAL(int) -jsimd_can_h2v2_upsample (void) +jsimd_can_h2v2_upsample(void) { return 0; } GLOBAL(int) -jsimd_can_h2v1_upsample (void) +jsimd_can_h2v1_upsample(void) { return 0; } GLOBAL(int) -jsimd_can_int_upsample (void) +jsimd_can_int_upsample(void) { return 0; } GLOBAL(void) -jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +jsimd_int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(void) -jsimd_h2v2_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(void) -jsimd_h2v1_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v1_upsample(j_decompress_ptr cinfo, 
jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(int) -jsimd_can_h2v2_fancy_upsample (void) +jsimd_can_h2v2_fancy_upsample(void) { return 0; } GLOBAL(int) -jsimd_can_h2v1_fancy_upsample (void) +jsimd_can_h2v1_fancy_upsample(void) { return 0; } GLOBAL(void) -jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(void) -jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(int) -jsimd_can_h2v2_merged_upsample (void) +jsimd_can_h2v2_merged_upsample(void) { return 0; } GLOBAL(int) -jsimd_can_h2v1_merged_upsample (void) +jsimd_can_h2v1_merged_upsample(void) { return 0; } GLOBAL(void) -jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { } GLOBAL(void) -jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { } GLOBAL(int) -jsimd_can_convsamp (void) +jsimd_can_convsamp(void) { return 0; } GLOBAL(int) -jsimd_can_convsamp_float (void) +jsimd_can_convsamp_float(void) { return 0; } GLOBAL(void) -jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM *workspace) +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) { } 
GLOBAL(void) -jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT *workspace) +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) { } GLOBAL(int) -jsimd_can_fdct_islow (void) +jsimd_can_fdct_islow(void) { return 0; } GLOBAL(int) -jsimd_can_fdct_ifast (void) +jsimd_can_fdct_ifast(void) { return 0; } GLOBAL(int) -jsimd_can_fdct_float (void) +jsimd_can_fdct_float(void) { return 0; } GLOBAL(void) -jsimd_fdct_islow (DCTELEM *data) +jsimd_fdct_islow(DCTELEM *data) { } GLOBAL(void) -jsimd_fdct_ifast (DCTELEM *data) +jsimd_fdct_ifast(DCTELEM *data) { } GLOBAL(void) -jsimd_fdct_float (FAST_FLOAT *data) +jsimd_fdct_float(FAST_FLOAT *data) { } GLOBAL(int) -jsimd_can_quantize (void) +jsimd_can_quantize(void) { return 0; } GLOBAL(int) -jsimd_can_quantize_float (void) +jsimd_can_quantize_float(void) { return 0; } GLOBAL(void) -jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, - DCTELEM *workspace) +jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) { } GLOBAL(void) -jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, - FAST_FLOAT *workspace) +jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) { } GLOBAL(int) -jsimd_can_idct_2x2 (void) +jsimd_can_idct_2x2(void) { return 0; } GLOBAL(int) -jsimd_can_idct_4x4 (void) +jsimd_can_idct_4x4(void) { return 0; } GLOBAL(int) -jsimd_can_idct_6x6 (void) +jsimd_can_idct_6x6(void) { return 0; } GLOBAL(int) -jsimd_can_idct_12x12 (void) +jsimd_can_idct_12x12(void) { return 0; } GLOBAL(void) -jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(void) -jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY 
output_buf, - JDIMENSION output_col) +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(void) -jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(void) -jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(int) -jsimd_can_idct_islow (void) +jsimd_can_idct_islow(void) { return 0; } GLOBAL(int) -jsimd_can_idct_ifast (void) +jsimd_can_idct_ifast(void) { return 0; } GLOBAL(int) -jsimd_can_idct_float (void) +jsimd_can_idct_float(void) { return 0; } GLOBAL(void) -jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(void) -jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(void) -jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(int) -jsimd_can_huff_encode_one_block (void) 
+jsimd_can_huff_encode_one_block(void) { return 0; } -GLOBAL(JOCTET*) -jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, - int last_dc_val, c_derived_tbl *dctbl, - c_derived_tbl *actbl) +GLOBAL(JOCTET *) +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) { return NULL; } + +GLOBAL(int) +jsimd_can_encode_mcu_AC_first_prepare(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *zerobits) +{ +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_refine_prepare(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits) +{ + return 0; +} diff --git a/jsimddct.h b/jsimddct.h index b19ab48..55ee8cf 100644 --- a/jsimddct.h +++ b/jsimddct.h @@ -9,66 +9,62 @@ * */ -EXTERN(int) jsimd_can_convsamp (void); -EXTERN(int) jsimd_can_convsamp_float (void); +EXTERN(int) jsimd_can_convsamp(void); +EXTERN(int) jsimd_can_convsamp_float(void); -EXTERN(void) jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM *workspace); -EXTERN(void) jsimd_convsamp_float (JSAMPARRAY sample_data, - JDIMENSION start_col, - FAST_FLOAT *workspace); +EXTERN(void) jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace); +EXTERN(void) jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace); -EXTERN(int) jsimd_can_fdct_islow (void); -EXTERN(int) jsimd_can_fdct_ifast (void); -EXTERN(int) jsimd_can_fdct_float (void); +EXTERN(int) jsimd_can_fdct_islow(void); +EXTERN(int) jsimd_can_fdct_ifast(void); +EXTERN(int) jsimd_can_fdct_float(void); -EXTERN(void) jsimd_fdct_islow (DCTELEM *data); -EXTERN(void) jsimd_fdct_ifast (DCTELEM *data); -EXTERN(void) jsimd_fdct_float (FAST_FLOAT *data); 
+EXTERN(void) jsimd_fdct_islow(DCTELEM *data); +EXTERN(void) jsimd_fdct_ifast(DCTELEM *data); +EXTERN(void) jsimd_fdct_float(FAST_FLOAT *data); -EXTERN(int) jsimd_can_quantize (void); -EXTERN(int) jsimd_can_quantize_float (void); +EXTERN(int) jsimd_can_quantize(void); +EXTERN(int) jsimd_can_quantize_float(void); -EXTERN(void) jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, - DCTELEM *workspace); -EXTERN(void) jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, - FAST_FLOAT *workspace); +EXTERN(void) jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace); +EXTERN(void) jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace); -EXTERN(int) jsimd_can_idct_2x2 (void); -EXTERN(int) jsimd_can_idct_4x4 (void); -EXTERN(int) jsimd_can_idct_6x6 (void); -EXTERN(int) jsimd_can_idct_12x12 (void); +EXTERN(int) jsimd_can_idct_2x2(void); +EXTERN(int) jsimd_can_idct_4x4(void); +EXTERN(int) jsimd_can_idct_6x6(void); +EXTERN(int) jsimd_can_idct_12x12(void); -EXTERN(void) jsimd_idct_2x2 (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); -EXTERN(void) jsimd_idct_4x4 (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); -EXTERN(void) jsimd_idct_6x6 (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); -EXTERN(void) jsimd_idct_12x12 (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); +EXTERN(void) jsimd_idct_2x2(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) 
jsimd_idct_6x6(j_decompress_ptr cinfo, + jpeg_component_info *compptr, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col); +EXTERN(void) jsimd_idct_12x12(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); -EXTERN(int) jsimd_can_idct_islow (void); -EXTERN(int) jsimd_can_idct_ifast (void); -EXTERN(int) jsimd_can_idct_float (void); +EXTERN(int) jsimd_can_idct_islow(void); +EXTERN(int) jsimd_can_idct_ifast(void); +EXTERN(int) jsimd_can_idct_float(void); -EXTERN(void) jsimd_idct_islow (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); -EXTERN(void) jsimd_idct_ifast (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); -EXTERN(void) jsimd_idct_float (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); +EXTERN(void) jsimd_idct_islow(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_ifast(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_float(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); diff --git a/jstdhuff.c b/jstdhuff.c index e202e8e..036d649 100644 --- a/jstdhuff.c +++ b/jstdhuff.c @@ -17,8 +17,8 @@ */ LOCAL(void) -add_huff_table (j_common_ptr cinfo, - JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val) +add_huff_table(j_common_ptr cinfo, JHUFF_TBL **htblptr, const UINT8 *bits, + const UINT8 *val) /* Define a Huffman table */ { int nsymbols, len; @@ -50,71 +50,79 @@ add_huff_table (j_common_ptr cinfo, LOCAL(void) -std_huff_tables (j_common_ptr cinfo) 
+std_huff_tables(j_common_ptr cinfo) /* Set up the standard Huffman tables (cf. JPEG standard section K.3) */ /* IMPORTANT: these are only valid for 8-bit data precision! */ { JHUFF_TBL **dc_huff_tbl_ptrs, **ac_huff_tbl_ptrs; - static const UINT8 bits_dc_luminance[17] = - { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; - static const UINT8 val_dc_luminance[] = - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + static const UINT8 bits_dc_luminance[17] = { + /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 + }; + static const UINT8 val_dc_luminance[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + }; - static const UINT8 bits_dc_chrominance[17] = - { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; - static const UINT8 val_dc_chrominance[] = - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + static const UINT8 bits_dc_chrominance[17] = { + /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 + }; + static const UINT8 val_dc_chrominance[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + }; - static const UINT8 bits_ac_luminance[17] = - { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d }; - static const UINT8 val_ac_luminance[] = - { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, - 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, - 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, - 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, - 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, - 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, - 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, - 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, - 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, - 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, - 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, - 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, - 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, - 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, - 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 
0xc4, 0xc5, - 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, - 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, - 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, - 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, - 0xf9, 0xfa }; + static const UINT8 bits_ac_luminance[17] = { + /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d + }; + static const UINT8 val_ac_luminance[] = { + 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, + 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, + 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, + 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, + 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, + 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, + 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, + 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa + }; - static const UINT8 bits_ac_chrominance[17] = - { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 }; - static const UINT8 val_ac_chrominance[] = - { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, - 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, - 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, - 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, - 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, - 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, - 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, - 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, - 0x49, 0x4a, 0x53, 
0x54, 0x55, 0x56, 0x57, 0x58, - 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, - 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, - 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, - 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, - 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, - 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, - 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, - 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, - 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, - 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, - 0xf9, 0xfa }; + static const UINT8 bits_ac_chrominance[17] = { + /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 + }; + static const UINT8 val_ac_chrominance[] = { + 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, + 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, + 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, + 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, + 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, + 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, + 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, + 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, + 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, + 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, + 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa + }; if (cinfo->is_decompressor) { dc_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->dc_huff_tbl_ptrs; diff --git a/jutils.c b/jutils.c index f9d3502..5c5bb17 100644 --- a/jutils.c +++ b/jutils.c @@ -53,7 +53,7 @@ const int 
jpeg_zigzag_order[DCTSIZE2] = { * fake entries. */ -const int jpeg_natural_order[DCTSIZE2+16] = { +const int jpeg_natural_order[DCTSIZE2 + 16] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, @@ -72,7 +72,7 @@ const int jpeg_natural_order[DCTSIZE2+16] = { */ GLOBAL(long) -jdiv_round_up (long a, long b) +jdiv_round_up(long a, long b) /* Compute a/b rounded up to next integer, ie, ceil(a/b) */ /* Assumes a >= 0, b > 0 */ { @@ -81,7 +81,7 @@ jdiv_round_up (long a, long b) GLOBAL(long) -jround_up (long a, long b) +jround_up(long a, long b) /* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */ /* Assumes a >= 0, b > 0 */ { @@ -91,9 +91,9 @@ jround_up (long a, long b) GLOBAL(void) -jcopy_sample_rows (JSAMPARRAY input_array, int source_row, - JSAMPARRAY output_array, int dest_row, - int num_rows, JDIMENSION num_cols) +jcopy_sample_rows(JSAMPARRAY input_array, int source_row, + JSAMPARRAY output_array, int dest_row, int num_rows, + JDIMENSION num_cols) /* Copy some rows of samples from one place to another. * num_rows rows are copied from input_array[source_row++] * to output_array[dest_row++]; these areas may overlap for duplication. @@ -101,7 +101,7 @@ jcopy_sample_rows (JSAMPARRAY input_array, int source_row, */ { register JSAMPROW inptr, outptr; - register size_t count = (size_t) (num_cols * sizeof(JSAMPLE)); + register size_t count = (size_t)(num_cols * sizeof(JSAMPLE)); register int row; input_array += source_row; @@ -116,8 +116,8 @@ jcopy_sample_rows (JSAMPARRAY input_array, int source_row, GLOBAL(void) -jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row, - JDIMENSION num_blocks) +jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row, + JDIMENSION num_blocks) /* Copy a row of coefficient blocks from one place to another. 
*/ { MEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * sizeof(JCOEF))); @@ -125,7 +125,7 @@ jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row, GLOBAL(void) -jzero_far (void *target, size_t bytestozero) +jzero_far(void *target, size_t bytestozero) /* Zero out a chunk of memory. */ /* This might be sample-array data, block-array data, or alloc_large data. */ { diff --git a/jversion.h b/jversion.h index 7e44eaa..2039f44 100644 --- a/jversion.h +++ b/jversion.h @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2010, 2012-2017, D. R. Commander. + * Copyright (C) 2010, 2012-2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -35,15 +35,18 @@ * their code */ -#define JCOPYRIGHT "Copyright (C) 2009-2017 D. R. Commander\n" \ - "Copyright (C) 2011-2016 Siarhei Siamashka\n" \ - "Copyright (C) 2015-2016 Matthieu Darbois\n" \ - "Copyright (C) 2015 Google, Inc.\n" \ - "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \ - "Copyright (C) 2013 Linaro Limited\n" \ - "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \ - "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \ - "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ - "Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding" \ - -#define JCOPYRIGHT_SHORT "Copyright (C) 1991-2017 The libjpeg-turbo Project and many others" +#define JCOPYRIGHT \ + "Copyright (C) 2009-2018 D. R. 
Commander\n" \ + "Copyright (C) 2011-2016 Siarhei Siamashka\n" \ + "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \ + "Copyright (C) 2015 Intel Corporation\n" \ + "Copyright (C) 2015 Google, Inc.\n" \ + "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \ + "Copyright (C) 2013 Linaro Limited\n" \ + "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \ + "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \ + "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ + "Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding" + +#define JCOPYRIGHT_SHORT \ + "Copyright (C) 1991-2018 The libjpeg-turbo Project and many others" diff --git a/libjpeg.txt b/libjpeg.txt index 5181afc..c50cf90 100644 --- a/libjpeg.txt +++ b/libjpeg.txt @@ -3,7 +3,7 @@ USING THE IJG JPEG LIBRARY This file was part of the Independent JPEG Group's software: Copyright (C) 1994-2013, Thomas G. Lane, Guido Vollbeding. libjpeg-turbo Modifications: -Copyright (C) 2010, 2014-2017, D. R. Commander. +Copyright (C) 2010, 2014-2018, D. R. Commander. Copyright (C) 2015, Google, Inc. For conditions of distribution and use, see the accompanying README.ijg file. @@ -11,7 +11,7 @@ For conditions of distribution and use, see the accompanying README.ijg file. This file describes how to use the IJG JPEG library within an application program. Read it if you want to write a program that uses the library. -The file example.c provides heavily commented skeleton code for calling the +The file example.txt provides heavily commented skeleton code for calling the JPEG library. Also see jpeglib.h (the include file to be used by application programs) for full details about data structures and function parameter lists. The library source code, of course, is the ultimate reference. 
@@ -47,6 +47,7 @@ Advanced features: Buffered-image mode Abbreviated datastreams and multiple images Special markers + ICC profiles Raw (downsampled) image data Really raw data: DCT coefficients Progress monitoring @@ -387,13 +388,13 @@ to the total image height. In most applications it is convenient to pass just one or a few scanlines at a time. The expected format for the passed data is discussed under "Data formats", above. -Image data should be written in top-to-bottom scanline order. The JPEG spec -contains some weasel wording about how top and bottom are application-defined -terms (a curious interpretation of the English language...) but if you want -your files to be compatible with everyone else's, you WILL use top-to-bottom -order. If the source data must be read in bottom-to-top order, you can use -the JPEG library's virtual array mechanism to invert the data efficiently. -Examples of this can be found in the sample application cjpeg. +Image data should be written in top-to-bottom scanline order. +Rec. ITU-T T.81 | ISO/IEC 10918-1 says, "Applications determine which edges of +a source image are defined as top, bottom, left, and right." However, if you +want your files to be compatible with everyone else's, then top-to-bottom order +must be used. If the source data must be read in bottom-to-top order, then you +can use the JPEG library's virtual array mechanism to invert the data +efficiently. Examples of this can be found in the sample application cjpeg. The library maintains a count of the number of scanlines written so far in the next_scanline field of the JPEG object. Usually you can just use @@ -401,7 +402,7 @@ this variable as the loop counter, so that the loop test looks like "while (cinfo.next_scanline < cinfo.image_height)". Code for this step depends heavily on the way that you store the source data. 
-example.c shows the following code for the case of a full-size 2-D source +example.txt shows the following code for the case of a full-size 2-D source array containing 3-byte RGB pixels: JSAMPROW row_pointer[1]; /* pointer to a single row */ @@ -410,7 +411,7 @@ array containing 3-byte RGB pixels: row_stride = image_width * 3; /* JSAMPLEs per row in image_buffer */ while (cinfo.next_scanline < cinfo.image_height) { - row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride]; + row_pointer[0] = &image_buffer[cinfo.next_scanline * row_stride]; jpeg_write_scanlines(&cinfo, row_pointer, 1); } @@ -916,7 +917,8 @@ jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline) jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, boolean force_baseline) Same as jpeg_set_quality() except that the generated tables are the - sample tables given in the JPEC spec section K.1, multiplied by the + sample tables given in Annex K (Clause K.1) of + Rec. ITU-T T.81 (1992) | ISO/IEC 10918-1:1994, multiplied by the specified scale factor (which is expressed as a percentage; thus scale_factor = 100 reproduces the spec's tables). Note that larger scale factors give lower quality. This entry point is useful for @@ -960,8 +962,8 @@ jpeg_simple_progression (j_compress_ptr cinfo) Compression parameters (cinfo fields) include: boolean arith_code - If TRUE, use arithmetic coding. - If FALSE, use Huffman coding. + If TRUE, use arithmetic coding. + If FALSE, use Huffman coding. J_DCT_METHOD dct_method Selects the algorithm used for the DCT step. Choices are: @@ -1436,7 +1438,7 @@ When the default error handler is used, any error detected inside the JPEG routines will cause a message to be printed on stderr, followed by exit(). You can supply your own error handling routines to override this behavior and to control the treatment of nonfatal warnings and trace/debug messages. 
-The file example.c illustrates the most common case, which is to have the +The file example.txt illustrates the most common case, which is to have the application regain control after an error rather than exiting. The JPEG library never writes any message directly; it always goes through @@ -1453,7 +1455,7 @@ You may, if you wish, simply replace the entire JPEG error handling module only replacing some of the routines depending on the behavior you need. This is accomplished by calling jpeg_std_error() as usual, but then overriding some of the method pointers in the jpeg_error_mgr struct, as illustrated by -example.c. +example.txt. All of the error handling routines will receive a pointer to the JPEG object (a j_common_ptr which points to either a jpeg_compress_struct or a @@ -1464,7 +1466,7 @@ additional data which is not known to the JPEG library or the standard error handler. The most convenient way to do this is to embed either the JPEG object or the jpeg_error_mgr struct in a larger structure that contains additional fields; then casting the passed pointer provides access to the -additional fields. Again, see example.c for one way to do it. (Beginning +additional fields. Again, see example.txt for one way to do it. (Beginning with IJG version 6b, there is also a void pointer "client_data" in each JPEG object, which the application can also use to find related data. The library does not touch client_data at all.) @@ -1973,7 +1975,7 @@ and how to display each pass. The simplest approach to displaying progressive images is to do one display pass for each scan appearing in the input file. In this case the outer loop condition is typically - while (! 
jpeg_input_complete(&cinfo)) + while (!jpeg_input_complete(&cinfo)) and the start-output call should read jpeg_start_output(&cinfo, cinfo.input_scan_number); The second parameter to jpeg_start_output() indicates which scan of the input @@ -2094,7 +2096,7 @@ something like this: jpeg_start_output(&cinfo, cinfo.input_scan_number); ... jpeg_finish_output() - } while (! final_pass); + } while (!final_pass); rather than quitting as soon as jpeg_input_complete() returns TRUE. This arrangement makes it simple to use higher-quality decoding parameters for the final pass. But if you don't want to use special parameters for @@ -2633,6 +2635,44 @@ A simple example of an external COM processor can be found in djpeg.c. Also, see jpegtran.c for an example of using jpeg_save_markers. +ICC profiles +------------ + +Two functions are provided for writing and reading International Color +Consortium (ICC) device profiles embedded in JFIF JPEG image files: + + void jpeg_write_icc_profile (j_compress_ptr cinfo, + const JOCTET *icc_data_ptr, + unsigned int icc_data_len); + boolean jpeg_read_icc_profile (j_decompress_ptr cinfo, + JOCTET **icc_data_ptr, + unsigned int *icc_data_len); + +The ICC has defined a standard for including such data in JPEG "APP2" markers. +The aforementioned functions do not know anything about the internal structure +of the ICC profile data; they just know how to embed the profile data into a +JPEG file while writing it, or to extract the profile data from a JPEG file +while reading it. + +jpeg_write_icc_profile() must be called after calling jpeg_start_compress() and +before the first call to jpeg_write_scanlines() or jpeg_write_raw_data(). This +ordering ensures that the APP2 marker(s) will appear after the SOI and JFIF or +Adobe markers, but before all other data. + +jpeg_read_icc_profile() returns TRUE if an ICC profile was found and FALSE +otherwise. 
If an ICC profile was found, then the function will allocate a +memory region containing the profile and will return a pointer to that memory +region in *icc_data_ptr, as well as the length of the region in *icc_data_len. +This memory region is allocated by the library using malloc() and must be freed +by the caller using free() when the memory region is no longer needed. Callers +wishing to use jpeg_read_icc_profile() must call + + jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xFFFF); + +prior to calling jpeg_read_header(). jpeg_read_icc_profile() can be called at +any point between jpeg_read_header() and jpeg_finish_decompress(). + + Raw (downsampled) image data ---------------------------- @@ -2929,7 +2969,7 @@ object is destroyed. Most data is allocated "per image" and is freed by jpeg_finish_compress, jpeg_finish_decompress, or jpeg_abort. You can call the memory manager yourself to allocate structures that will automatically be freed at these times. Typical code for this is - ptr = (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, size); + ptr = (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, size); Use JPOOL_PERMANENT to get storage that lasts as long as the JPEG object. Use alloc_large instead of alloc_small for anything bigger than a few Kbytes. There are also alloc_sarray and alloc_barray routines that automatically diff --git a/md5/Makefile.am b/md5/Makefile.am deleted file mode 100644 index b36f019..0000000 --- a/md5/Makefile.am +++ /dev/null @@ -1,4 +0,0 @@ -noinst_PROGRAMS = md5cmp - -md5cmp_SOURCES = md5cmp.c md5.c md5hl.c md5.h -md5cmp_CFLAGS = -I$(srcdir) diff --git a/md5/md5.c b/md5/md5.c index 4b5ba5e..9ef5daa 100644 --- a/md5/md5.c +++ b/md5/md5.c @@ -1,340 +1,275 @@ /* - * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm + * This code implements the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. 
+ * This code is in the public domain; do with it what you wish. * - * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All - * rights reserved. + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. * - * License to copy and use this software is granted provided that it - * is identified as the "RSA Data Security, Inc. MD5 Message-Digest - * Algorithm" in all material mentioning or referencing this software - * or this function. + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + * ---------------------------------------------------------------------------- + * libjpeg-turbo Modifications: + * Copyright (C)2018, D. R. Commander. All Rights Reserved. * - * License is also granted to make and use derivative works provided - * that such works are identified as "derived from the RSA Data - * Security, Inc. MD5 Message-Digest Algorithm" in all material - * mentioning or referencing the derived work. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: * - * RSA Data Security, Inc. makes no representations concerning either - * the merchantability of this software or the suitability of this - * software for any particular purpose. It is provided "as is" - * without express or implied warranty of any kind. + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. * - * These notices must be retained in any copies of any part of this - * documentation and/or software. - * - * This code is the same as the code published by RSA Inc. It has been - * edited for clarity and style only. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * ---------------------------------------------------------------------------- */ -#include -#include - -#include "./md5.h" - -#ifdef __amigaos4__ -#include -#define le32toh(x) (((x & 0xff) << 24) | \ - ((x & 0xff00) << 8) | \ - ((x & 0xff0000) >> 8) | \ - ((x & 0xff000000) >> 24)) -#define htole32(x) le32toh(x) -#endif - -static void MD5Transform(unsigned int [4], const unsigned char [64]); +#include <string.h> /* for memcpy() */ +#include "md5.h" #if (BYTE_ORDER == LITTLE_ENDIAN) -#define Encode memcpy -#define Decode memcpy +#define byteReverse(buf, len) /* Nothing */ #else - -/* - * OS X doesn't have le32toh() or htole32() - */ -#ifdef __APPLE__ -#include -#define le32toh(x) OSSwapLittleToHostInt32(x) -#define htole32(x) OSSwapHostToLittleInt32(x) -#endif - /* - * Encodes input (unsigned int) into output (unsigned char). Assumes len is - * a multiple of 4. + * Note: this code is harmless on little-endian machines. */ - -static void -Encode (unsigned char *output, unsigned int *input, unsigned int len) +static void byteReverse(unsigned char *buf, unsigned int longs) { - unsigned int i; - unsigned int *op = (unsigned int *)output; - - for (i = 0; i < len / 4; i++) - op[i] = htole32(input[i]); + uint32 t; + do { + t = (uint32)((unsigned int)buf[3] << 8 | buf[2]) << 16 | + ((unsigned int)buf[1] << 8 | buf[0]); + *(uint32 *)buf = t; + buf += 4; + } while (--longs); } +#endif /* - * Decodes input (unsigned char) into output (unsigned int). Assumes len is - * a multiple of 4. + * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious + * initialization constants. 
*/ - -static void -Decode (unsigned int *output, const unsigned char *input, unsigned int len) +void MD5Init(struct MD5Context *ctx) { - unsigned int i; - const unsigned int *ip = (const unsigned int *)input; + ctx->buf[0] = 0x67452301; + ctx->buf[1] = 0xefcdab89; + ctx->buf[2] = 0x98badcfe; + ctx->buf[3] = 0x10325476; - for (i = 0; i < len / 4; i++) - output[i] = le32toh(ip[i]); + ctx->bits[0] = 0; + ctx->bits[1] = 0; } -#endif - -static unsigned char PADDING[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* F, G, H and I are basic MD5 functions. */ -#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) -#define G(x, y, z) (((x) & (z)) | ((y) & (~z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) -#define I(x, y, z) ((y) ^ ((x) | (~z))) - -/* ROTATE_LEFT rotates x left n bits. */ -#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) /* - * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. - * Rotation is separate from addition to prevent recomputation. + * Update context to reflect the concatenation of another buffer full + * of bytes. */ -#define FF(a, b, c, d, x, s, ac) { \ - (a) += F ((b), (c), (d)) + (x) + (unsigned int)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } -#define GG(a, b, c, d, x, s, ac) { \ - (a) += G ((b), (c), (d)) + (x) + (unsigned int)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } -#define HH(a, b, c, d, x, s, ac) { \ - (a) += H ((b), (c), (d)) + (x) + (unsigned int)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } -#define II(a, b, c, d, x, s, ac) { \ - (a) += I ((b), (c), (d)) + (x) + (unsigned int)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } - -/* MD5 initialization. Begins an MD5 operation, writing a new context. 
*/ - -void -MD5Init (context) - MD5_CTX *context; +void MD5Update(struct MD5Context *ctx, unsigned char *buf, unsigned int len) { - - context->count[0] = context->count[1] = 0; - - /* Load magic initialization constants. */ - context->state[0] = 0x67452301; - context->state[1] = 0xefcdab89; - context->state[2] = 0x98badcfe; - context->state[3] = 0x10325476; + uint32 t; + + /* Update bitcount */ + + t = ctx->bits[0]; + if ((ctx->bits[0] = t + ((uint32)len << 3)) < t) + ctx->bits[1]++; /* Carry from low to high */ + ctx->bits[1] += len >> 29; + + t = (t >> 3) & 0x3f; /* Bytes already in ctx->in */ + + /* Handle any leading odd-sized chunks */ + + if (t) { + unsigned char *p = (unsigned char *)ctx->in + t; + + t = 64 - t; + if (len < t) { + memcpy(p, buf, len); + return; + } + memcpy(p, buf, t); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->in); + buf += t; + len -= t; + } + /* Process data in 64-byte chunks */ + + while (len >= 64) { + memcpy(ctx->in, buf, 64); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->in); + buf += 64; + len -= 64; + } + + /* Handle any remaining bytes of data. */ + + memcpy(ctx->in, buf, len); } /* - * MD5 block update operation. Continues an MD5 message-digest - * operation, processing another message block, and updating the - * context. 
+ * Final wrapup - pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) */ - -void -MD5Update (context, in, inputLen) - MD5_CTX *context; - const void *in; - unsigned int inputLen; +void MD5Final(unsigned char digest[16], struct MD5Context *ctx) { - unsigned int i, idx, partLen; - const unsigned char *input = in; - - /* Compute number of bytes mod 64 */ - idx = (unsigned int)((context->count[0] >> 3) & 0x3F); - - /* Update number of bits */ - if ((context->count[0] += ((unsigned int)inputLen << 3)) - < ((unsigned int)inputLen << 3)) - context->count[1]++; - context->count[1] += ((unsigned int)inputLen >> 29); - - partLen = 64 - idx; - - /* Transform as many times as possible. */ - if (inputLen >= partLen) { - memcpy((void *)&context->buffer[idx], (const void *)input, - partLen); - MD5Transform (context->state, context->buffer); - - for (i = partLen; i + 63 < inputLen; i += 64) - MD5Transform (context->state, &input[i]); - - idx = 0; - } - else - i = 0; - - /* Buffer remaining input */ - memcpy ((void *)&context->buffer[idx], (const void *)&input[i], - inputLen-i); + unsigned int count; + unsigned char *p; + uint32 *in32 = (uint32 *)ctx->in; + + /* Compute number of bytes mod 64 */ + count = (ctx->bits[0] >> 3) & 0x3F; + + /* Set the first char of padding to 0x80. 
This is safe since there is + always at least one byte free */ + p = ctx->in + count; + *p++ = 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = 64 - 1 - count; + + /* Pad out to 56 mod 64 */ + if (count < 8) { + /* Two lots of padding: Pad the first block to 64 bytes */ + memset(p, 0, count); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->in); + + /* Now fill the next block with 56 bytes */ + memset(ctx->in, 0, 56); + } else { + /* Pad block to 56 bytes */ + memset(p, 0, count - 8); + } + byteReverse(ctx->in, 14); + + /* Append length in bits and transform */ + in32[14] = ctx->bits[0]; + in32[15] = ctx->bits[1]; + + MD5Transform(ctx->buf, (uint32 *)ctx->in); + byteReverse((unsigned char *)ctx->buf, 4); + memcpy(digest, ctx->buf, 16); + memset(ctx, 0, sizeof(struct MD5Context)); /* In case it's sensitive */ } -/* - * MD5 padding. Adds padding followed by original length. - */ - -void -MD5Pad (context) - MD5_CTX *context; -{ - unsigned char bits[8]; - unsigned int idx, padLen; - /* Save number of bits */ - Encode (bits, context->count, 8); +/* The four core functions - F1 is optimized somewhat */ - /* Pad out to 56 mod 64. */ - idx = (unsigned int)((context->count[0] >> 3) & 0x3f); - padLen = (idx < 56) ? (56 - idx) : (120 - idx); - MD5Update (context, PADDING, padLen); +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) - /* Append length (before padding) */ - MD5Update (context, bits, 8); -} +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f, w, x, y, z, data, s) \ + ( w += f(x, y, z) + data, w = w << s | w >> (32 - s), w += x ) /* - * MD5 finalization. Ends an MD5 message-digest operation, writing the - * the message digest and zeroizing the context. 
+ * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. MD5Update blocks + * the data and converts bytes into longwords for this routine. */ - -void -MD5Final (digest, context) - unsigned char digest[16]; - MD5_CTX *context; -{ - /* Do padding. */ - MD5Pad (context); - - /* Store state in digest */ - Encode (digest, context->state, 16); - - /* Zeroize sensitive information. */ - memset ((void *)context, 0, sizeof (*context)); -} - -/* MD5 basic transformation. Transforms state based on block. */ - -static void -MD5Transform (state, block) - unsigned int state[4]; - const unsigned char block[64]; +void MD5Transform(uint32 buf[4], uint32 in[16]) { - unsigned int a = state[0], b = state[1], c = state[2], d = state[3], x[16]; - - Decode (x, block, 64); - - /* Round 1 */ -#define S11 7 -#define S12 12 -#define S13 17 -#define S14 22 - FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ - FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ - FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ - FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ - FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ - FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ - FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ - FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ - FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ - FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ - FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ - FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ - FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ - FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ - FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ - FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ - - /* Round 2 */ -#define S21 5 -#define S22 9 -#define S23 14 -#define S24 20 - GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ - GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ - GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ - GG (b, c, d, a, x[ 0], S24, 
0xe9b6c7aa); /* 20 */ - GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ - GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ - GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ - GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ - GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ - GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ - GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ - GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ - GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ - GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ - GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ - GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ - - /* Round 3 */ -#define S31 4 -#define S32 11 -#define S33 16 -#define S34 23 - HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ - HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ - HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ - HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ - HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ - HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ - HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ - HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ - HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ - HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ - HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ - HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ - HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ - HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ - HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ - HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ - - /* Round 4 */ -#define S41 6 -#define S42 10 -#define S43 15 -#define S44 21 - II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ - II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ - II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ - II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ - II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ - II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ - II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 
*/ - II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ - II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ - II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ - II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ - II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ - II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ - II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ - II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ - II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ - - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; - - /* Zeroize sensitive information. */ - memset ((void *)x, 0, sizeof (x)); + register uint32 a, b, c, d; + + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, 
d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + buf[0] += a; 
+ buf[1] += b; + buf[2] += c; + buf[3] += d; } diff --git a/md5/md5.h b/md5/md5.h index 551e252..6d745e9 100644 --- a/md5/md5.h +++ b/md5/md5.h @@ -1,49 +1,57 @@ -/* MD5.H - header file for MD5C.C - * $FreeBSD$ +/* + * libjpeg-turbo Modifications: + * Copyright (C)2018 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ -/*- - Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All -rights reserved. 
+#ifndef MD5_H +#define MD5_H -License to copy and use this software is granted provided that it -is identified as the "RSA Data Security, Inc. MD5 Message-Digest -Algorithm" in all material mentioning or referencing this software -or this function. +#include +#ifdef __amigaos4__ +#include +#endif -License is also granted to make and use derivative works provided -that such works are identified as "derived from the RSA Data -Security, Inc. MD5 Message-Digest Algorithm" in all material -mentioning or referencing the derived work. +/* On machines where "long" is 64 bits, we need to declare + uint32 as something guaranteed to be 32 bits. */ -RSA Data Security, Inc. makes no representations concerning either -the merchantability of this software or the suitability of this -software for any particular purpose. It is provided "as is" -without express or implied warranty of any kind. +typedef unsigned int uint32; -These notices must be retained in any copies of any part of this -documentation and/or software. - */ - -#ifndef _SYS_MD5_H_ -#define _SYS_MD5_H_ - -#define MD5_BLOCK_LENGTH 64 -#define MD5_DIGEST_LENGTH 16 -#define MD5_DIGEST_STRING_LENGTH (MD5_DIGEST_LENGTH * 2 + 1) - -/* MD5 context. 
*/ typedef struct MD5Context { - unsigned int state[4]; /* state (ABCD) */ - unsigned int count[2]; /* number of bits, modulo 2^64 (lsb first) */ - unsigned char buffer[64]; /* input buffer */ + uint32 buf[4]; + uint32 bits[2]; + unsigned char in[64]; } MD5_CTX; -void MD5Init (MD5_CTX *); -void MD5Update (MD5_CTX *, const void *, unsigned int); -void MD5Final (unsigned char [16], MD5_CTX *); -char * MD5End(MD5_CTX *, char *); -char * MD5File(const char *, char *); -char * MD5FileChunk(const char *, char *, off_t, off_t); -char * MD5Data(const void *, unsigned int, char *); -#endif /* _SYS_MD5_H_ */ +extern void MD5Init(struct MD5Context *ctx); +extern void MD5Update(struct MD5Context *ctx, unsigned char *buf, + unsigned int len); +extern void MD5Final(unsigned char digest[16], struct MD5Context *ctx); +extern void MD5Transform(uint32 buf[4], uint32 in[16]); +extern char *MD5File(const char *, char *); +extern char *MD5FileChunk(const char *, char *, off_t, off_t); + +#endif /* !MD5_H */ diff --git a/md5/md5cmp.c b/md5/md5cmp.c index dfd60bd..42b94ce 100644 --- a/md5/md5cmp.c +++ b/md5/md5cmp.c @@ -28,33 +28,32 @@ #include #include -#include #include "./md5.h" #include "../tjutil.h" int main(int argc, char *argv[]) { - char *md5sum = NULL, buf[65]; + char *md5sum = NULL, buf[65]; - if (argc < 3) { - fprintf(stderr, "USAGE: %s \n", argv[0]); - return -1; - } + if (argc < 3) { + fprintf(stderr, "USAGE: %s \n", argv[0]); + return -1; + } - if (strlen(argv[1]) != 32) - fprintf(stderr, "WARNING: MD5 hash size is wrong.\n"); + if (strlen(argv[1]) != 32) + fprintf(stderr, "WARNING: MD5 hash size is wrong.\n"); - md5sum = MD5File(argv[2], buf); - if (!md5sum) { - perror("Could not obtain MD5 sum"); - return -1; - } + md5sum = MD5File(argv[2], buf); + if (!md5sum) { + perror("Could not obtain MD5 sum"); + return -1; + } - if (!strcasecmp(md5sum, argv[1])) { - fprintf(stderr, "%s: OK\n", argv[2]); - return 0; - } else { - fprintf(stderr, "%s: FAILED. 
Checksum is %s\n", argv[2], md5sum); - return -1; - } + if (!strcasecmp(md5sum, argv[1])) { + fprintf(stderr, "%s: OK\n", argv[2]); + return 0; + } else { + fprintf(stderr, "%s: FAILED. Checksum is %s\n", argv[2], md5sum); + return -1; + } } diff --git a/md5/md5hl.c b/md5/md5hl.c index 983ea76..ecd2e23 100644 --- a/md5/md5hl.c +++ b/md5/md5hl.c @@ -1,12 +1,36 @@ -/* mdXhl.c * ---------------------------------------------------------------------------- +/* mdXhl.c + * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * libjpeg-turbo Modifications: - * Copyright (C) 2016, D. R. Commander. - * Modifications are under the same license as the original code (see above) + * Copyright (C)2016, 2018 D. R. Commander. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. * ---------------------------------------------------------------------------- */ @@ -15,11 +39,11 @@ #include #ifdef _WIN32 #include -#define close _close -#define fstat _fstat -#define lseek _lseek -#define read _read -#define stat _stat +#define close _close +#define fstat _fstat +#define lseek _lseek +#define read _read +#define stat _stat #else #include #endif @@ -28,87 +52,83 @@ #include #include -#define LENGTH 16 +#define LENGTH 16 #include "./md5.h" -char * -MD5End(MD5_CTX *ctx, char *buf) +char *MD5End(MD5_CTX *ctx, char *buf) { - int i; - unsigned char digest[LENGTH]; - static const char hex[]="0123456789abcdef"; + int i; + unsigned char digest[LENGTH]; + static const char hex[] = "0123456789abcdef"; - if (!buf) - buf = malloc(2*LENGTH + 1); - if (!buf) - return 0; - MD5Final(digest, ctx); - for (i = 0; i < LENGTH; i++) { - buf[i+i] = hex[digest[i] >> 4]; - buf[i+i+1] = hex[digest[i] & 0x0f]; - } - buf[i+i] = '\0'; - return buf; + if (!buf) + buf = malloc(2 * LENGTH + 1); + if (!buf) + return 0; + MD5Final(digest, ctx); + for (i = 0; i < LENGTH; i++) { + buf[i + i] = hex[digest[i] >> 4]; + buf[i + i + 1] = hex[digest[i] & 0x0f]; + } + buf[i + i] = '\0'; + return 
buf; } -char * -MD5File(const char *filename, char *buf) +char *MD5File(const char *filename, char *buf) { - return (MD5FileChunk(filename, buf, 0, 0)); + return (MD5FileChunk(filename, buf, 0, 0)); } -char * -MD5FileChunk(const char *filename, char *buf, off_t ofs, off_t len) +char *MD5FileChunk(const char *filename, char *buf, off_t ofs, off_t len) { - unsigned char buffer[BUFSIZ]; - MD5_CTX ctx; - struct stat stbuf; - int f, i, e; - off_t n; + unsigned char buffer[BUFSIZ]; + MD5_CTX ctx; + struct stat stbuf; + int f, i, e; + off_t n; - MD5Init(&ctx); + MD5Init(&ctx); #if _WIN32 - f = _open(filename, O_RDONLY|O_BINARY); + f = _open(filename, O_RDONLY | O_BINARY); #else - f = open(filename, O_RDONLY); + f = open(filename, O_RDONLY); #endif - if (f < 0) - return 0; - if (fstat(f, &stbuf) < 0) - return 0; - if (ofs > stbuf.st_size) - ofs = stbuf.st_size; - if ((len == 0) || (len > stbuf.st_size - ofs)) - len = stbuf.st_size - ofs; - if (lseek(f, ofs, SEEK_SET) < 0) - return 0; - n = len; - i = 0; - while (n > 0) { - if (n > sizeof(buffer)) - i = read(f, buffer, sizeof(buffer)); - else - i = read(f, buffer, n); - if (i < 0) - break; - MD5Update(&ctx, buffer, i); - n -= i; - } - e = errno; - close(f); - errno = e; - if (i < 0) - return 0; - return (MD5End(&ctx, buf)); + if (f < 0) + return 0; + if (fstat(f, &stbuf) < 0) + return 0; + if (ofs > stbuf.st_size) + ofs = stbuf.st_size; + if ((len == 0) || (len > stbuf.st_size - ofs)) + len = stbuf.st_size - ofs; + if (lseek(f, ofs, SEEK_SET) < 0) + return 0; + n = len; + i = 0; + while (n > 0) { + if (n > sizeof(buffer)) + i = read(f, buffer, sizeof(buffer)); + else + i = read(f, buffer, n); + if (i < 0) + break; + MD5Update(&ctx, buffer, i); + n -= i; + } + e = errno; + close(f); + errno = e; + if (i < 0) + return 0; + return (MD5End(&ctx, buf)); } -char * -MD5Data (const void *data, unsigned int len, char *buf) +char *MD5Data(const void *data, unsigned int len, char *buf) { - MD5_CTX ctx; + MD5_CTX ctx; - MD5Init(&ctx); 
- MD5Update(&ctx,data,len); - return (MD5End(&ctx, buf)); + MD5Init(&ctx); + MD5Update(&ctx, (unsigned char *)data, len); + return (MD5End(&ctx, buf)); } diff --git a/rdbmp.c b/rdbmp.c index eaa7086..51af237 100644 --- a/rdbmp.c +++ b/rdbmp.c @@ -3,10 +3,10 @@ * * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. - * Modified 2009-2010 by Guido Vollbeding. + * Modified 2009-2017 by Guido Vollbeding. * libjpeg-turbo Modifications: * Modified 2011 by Siarhei Siamashka. - * Copyright (C) 2015, D. R. Commander. + * Copyright (C) 2015, 2017-2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -26,6 +26,7 @@ * This code contributed by James Arthur Boucher. */ +#include "cmyk.h" #include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ #ifdef BMP_SUPPORTED @@ -35,19 +36,24 @@ #ifdef HAVE_UNSIGNED_CHAR typedef unsigned char U_CHAR; -#define UCH(x) ((int) (x)) +#define UCH(x) ((int)(x)) #else /* !HAVE_UNSIGNED_CHAR */ #ifdef __CHAR_UNSIGNED__ typedef char U_CHAR; -#define UCH(x) ((int) (x)) +#define UCH(x) ((int)(x)) #else typedef char U_CHAR; -#define UCH(x) ((int) (x) & 0xFF) +#define UCH(x) ((int)(x) & 0xFF) #endif #endif /* HAVE_UNSIGNED_CHAR */ -#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len))) +#define ReadOK(file, buffer, len) \ + (JFREAD(file, buffer, len) == ((size_t)(len))) + +static int alpha_index[JPEG_NUMCS] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1 +}; /* Private version of data source object */ @@ -66,11 +72,23 @@ typedef struct _bmp_source_struct { JDIMENSION row_width; /* Physical width of scanlines in file */ int bits_per_pixel; /* remembers 8- or 24-bit format */ + int cmap_length; /* colormap length */ + + boolean use_inversion_array; /* TRUE = preload the whole image, which is + stored in bottom-up order, and feed it to + the calling program in top-down order + + FALSE = the 
calling program will maintain + its own image buffer and read the rows in + bottom-up order */ + + U_CHAR *iobuffer; /* I/O buffer (used to buffer a single row from + disk if use_inversion_array == FALSE) */ } bmp_source_struct; LOCAL(int) -read_byte (bmp_source_ptr sinfo) +read_byte(bmp_source_ptr sinfo) /* Read next byte from BMP file */ { register FILE *infile = sinfo->pub.input_file; @@ -83,33 +101,45 @@ read_byte (bmp_source_ptr sinfo) LOCAL(void) -read_colormap (bmp_source_ptr sinfo, int cmaplen, int mapentrysize) +read_colormap(bmp_source_ptr sinfo, int cmaplen, int mapentrysize) /* Read the colormap from a BMP file */ { - int i; + int i, gray = 1; switch (mapentrysize) { case 3: /* BGR format (occurs in OS/2 files) */ for (i = 0; i < cmaplen; i++) { - sinfo->colormap[2][i] = (JSAMPLE) read_byte(sinfo); - sinfo->colormap[1][i] = (JSAMPLE) read_byte(sinfo); - sinfo->colormap[0][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[2][i] = (JSAMPLE)read_byte(sinfo); + sinfo->colormap[1][i] = (JSAMPLE)read_byte(sinfo); + sinfo->colormap[0][i] = (JSAMPLE)read_byte(sinfo); + if (sinfo->colormap[2][i] != sinfo->colormap[1][i] || + sinfo->colormap[1][i] != sinfo->colormap[0][i]) + gray = 0; } break; case 4: /* BGR0 format (occurs in MS Windows files) */ for (i = 0; i < cmaplen; i++) { - sinfo->colormap[2][i] = (JSAMPLE) read_byte(sinfo); - sinfo->colormap[1][i] = (JSAMPLE) read_byte(sinfo); - sinfo->colormap[0][i] = (JSAMPLE) read_byte(sinfo); - (void) read_byte(sinfo); + sinfo->colormap[2][i] = (JSAMPLE)read_byte(sinfo); + sinfo->colormap[1][i] = (JSAMPLE)read_byte(sinfo); + sinfo->colormap[0][i] = (JSAMPLE)read_byte(sinfo); + (void)read_byte(sinfo); + if (sinfo->colormap[2][i] != sinfo->colormap[1][i] || + sinfo->colormap[1][i] != sinfo->colormap[0][i]) + gray = 0; } break; default: ERREXIT(sinfo->cinfo, JERR_BMP_BADCMAP); break; } + + if (sinfo->cinfo->in_color_space == JCS_UNKNOWN && gray) + sinfo->cinfo->in_color_space = JCS_GRAYSCALE; + + if 
(sinfo->cinfo->in_color_space == JCS_GRAYSCALE && !gray) + ERREXIT(sinfo->cinfo, JERR_BAD_IN_COLORSPACE); } @@ -121,30 +151,77 @@ read_colormap (bmp_source_ptr sinfo, int cmaplen, int mapentrysize) */ METHODDEF(JDIMENSION) -get_8bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_8bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading 8-bit colormap indexes */ { - bmp_source_ptr source = (bmp_source_ptr) sinfo; + bmp_source_ptr source = (bmp_source_ptr)sinfo; register JSAMPARRAY colormap = source->colormap; + int cmaplen = source->cmap_length; JSAMPARRAY image_ptr; register int t; register JSAMPROW inptr, outptr; register JDIMENSION col; - /* Fetch next row from virtual array */ - source->source_row--; - image_ptr = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->whole_image, - source->source_row, (JDIMENSION) 1, FALSE); + if (source->use_inversion_array) { + /* Fetch next row from virtual array */ + source->source_row--; + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr)cinfo, source->whole_image, + source->source_row, (JDIMENSION)1, FALSE); + inptr = image_ptr[0]; + } else { + if (!ReadOK(source->pub.input_file, source->iobuffer, source->row_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + inptr = source->iobuffer; + } /* Expand the colormap indexes to real data */ - inptr = image_ptr[0]; outptr = source->pub.buffer[0]; - for (col = cinfo->image_width; col > 0; col--) { - t = GETJSAMPLE(*inptr++); - *outptr++ = colormap[0][t]; /* can omit GETJSAMPLE() safely */ - *outptr++ = colormap[1][t]; - *outptr++ = colormap[2][t]; + if (cinfo->in_color_space == JCS_GRAYSCALE) { + for (col = cinfo->image_width; col > 0; col--) { + t = GETJSAMPLE(*inptr++); + if (t >= cmaplen) + ERREXIT(cinfo, JERR_BMP_OUTOFRANGE); + *outptr++ = colormap[0][t]; + } + } else if (cinfo->in_color_space == JCS_CMYK) { + for (col = cinfo->image_width; col > 0; col--) { + t = GETJSAMPLE(*inptr++); + if (t >= cmaplen) + 
ERREXIT(cinfo, JERR_BMP_OUTOFRANGE); + rgb_to_cmyk(colormap[0][t], colormap[1][t], colormap[2][t], outptr, + outptr + 1, outptr + 2, outptr + 3); + outptr += 4; + } + } else { + register int rindex = rgb_red[cinfo->in_color_space]; + register int gindex = rgb_green[cinfo->in_color_space]; + register int bindex = rgb_blue[cinfo->in_color_space]; + register int aindex = alpha_index[cinfo->in_color_space]; + register int ps = rgb_pixelsize[cinfo->in_color_space]; + + if (aindex >= 0) { + for (col = cinfo->image_width; col > 0; col--) { + t = GETJSAMPLE(*inptr++); + if (t >= cmaplen) + ERREXIT(cinfo, JERR_BMP_OUTOFRANGE); + outptr[rindex] = colormap[0][t]; + outptr[gindex] = colormap[1][t]; + outptr[bindex] = colormap[2][t]; + outptr[aindex] = 0xFF; + outptr += ps; + } + } else { + for (col = cinfo->image_width; col > 0; col--) { + t = GETJSAMPLE(*inptr++); + if (t >= cmaplen) + ERREXIT(cinfo, JERR_BMP_OUTOFRANGE); + outptr[rindex] = colormap[0][t]; + outptr[gindex] = colormap[1][t]; + outptr[bindex] = colormap[2][t]; + outptr += ps; + } + } } return 1; @@ -152,30 +229,63 @@ get_8bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) METHODDEF(JDIMENSION) -get_24bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_24bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading 24-bit pixels */ { - bmp_source_ptr source = (bmp_source_ptr) sinfo; + bmp_source_ptr source = (bmp_source_ptr)sinfo; JSAMPARRAY image_ptr; register JSAMPROW inptr, outptr; register JDIMENSION col; - /* Fetch next row from virtual array */ - source->source_row--; - image_ptr = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->whole_image, - source->source_row, (JDIMENSION) 1, FALSE); + if (source->use_inversion_array) { + /* Fetch next row from virtual array */ + source->source_row--; + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr)cinfo, source->whole_image, + source->source_row, (JDIMENSION)1, FALSE); + inptr = image_ptr[0]; + } 
else { + if (!ReadOK(source->pub.input_file, source->iobuffer, source->row_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + inptr = source->iobuffer; + } /* Transfer data. Note source values are in BGR order * (even though Microsoft's own documents say the opposite). */ - inptr = image_ptr[0]; outptr = source->pub.buffer[0]; - for (col = cinfo->image_width; col > 0; col--) { - outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */ - outptr[1] = *inptr++; - outptr[0] = *inptr++; - outptr += 3; + if (cinfo->in_color_space == JCS_EXT_BGR) { + MEMCOPY(outptr, inptr, source->row_width); + } else if (cinfo->in_color_space == JCS_CMYK) { + for (col = cinfo->image_width; col > 0; col--) { + /* can omit GETJSAMPLE() safely */ + JSAMPLE b = *inptr++, g = *inptr++, r = *inptr++; + rgb_to_cmyk(r, g, b, outptr, outptr + 1, outptr + 2, outptr + 3); + outptr += 4; + } + } else { + register int rindex = rgb_red[cinfo->in_color_space]; + register int gindex = rgb_green[cinfo->in_color_space]; + register int bindex = rgb_blue[cinfo->in_color_space]; + register int aindex = alpha_index[cinfo->in_color_space]; + register int ps = rgb_pixelsize[cinfo->in_color_space]; + + if (aindex >= 0) { + for (col = cinfo->image_width; col > 0; col--) { + outptr[bindex] = *inptr++; /* can omit GETJSAMPLE() safely */ + outptr[gindex] = *inptr++; + outptr[rindex] = *inptr++; + outptr[aindex] = 0xFF; + outptr += ps; + } + } else { + for (col = cinfo->image_width; col > 0; col--) { + outptr[bindex] = *inptr++; /* can omit GETJSAMPLE() safely */ + outptr[gindex] = *inptr++; + outptr[rindex] = *inptr++; + outptr += ps; + } + } } return 1; @@ -183,30 +293,66 @@ get_24bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) METHODDEF(JDIMENSION) -get_32bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_32bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading 32-bit pixels */ { - bmp_source_ptr source = (bmp_source_ptr) sinfo; + bmp_source_ptr source = 
(bmp_source_ptr)sinfo; JSAMPARRAY image_ptr; register JSAMPROW inptr, outptr; register JDIMENSION col; - /* Fetch next row from virtual array */ - source->source_row--; - image_ptr = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->whole_image, - source->source_row, (JDIMENSION) 1, FALSE); + if (source->use_inversion_array) { + /* Fetch next row from virtual array */ + source->source_row--; + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr)cinfo, source->whole_image, + source->source_row, (JDIMENSION)1, FALSE); + inptr = image_ptr[0]; + } else { + if (!ReadOK(source->pub.input_file, source->iobuffer, source->row_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + inptr = source->iobuffer; + } + /* Transfer data. Note source values are in BGR order * (even though Microsoft's own documents say the opposite). */ - inptr = image_ptr[0]; outptr = source->pub.buffer[0]; - for (col = cinfo->image_width; col > 0; col--) { - outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */ - outptr[1] = *inptr++; - outptr[0] = *inptr++; - inptr++; /* skip the 4th byte (Alpha channel) */ - outptr += 3; + if (cinfo->in_color_space == JCS_EXT_BGRX || + cinfo->in_color_space == JCS_EXT_BGRA) { + MEMCOPY(outptr, inptr, source->row_width); + } else if (cinfo->in_color_space == JCS_CMYK) { + for (col = cinfo->image_width; col > 0; col--) { + /* can omit GETJSAMPLE() safely */ + JSAMPLE b = *inptr++, g = *inptr++, r = *inptr++; + rgb_to_cmyk(r, g, b, outptr, outptr + 1, outptr + 2, outptr + 3); + inptr++; /* skip the 4th byte (Alpha channel) */ + outptr += 4; + } + } else { + register int rindex = rgb_red[cinfo->in_color_space]; + register int gindex = rgb_green[cinfo->in_color_space]; + register int bindex = rgb_blue[cinfo->in_color_space]; + register int aindex = alpha_index[cinfo->in_color_space]; + register int ps = rgb_pixelsize[cinfo->in_color_space]; + + if (aindex >= 0) { + for (col = cinfo->image_width; col > 0; col--) { + outptr[bindex] = *inptr++; /* can 
omit GETJSAMPLE() safely */ + outptr[gindex] = *inptr++; + outptr[rindex] = *inptr++; + outptr[aindex] = *inptr++; + outptr += ps; + } + } else { + for (col = cinfo->image_width; col > 0; col--) { + outptr[bindex] = *inptr++; /* can omit GETJSAMPLE() safely */ + outptr[gindex] = *inptr++; + outptr[rindex] = *inptr++; + inptr++; /* skip the 4th byte (Alpha channel) */ + outptr += ps; + } + } } return 1; @@ -220,25 +366,24 @@ get_32bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(JDIMENSION) -preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +preload_image(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { - bmp_source_ptr source = (bmp_source_ptr) sinfo; + bmp_source_ptr source = (bmp_source_ptr)sinfo; register FILE *infile = source->pub.input_file; register JSAMPROW out_ptr; JSAMPARRAY image_ptr; JDIMENSION row; - cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress; /* Read the data into a virtual array in input-file row order. 
*/ for (row = 0; row < cinfo->image_height; row++) { if (progress != NULL) { - progress->pub.pass_counter = (long) row; - progress->pub.pass_limit = (long) cinfo->image_height; - (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + progress->pub.pass_counter = (long)row; + progress->pub.pass_limit = (long)cinfo->image_height; + (*progress->pub.progress_monitor) ((j_common_ptr)cinfo); } image_ptr = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->whole_image, - row, (JDIMENSION) 1, TRUE); + ((j_common_ptr)cinfo, source->whole_image, row, (JDIMENSION)1, TRUE); out_ptr = image_ptr[0]; if (fread(out_ptr, 1, source->row_width, infile) != source->row_width) { if (feof(infile)) @@ -276,55 +421,59 @@ preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(void) -start_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +start_input_bmp(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { - bmp_source_ptr source = (bmp_source_ptr) sinfo; + bmp_source_ptr source = (bmp_source_ptr)sinfo; U_CHAR bmpfileheader[14]; U_CHAR bmpinfoheader[64]; -#define GET_2B(array,offset) ((unsigned short) UCH(array[offset]) + \ - (((unsigned short) UCH(array[offset+1])) << 8)) -#define GET_4B(array,offset) ((unsigned int) UCH(array[offset]) + \ - (((unsigned int) UCH(array[offset+1])) << 8) + \ - (((unsigned int) UCH(array[offset+2])) << 16) + \ - (((unsigned int) UCH(array[offset+3])) << 24)) + +#define GET_2B(array, offset) \ + ((unsigned short)UCH(array[offset]) + \ + (((unsigned short)UCH(array[offset + 1])) << 8)) +#define GET_4B(array, offset) \ + ((unsigned int)UCH(array[offset]) + \ + (((unsigned int)UCH(array[offset + 1])) << 8) + \ + (((unsigned int)UCH(array[offset + 2])) << 16) + \ + (((unsigned int)UCH(array[offset + 3])) << 24)) + unsigned int bfOffBits; unsigned int headerSize; int biWidth; int biHeight; unsigned short biPlanes; unsigned int biCompression; - int biXPelsPerMeter,biYPelsPerMeter; + int biXPelsPerMeter, biYPelsPerMeter; 
unsigned int biClrUsed = 0; int mapentrysize = 0; /* 0 indicates no colormap */ int bPad; - JDIMENSION row_width; + JDIMENSION row_width = 0; /* Read and verify the bitmap file header */ - if (! ReadOK(source->pub.input_file, bmpfileheader, 14)) + if (!ReadOK(source->pub.input_file, bmpfileheader, 14)) ERREXIT(cinfo, JERR_INPUT_EOF); - if (GET_2B(bmpfileheader,0) != 0x4D42) /* 'BM' */ + if (GET_2B(bmpfileheader, 0) != 0x4D42) /* 'BM' */ ERREXIT(cinfo, JERR_BMP_NOT); - bfOffBits = GET_4B(bmpfileheader,10); + bfOffBits = GET_4B(bmpfileheader, 10); /* We ignore the remaining fileheader fields */ /* The infoheader might be 12 bytes (OS/2 1.x), 40 bytes (Windows), * or 64 bytes (OS/2 2.x). Check the first 4 bytes to find out which. */ - if (! ReadOK(source->pub.input_file, bmpinfoheader, 4)) + if (!ReadOK(source->pub.input_file, bmpinfoheader, 4)) ERREXIT(cinfo, JERR_INPUT_EOF); - headerSize = GET_4B(bmpinfoheader,0); + headerSize = GET_4B(bmpinfoheader, 0); if (headerSize < 12 || headerSize > 64) ERREXIT(cinfo, JERR_BMP_BADHEADER); - if (! 
ReadOK(source->pub.input_file, bmpinfoheader+4, headerSize-4)) + if (!ReadOK(source->pub.input_file, bmpinfoheader + 4, headerSize - 4)) ERREXIT(cinfo, JERR_INPUT_EOF); switch (headerSize) { case 12: /* Decode OS/2 1.x header (Microsoft calls this a BITMAPCOREHEADER) */ - biWidth = (int) GET_2B(bmpinfoheader,4); - biHeight = (int) GET_2B(bmpinfoheader,6); - biPlanes = GET_2B(bmpinfoheader,8); - source->bits_per_pixel = (int) GET_2B(bmpinfoheader,10); + biWidth = (int)GET_2B(bmpinfoheader, 4); + biHeight = (int)GET_2B(bmpinfoheader, 6); + biPlanes = GET_2B(bmpinfoheader, 8); + source->bits_per_pixel = (int)GET_2B(bmpinfoheader, 10); switch (source->bits_per_pixel) { case 8: /* colormapped image */ @@ -343,14 +492,14 @@ start_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) case 64: /* Decode Windows 3.x header (Microsoft calls this a BITMAPINFOHEADER) */ /* or OS/2 2.x header, which has additional fields that we ignore */ - biWidth = (int) GET_4B(bmpinfoheader,4); - biHeight = (int) GET_4B(bmpinfoheader,8); - biPlanes = GET_2B(bmpinfoheader,12); - source->bits_per_pixel = (int) GET_2B(bmpinfoheader,14); - biCompression = GET_4B(bmpinfoheader,16); - biXPelsPerMeter = (int) GET_4B(bmpinfoheader,24); - biYPelsPerMeter = (int) GET_4B(bmpinfoheader,28); - biClrUsed = GET_4B(bmpinfoheader,32); + biWidth = (int)GET_4B(bmpinfoheader, 4); + biHeight = (int)GET_4B(bmpinfoheader, 8); + biPlanes = GET_2B(bmpinfoheader, 12); + source->bits_per_pixel = (int)GET_2B(bmpinfoheader, 14); + biCompression = GET_4B(bmpinfoheader, 16); + biXPelsPerMeter = (int)GET_4B(bmpinfoheader, 24); + biYPelsPerMeter = (int)GET_4B(bmpinfoheader, 28); + biClrUsed = GET_4B(bmpinfoheader, 32); /* biSizeImage, biClrImportant fields are ignored */ switch (source->bits_per_pixel) { @@ -373,8 +522,8 @@ start_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) if (biXPelsPerMeter > 0 && biYPelsPerMeter > 0) { /* Set JFIF density parameters from the BMP data */ - cinfo->X_density = (UINT16) 
(biXPelsPerMeter/100); /* 100 cm per meter */ - cinfo->Y_density = (UINT16) (biYPelsPerMeter/100); + cinfo->X_density = (UINT16)(biXPelsPerMeter / 100); /* 100 cm per meter */ + cinfo->Y_density = (UINT16)(biYPelsPerMeter / 100); cinfo->density_unit = 2; /* dots/cm */ } break; @@ -399,10 +548,10 @@ start_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) ERREXIT(cinfo, JERR_BMP_BADCMAP); /* Allocate space to store the colormap */ source->colormap = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) biClrUsed, (JDIMENSION) 3); + ((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)biClrUsed, (JDIMENSION)3); + source->cmap_length = (int)biClrUsed; /* and read it from the file */ - read_colormap(source, (int) biClrUsed, mapentrysize); + read_colormap(source, (int)biClrUsed, mapentrysize); /* account for size of colormap */ bPad -= biClrUsed * mapentrysize; } @@ -411,40 +560,95 @@ start_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) if (bPad < 0) /* incorrect bfOffBits value? 
*/ ERREXIT(cinfo, JERR_BMP_BADHEADER); while (--bPad >= 0) { - (void) read_byte(source); + (void)read_byte(source); } /* Compute row width in file, including padding to 4-byte boundary */ - if (source->bits_per_pixel == 24) - row_width = (JDIMENSION) (biWidth * 3); - else if (source->bits_per_pixel == 32) - row_width = (JDIMENSION) (biWidth * 4); - else - row_width = (JDIMENSION) biWidth; + switch (source->bits_per_pixel) { + case 8: + if (cinfo->in_color_space == JCS_UNKNOWN) + cinfo->in_color_space = JCS_EXT_RGB; + if (IsExtRGB(cinfo->in_color_space)) + cinfo->input_components = rgb_pixelsize[cinfo->in_color_space]; + else if (cinfo->in_color_space == JCS_GRAYSCALE) + cinfo->input_components = 1; + else if (cinfo->in_color_space == JCS_CMYK) + cinfo->input_components = 4; + else + ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); + row_width = (JDIMENSION)biWidth; + break; + case 24: + if (cinfo->in_color_space == JCS_UNKNOWN) + cinfo->in_color_space = JCS_EXT_BGR; + if (IsExtRGB(cinfo->in_color_space)) + cinfo->input_components = rgb_pixelsize[cinfo->in_color_space]; + else if (cinfo->in_color_space == JCS_CMYK) + cinfo->input_components = 4; + else + ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); + row_width = (JDIMENSION)(biWidth * 3); + break; + case 32: + if (cinfo->in_color_space == JCS_UNKNOWN) + cinfo->in_color_space = JCS_EXT_BGRA; + if (IsExtRGB(cinfo->in_color_space)) + cinfo->input_components = rgb_pixelsize[cinfo->in_color_space]; + else if (cinfo->in_color_space == JCS_CMYK) + cinfo->input_components = 4; + else + ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); + row_width = (JDIMENSION)(biWidth * 4); + break; + default: + ERREXIT(cinfo, JERR_BMP_BADDEPTH); + } while ((row_width & 3) != 0) row_width++; source->row_width = row_width; - /* Allocate space for inversion array, prepare for preload pass */ - source->whole_image = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - row_width, (JDIMENSION) biHeight, (JDIMENSION) 1); - 
source->pub.get_pixel_rows = preload_image; - if (cinfo->progress != NULL) { - cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; - progress->total_extra_passes++; /* count file input as separate pass */ + if (source->use_inversion_array) { + /* Allocate space for inversion array, prepare for preload pass */ + source->whole_image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE, + row_width, (JDIMENSION)biHeight, (JDIMENSION)1); + source->pub.get_pixel_rows = preload_image; + if (cinfo->progress != NULL) { + cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress; + progress->total_extra_passes++; /* count file input as separate pass */ + } + } else { + source->iobuffer = (U_CHAR *) + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, row_width); + switch (source->bits_per_pixel) { + case 8: + source->pub.get_pixel_rows = get_8bit_row; + break; + case 24: + source->pub.get_pixel_rows = get_24bit_row; + break; + case 32: + source->pub.get_pixel_rows = get_32bit_row; + break; + default: + ERREXIT(cinfo, JERR_BMP_BADDEPTH); + } } + /* Ensure that biWidth * cinfo->input_components doesn't exceed the maximum + value of the JDIMENSION type. This is only a danger with BMP files, since + their width and height fields are 32-bit integers. 
*/ + if ((unsigned long long)biWidth * + (unsigned long long)cinfo->input_components > 0xFFFFFFFFULL) + ERREXIT(cinfo, JERR_WIDTH_OVERFLOW); /* Allocate one-row buffer for returned data */ source->pub.buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) (biWidth * 3), (JDIMENSION) 1); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (JDIMENSION)(biWidth * cinfo->input_components), (JDIMENSION)1); source->pub.buffer_height = 1; - cinfo->in_color_space = JCS_RGB; - cinfo->input_components = 3; cinfo->data_precision = 8; - cinfo->image_width = (JDIMENSION) biWidth; - cinfo->image_height = (JDIMENSION) biHeight; + cinfo->image_width = (JDIMENSION)biWidth; + cinfo->image_height = (JDIMENSION)biHeight; } @@ -453,7 +657,7 @@ start_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(void) -finish_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +finish_input_bmp(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { /* no work */ } @@ -464,20 +668,22 @@ finish_input_bmp (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ GLOBAL(cjpeg_source_ptr) -jinit_read_bmp (j_compress_ptr cinfo) +jinit_read_bmp(j_compress_ptr cinfo, boolean use_inversion_array) { bmp_source_ptr source; /* Create module interface object */ source = (bmp_source_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - sizeof(bmp_source_struct)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + sizeof(bmp_source_struct)); source->cinfo = cinfo; /* make back link for subroutines */ /* Fill in method ptrs, except get_pixel_rows which start_input sets */ source->pub.start_input = start_input_bmp; source->pub.finish_input = finish_input_bmp; - return (cjpeg_source_ptr) source; + source->use_inversion_array = use_inversion_array; + + return (cjpeg_source_ptr)source; } #endif /* BMP_SUPPORTED */ diff --git a/rdcolmap.c b/rdcolmap.c index ed8ca3b..cbbef59 100644 --- a/rdcolmap.c +++ b/rdcolmap.c @@ -44,7 +44,7 @@ */ LOCAL(void) 
-add_map_entry (j_decompress_ptr cinfo, int R, int G, int B) +add_map_entry(j_decompress_ptr cinfo, int R, int G, int B) { JSAMPROW colormap0 = cinfo->colormap[0]; JSAMPROW colormap1 = cinfo->colormap[1]; @@ -61,13 +61,13 @@ add_map_entry (j_decompress_ptr cinfo, int R, int G, int B) } /* Check for map overflow. */ - if (ncolors >= (MAXJSAMPLE+1)) - ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, (MAXJSAMPLE+1)); + if (ncolors >= (MAXJSAMPLE + 1)) + ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, (MAXJSAMPLE + 1)); /* OK, add color to map. */ - colormap0[ncolors] = (JSAMPLE) R; - colormap1[ncolors] = (JSAMPLE) G; - colormap2[ncolors] = (JSAMPLE) B; + colormap0[ncolors] = (JSAMPLE)R; + colormap1[ncolors] = (JSAMPLE)G; + colormap2[ncolors] = (JSAMPLE)B; cinfo->actual_number_of_colors++; } @@ -77,7 +77,7 @@ add_map_entry (j_decompress_ptr cinfo, int R, int G, int B) */ LOCAL(void) -read_gif_map (j_decompress_ptr cinfo, FILE *infile) +read_gif_map(j_decompress_ptr cinfo, FILE *infile) { int header[13]; int i, colormaplen; @@ -108,9 +108,9 @@ read_gif_map (j_decompress_ptr cinfo, FILE *infile) if (R == EOF || G == EOF || B == EOF) ERREXIT(cinfo, JERR_BAD_CMAP_FILE); add_map_entry(cinfo, - R << (BITS_IN_JSAMPLE-8), - G << (BITS_IN_JSAMPLE-8), - B << (BITS_IN_JSAMPLE-8)); + R << (BITS_IN_JSAMPLE - 8), + G << (BITS_IN_JSAMPLE - 8), + B << (BITS_IN_JSAMPLE - 8)); } } @@ -119,7 +119,7 @@ read_gif_map (j_decompress_ptr cinfo, FILE *infile) LOCAL(int) -pbm_getc (FILE *infile) +pbm_getc(FILE *infile) /* Read next char, skipping over any comments */ /* A comment/newline sequence is returned as a newline */ { @@ -136,7 +136,7 @@ pbm_getc (FILE *infile) LOCAL(unsigned int) -read_pbm_integer (j_decompress_ptr cinfo, FILE *infile) +read_pbm_integer(j_decompress_ptr cinfo, FILE *infile) /* Read an unsigned decimal integer from the PPM file */ /* Swallows one trailing character after the integer */ /* Note that on a 16-bit-int machine, only values up to 64k can be read. 
*/ @@ -169,7 +169,7 @@ read_pbm_integer (j_decompress_ptr cinfo, FILE *infile) */ LOCAL(void) -read_ppm_map (j_decompress_ptr cinfo, FILE *infile) +read_ppm_map(j_decompress_ptr cinfo, FILE *infile) { int c; unsigned int w, h, maxval, row, col; @@ -187,7 +187,7 @@ read_ppm_map (j_decompress_ptr cinfo, FILE *infile) ERREXIT(cinfo, JERR_BAD_CMAP_FILE); /* For now, we don't support rescaling from an unusual maxval. */ - if (maxval != (unsigned int) MAXJSAMPLE) + if (maxval != (unsigned int)MAXJSAMPLE) ERREXIT(cinfo, JERR_BAD_CMAP_FILE); switch (c) { @@ -229,12 +229,12 @@ read_ppm_map (j_decompress_ptr cinfo, FILE *infile) */ GLOBAL(void) -read_color_map (j_decompress_ptr cinfo, FILE *infile) +read_color_map(j_decompress_ptr cinfo, FILE *infile) { /* Allocate space for a color map of maximum supported size. */ cinfo->colormap = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) (MAXJSAMPLE+1), (JDIMENSION) 3); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (JDIMENSION)(MAXJSAMPLE + 1), (JDIMENSION)3); cinfo->actual_number_of_colors = 0; /* initialize map to empty */ /* Read first byte to determine file format */ diff --git a/rdgif.c b/rdgif.c index ce689f7..ff9258d 100644 --- a/rdgif.c +++ b/rdgif.c @@ -29,7 +29,7 @@ */ GLOBAL(cjpeg_source_ptr) -jinit_read_gif (j_compress_ptr cinfo) +jinit_read_gif(j_compress_ptr cinfo) { fprintf(stderr, "GIF input is unsupported for legal reasons. 
Sorry.\n"); exit(EXIT_FAILURE); diff --git a/rdjpgcom.c b/rdjpgcom.c index b3076dd..e9f31d2 100644 --- a/rdjpgcom.c +++ b/rdjpgcom.c @@ -69,7 +69,7 @@ static FILE *infile; /* input JPEG file */ /* Read one byte, testing for EOF */ static int -read_1_byte (void) +read_1_byte(void) { int c; @@ -82,7 +82,7 @@ read_1_byte (void) /* Read 2 bytes, convert to unsigned int */ /* All 2-byte quantities in JPEG markers are MSB first */ static unsigned int -read_2_bytes (void) +read_2_bytes(void) { int c1, c2; @@ -92,7 +92,7 @@ read_2_bytes (void) c2 = NEXTBYTE(); if (c2 == EOF) ERREXIT("Premature EOF in JPEG file"); - return (((unsigned int) c1) << 8) + ((unsigned int) c2); + return (((unsigned int)c1) << 8) + ((unsigned int)c2); } @@ -102,25 +102,25 @@ read_2_bytes (void) * in this program. (See jdmarker.c for a more complete list.) */ -#define M_SOF0 0xC0 /* Start Of Frame N */ -#define M_SOF1 0xC1 /* N indicates which compression process */ -#define M_SOF2 0xC2 /* Only SOF0-SOF2 are now in common use */ -#define M_SOF3 0xC3 -#define M_SOF5 0xC5 /* NB: codes C4 and CC are NOT SOF markers */ -#define M_SOF6 0xC6 -#define M_SOF7 0xC7 -#define M_SOF9 0xC9 -#define M_SOF10 0xCA -#define M_SOF11 0xCB -#define M_SOF13 0xCD -#define M_SOF14 0xCE -#define M_SOF15 0xCF -#define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ -#define M_EOI 0xD9 /* End Of Image (end of datastream) */ -#define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ -#define M_APP0 0xE0 /* Application-specific marker, type N */ -#define M_APP12 0xEC /* (we don't bother to list all 16 APPn's) */ -#define M_COM 0xFE /* COMment */ +#define M_SOF0 0xC0 /* Start Of Frame N */ +#define M_SOF1 0xC1 /* N indicates which compression process */ +#define M_SOF2 0xC2 /* Only SOF0-SOF2 are now in common use */ +#define M_SOF3 0xC3 +#define M_SOF5 0xC5 /* NB: codes C4 and CC are NOT SOF markers */ +#define M_SOF6 0xC6 +#define M_SOF7 0xC7 +#define M_SOF9 0xC9 +#define M_SOF10 0xCA +#define M_SOF11 0xCB 
+#define M_SOF13 0xCD +#define M_SOF14 0xCE +#define M_SOF15 0xCF +#define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ +#define M_EOI 0xD9 /* End Of Image (end of datastream) */ +#define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ +#define M_APP0 0xE0 /* Application-specific marker, type N */ +#define M_APP12 0xEC /* (we don't bother to list all 16 APPn's) */ +#define M_COM 0xFE /* COMment */ /* @@ -134,7 +134,7 @@ read_2_bytes (void) */ static int -next_marker (void) +next_marker(void) { int c; int discarded_bytes = 0; @@ -169,7 +169,7 @@ next_marker (void) */ static int -first_marker (void) +first_marker(void) { int c1, c2; @@ -191,7 +191,7 @@ first_marker (void) */ static void -skip_variable (void) +skip_variable(void) /* Skip over an unknown or uninteresting variable-length marker */ { unsigned int length; @@ -204,7 +204,7 @@ skip_variable (void) length -= 2; /* Skip over the remaining bytes */ while (length > 0) { - (void) read_1_byte(); + (void)read_1_byte(); length--; } } @@ -217,7 +217,7 @@ skip_variable (void) */ static void -process_COM (int raw) +process_COM(int raw) { unsigned int length; int ch; @@ -274,7 +274,7 @@ process_COM (int raw) */ static void -process_SOFn (int marker) +process_SOFn(int marker) { unsigned int length; unsigned int image_height, image_width; @@ -301,7 +301,8 @@ process_SOFn (int marker) case M_SOF10: process = "Progressive, arithmetic coding"; break; case M_SOF11: process = "Lossless, arithmetic coding"; break; case M_SOF13: process = "Differential sequential, arithmetic coding"; break; - case M_SOF14: process = "Differential progressive, arithmetic coding"; break; + case M_SOF14: + process = "Differential progressive, arithmetic coding"; break; case M_SOF15: process = "Differential lossless, arithmetic coding"; break; default: process = "Unknown"; break; } @@ -310,13 +311,13 @@ process_SOFn (int marker) image_width, image_height, num_components, data_precision); printf("JPEG process: %s\n", process); - 
if (length != (unsigned int) (8 + num_components * 3)) + if (length != (unsigned int)(8 + num_components * 3)) ERREXIT("Bogus SOF marker length"); for (ci = 0; ci < num_components; ci++) { - (void) read_1_byte(); /* Component ID code */ - (void) read_1_byte(); /* H, V sampling factors */ - (void) read_1_byte(); /* Quantization table number */ + (void)read_1_byte(); /* Component ID code */ + (void)read_1_byte(); /* H, V sampling factors */ + (void)read_1_byte(); /* Quantization table number */ } } @@ -332,7 +333,7 @@ process_SOFn (int marker) */ static int -scan_JPEG_header (int verbose, int raw) +scan_JPEG_header(int verbose, int raw) { int marker; @@ -401,7 +402,7 @@ static const char *progname; /* program name for error messages */ static void -usage (void) +usage(void) /* complain about bad command line */ { fprintf(stderr, "rdjpgcom displays any textual comments in a JPEG file.\n"); @@ -417,7 +418,7 @@ usage (void) static int -keymatch (char *arg, const char *keyword, int minchars) +keymatch(char *arg, const char *keyword, int minchars) /* Case-insensitive matching of (possibly abbreviated) keyword switches. */ /* keyword is the constant keyword (must be lower case already), */ /* minchars is length of minimum legal abbreviation. */ @@ -446,7 +447,7 @@ keymatch (char *arg, const char *keyword, int minchars) */ int -main (int argc, char **argv) +main(int argc, char **argv) { int argn; char *arg; @@ -477,7 +478,7 @@ main (int argc, char **argv) /* Open the input file. */ /* Unix style: expect zero or one file name */ - if (argn < argc-1) { + if (argn < argc - 1) { fprintf(stderr, "%s: only one input file\n", progname); usage(); } @@ -502,7 +503,7 @@ main (int argc, char **argv) } /* Scan the JPEG headers. */ - (void) scan_JPEG_header(verbose, raw); + (void)scan_JPEG_header(verbose, raw); /* All done. */ exit(EXIT_SUCCESS); diff --git a/rdppm.c b/rdppm.c index 33ff749..87bc330 100644 --- a/rdppm.c +++ b/rdppm.c @@ -5,7 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. 
Lane. * Modified 2009 by Bill Allombert, Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2015, 2016, D. R. Commander. + * Copyright (C) 2015-2017, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -22,6 +22,7 @@ * the file is indeed PPM format). */ +#include "cmyk.h" #include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ #ifdef PPM_SUPPORTED @@ -44,19 +45,24 @@ #ifdef HAVE_UNSIGNED_CHAR typedef unsigned char U_CHAR; -#define UCH(x) ((int) (x)) +#define UCH(x) ((int)(x)) #else /* !HAVE_UNSIGNED_CHAR */ #ifdef __CHAR_UNSIGNED__ typedef char U_CHAR; -#define UCH(x) ((int) (x)) +#define UCH(x) ((int)(x)) #else typedef char U_CHAR; -#define UCH(x) ((int) (x) & 0xFF) +#define UCH(x) ((int)(x) & 0xFF) #endif #endif /* HAVE_UNSIGNED_CHAR */ -#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len))) +#define ReadOK(file, buffer, len) \ + (JFREAD(file, buffer, len) == ((size_t)(len))) + +static int alpha_index[JPEG_NUMCS] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1 +}; /* Private version of data source object */ @@ -69,14 +75,14 @@ typedef struct { JSAMPROW pixrow; /* compressor input buffer */ size_t buffer_width; /* width of I/O buffer */ JSAMPLE *rescale; /* => maxval-remapping array, or NULL */ - int maxval; + unsigned int maxval; } ppm_source_struct; typedef ppm_source_struct *ppm_source_ptr; LOCAL(int) -pbm_getc (FILE *infile) +pbm_getc(FILE *infile) /* Read next char, skipping over any comments */ /* A comment/newline sequence is returned as a newline */ { @@ -93,7 +99,7 @@ pbm_getc (FILE *infile) LOCAL(unsigned int) -read_pbm_integer (j_compress_ptr cinfo, FILE *infile, unsigned int maxval) +read_pbm_integer(j_compress_ptr cinfo, FILE *infile, unsigned int maxval) /* Read an unsigned decimal integer from the PPM file */ /* Swallows one trailing character after the integer */ /* Note that on a 16-bit-int machine, only values up to 64k can 
be read. */ @@ -119,7 +125,7 @@ read_pbm_integer (j_compress_ptr cinfo, FILE *infile, unsigned int maxval) } if (val > maxval) - ERREXIT(cinfo, JERR_PPM_TOOLARGE); + ERREXIT(cinfo, JERR_PPM_OUTOFRANGE); return val; } @@ -137,10 +143,10 @@ read_pbm_integer (j_compress_ptr cinfo, FILE *infile, unsigned int maxval) METHODDEF(JDIMENSION) -get_text_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_text_gray_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading text-format PGM files with any maxval */ { - ppm_source_ptr source = (ppm_source_ptr) sinfo; + ppm_source_ptr source = (ppm_source_ptr)sinfo; FILE *infile = source->pub.input_file; register JSAMPROW ptr; register JSAMPLE *rescale = source->rescale; @@ -155,38 +161,168 @@ get_text_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) } +#define GRAY_RGB_READ_LOOP(read_op, alpha_set_op) { \ + for (col = cinfo->image_width; col > 0; col--) { \ + ptr[rindex] = ptr[gindex] = ptr[bindex] = read_op; \ + alpha_set_op \ + ptr += ps; \ + } \ +} + +METHODDEF(JDIMENSION) +get_text_gray_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading text-format PGM files with any maxval and + converting to extended RGB */ +{ + ppm_source_ptr source = (ppm_source_ptr)sinfo; + FILE *infile = source->pub.input_file; + register JSAMPROW ptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + unsigned int maxval = source->maxval; + register int rindex = rgb_red[cinfo->in_color_space]; + register int gindex = rgb_green[cinfo->in_color_space]; + register int bindex = rgb_blue[cinfo->in_color_space]; + register int aindex = alpha_index[cinfo->in_color_space]; + register int ps = rgb_pixelsize[cinfo->in_color_space]; + + ptr = source->pub.buffer[0]; + if (maxval == MAXJSAMPLE) { + if (aindex >= 0) + GRAY_RGB_READ_LOOP(read_pbm_integer(cinfo, infile, maxval), + ptr[aindex] = 0xFF;) + else + GRAY_RGB_READ_LOOP(read_pbm_integer(cinfo, infile, maxval),) + } else 
{ + if (aindex >= 0) + GRAY_RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)], + ptr[aindex] = 0xFF;) + else + GRAY_RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)],) + } + return 1; +} + + METHODDEF(JDIMENSION) -get_text_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_text_gray_cmyk_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading text-format PGM files with any maxval and + converting to CMYK */ +{ + ppm_source_ptr source = (ppm_source_ptr)sinfo; + FILE *infile = source->pub.input_file; + register JSAMPROW ptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + unsigned int maxval = source->maxval; + + ptr = source->pub.buffer[0]; + if (maxval == MAXJSAMPLE) { + for (col = cinfo->image_width; col > 0; col--) { + JSAMPLE gray = read_pbm_integer(cinfo, infile, maxval); + rgb_to_cmyk(gray, gray, gray, ptr, ptr + 1, ptr + 2, ptr + 3); + ptr += 4; + } + } else { + for (col = cinfo->image_width; col > 0; col--) { + JSAMPLE gray = rescale[read_pbm_integer(cinfo, infile, maxval)]; + rgb_to_cmyk(gray, gray, gray, ptr, ptr + 1, ptr + 2, ptr + 3); + ptr += 4; + } + } + return 1; +} + + +#define RGB_READ_LOOP(read_op, alpha_set_op) { \ + for (col = cinfo->image_width; col > 0; col--) { \ + ptr[rindex] = read_op; \ + ptr[gindex] = read_op; \ + ptr[bindex] = read_op; \ + alpha_set_op \ + ptr += ps; \ + } \ +} + +METHODDEF(JDIMENSION) +get_text_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading text-format PPM files with any maxval */ { - ppm_source_ptr source = (ppm_source_ptr) sinfo; + ppm_source_ptr source = (ppm_source_ptr)sinfo; FILE *infile = source->pub.input_file; register JSAMPROW ptr; register JSAMPLE *rescale = source->rescale; JDIMENSION col; unsigned int maxval = source->maxval; + register int rindex = rgb_red[cinfo->in_color_space]; + register int gindex = rgb_green[cinfo->in_color_space]; + register int bindex = rgb_blue[cinfo->in_color_space]; 
+ register int aindex = alpha_index[cinfo->in_color_space]; + register int ps = rgb_pixelsize[cinfo->in_color_space]; ptr = source->pub.buffer[0]; - for (col = cinfo->image_width; col > 0; col--) { - *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; - *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; - *ptr++ = rescale[read_pbm_integer(cinfo, infile, maxval)]; + if (maxval == MAXJSAMPLE) { + if (aindex >= 0) + RGB_READ_LOOP(read_pbm_integer(cinfo, infile, maxval), + ptr[aindex] = 0xFF;) + else + RGB_READ_LOOP(read_pbm_integer(cinfo, infile, maxval),) + } else { + if (aindex >= 0) + RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)], + ptr[aindex] = 0xFF;) + else + RGB_READ_LOOP(rescale[read_pbm_integer(cinfo, infile, maxval)],) } return 1; } METHODDEF(JDIMENSION) -get_scaled_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_text_rgb_cmyk_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading text-format PPM files with any maxval and + converting to CMYK */ +{ + ppm_source_ptr source = (ppm_source_ptr)sinfo; + FILE *infile = source->pub.input_file; + register JSAMPROW ptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + unsigned int maxval = source->maxval; + + ptr = source->pub.buffer[0]; + if (maxval == MAXJSAMPLE) { + for (col = cinfo->image_width; col > 0; col--) { + JSAMPLE r = read_pbm_integer(cinfo, infile, maxval); + JSAMPLE g = read_pbm_integer(cinfo, infile, maxval); + JSAMPLE b = read_pbm_integer(cinfo, infile, maxval); + rgb_to_cmyk(r, g, b, ptr, ptr + 1, ptr + 2, ptr + 3); + ptr += 4; + } + } else { + for (col = cinfo->image_width; col > 0; col--) { + JSAMPLE r = rescale[read_pbm_integer(cinfo, infile, maxval)]; + JSAMPLE g = rescale[read_pbm_integer(cinfo, infile, maxval)]; + JSAMPLE b = rescale[read_pbm_integer(cinfo, infile, maxval)]; + rgb_to_cmyk(r, g, b, ptr, ptr + 1, ptr + 2, ptr + 3); + ptr += 4; + } + } + return 1; +} + + +METHODDEF(JDIMENSION) 
+get_scaled_gray_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading raw-byte-format PGM files with any maxval */ { - ppm_source_ptr source = (ppm_source_ptr) sinfo; + ppm_source_ptr source = (ppm_source_ptr)sinfo; register JSAMPROW ptr; register U_CHAR *bufferptr; register JSAMPLE *rescale = source->rescale; JDIMENSION col; - if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) ERREXIT(cinfo, JERR_INPUT_EOF); ptr = source->pub.buffer[0]; bufferptr = source->iobuffer; @@ -198,55 +334,173 @@ get_scaled_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) METHODDEF(JDIMENSION) -get_scaled_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_gray_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-byte-format PGM files with any maxval + and converting to extended RGB */ +{ + ppm_source_ptr source = (ppm_source_ptr)sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + unsigned int maxval = source->maxval; + register int rindex = rgb_red[cinfo->in_color_space]; + register int gindex = rgb_green[cinfo->in_color_space]; + register int bindex = rgb_blue[cinfo->in_color_space]; + register int aindex = alpha_index[cinfo->in_color_space]; + register int ps = rgb_pixelsize[cinfo->in_color_space]; + + if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + if (maxval == MAXJSAMPLE) { + if (aindex >= 0) + GRAY_RGB_READ_LOOP(*bufferptr++, ptr[aindex] = 0xFF;) + else + GRAY_RGB_READ_LOOP(*bufferptr++,) + } else { + if (aindex >= 0) + GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = 0xFF;) + else + GRAY_RGB_READ_LOOP(rescale[UCH(*bufferptr++)],) + } + return 1; +} + + +METHODDEF(JDIMENSION) 
+get_gray_cmyk_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-byte-format PGM files with any maxval + and converting to CMYK */ +{ + ppm_source_ptr source = (ppm_source_ptr)sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + unsigned int maxval = source->maxval; + + if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + if (maxval == MAXJSAMPLE) { + for (col = cinfo->image_width; col > 0; col--) { + JSAMPLE gray = *bufferptr++; + rgb_to_cmyk(gray, gray, gray, ptr, ptr + 1, ptr + 2, ptr + 3); + ptr += 4; + } + } else { + for (col = cinfo->image_width; col > 0; col--) { + JSAMPLE gray = rescale[UCH(*bufferptr++)]; + rgb_to_cmyk(gray, gray, gray, ptr, ptr + 1, ptr + 2, ptr + 3); + ptr += 4; + } + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading raw-byte-format PPM files with any maxval */ { - ppm_source_ptr source = (ppm_source_ptr) sinfo; + ppm_source_ptr source = (ppm_source_ptr)sinfo; register JSAMPROW ptr; register U_CHAR *bufferptr; register JSAMPLE *rescale = source->rescale; JDIMENSION col; + unsigned int maxval = source->maxval; + register int rindex = rgb_red[cinfo->in_color_space]; + register int gindex = rgb_green[cinfo->in_color_space]; + register int bindex = rgb_blue[cinfo->in_color_space]; + register int aindex = alpha_index[cinfo->in_color_space]; + register int ps = rgb_pixelsize[cinfo->in_color_space]; - if (! 
ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) ERREXIT(cinfo, JERR_INPUT_EOF); ptr = source->pub.buffer[0]; bufferptr = source->iobuffer; - for (col = cinfo->image_width; col > 0; col--) { - *ptr++ = rescale[UCH(*bufferptr++)]; - *ptr++ = rescale[UCH(*bufferptr++)]; - *ptr++ = rescale[UCH(*bufferptr++)]; + if (maxval == MAXJSAMPLE) { + if (aindex >= 0) + RGB_READ_LOOP(*bufferptr++, ptr[aindex] = 0xFF;) + else + RGB_READ_LOOP(*bufferptr++,) + } else { + if (aindex >= 0) + RGB_READ_LOOP(rescale[UCH(*bufferptr++)], ptr[aindex] = 0xFF;) + else + RGB_READ_LOOP(rescale[UCH(*bufferptr++)],) } return 1; } METHODDEF(JDIMENSION) -get_raw_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_rgb_cmyk_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +/* This version is for reading raw-byte-format PPM files with any maxval and + converting to CMYK */ +{ + ppm_source_ptr source = (ppm_source_ptr)sinfo; + register JSAMPROW ptr; + register U_CHAR *bufferptr; + register JSAMPLE *rescale = source->rescale; + JDIMENSION col; + unsigned int maxval = source->maxval; + + if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + ERREXIT(cinfo, JERR_INPUT_EOF); + ptr = source->pub.buffer[0]; + bufferptr = source->iobuffer; + if (maxval == MAXJSAMPLE) { + for (col = cinfo->image_width; col > 0; col--) { + JSAMPLE r = *bufferptr++; + JSAMPLE g = *bufferptr++; + JSAMPLE b = *bufferptr++; + rgb_to_cmyk(r, g, b, ptr, ptr + 1, ptr + 2, ptr + 3); + ptr += 4; + } + } else { + for (col = cinfo->image_width; col > 0; col--) { + JSAMPLE r = rescale[UCH(*bufferptr++)]; + JSAMPLE g = rescale[UCH(*bufferptr++)]; + JSAMPLE b = rescale[UCH(*bufferptr++)]; + rgb_to_cmyk(r, g, b, ptr, ptr + 1, ptr + 2, ptr + 3); + ptr += 4; + } + } + return 1; +} + + +METHODDEF(JDIMENSION) +get_raw_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading raw-byte-format 
files with maxval = MAXJSAMPLE. * In this case we just read right into the JSAMPLE buffer! * Note that same code works for PPM and PGM files. */ { - ppm_source_ptr source = (ppm_source_ptr) sinfo; + ppm_source_ptr source = (ppm_source_ptr)sinfo; - if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) ERREXIT(cinfo, JERR_INPUT_EOF); return 1; } METHODDEF(JDIMENSION) -get_word_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_word_gray_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading raw-word-format PGM files with any maxval */ { - ppm_source_ptr source = (ppm_source_ptr) sinfo; + ppm_source_ptr source = (ppm_source_ptr)sinfo; register JSAMPROW ptr; register U_CHAR *bufferptr; register JSAMPLE *rescale = source->rescale; JDIMENSION col; unsigned int maxval = source->maxval; - if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) ERREXIT(cinfo, JERR_INPUT_EOF); ptr = source->pub.buffer[0]; bufferptr = source->iobuffer; @@ -255,7 +509,7 @@ get_word_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) temp = UCH(*bufferptr++) << 8; temp |= UCH(*bufferptr++); if (temp > maxval) - ERREXIT(cinfo, JERR_PPM_TOOLARGE); + ERREXIT(cinfo, JERR_PPM_OUTOFRANGE); *ptr++ = rescale[temp]; } return 1; @@ -263,17 +517,17 @@ get_word_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) METHODDEF(JDIMENSION) -get_word_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_word_rgb_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading raw-word-format PPM files with any maxval */ { - ppm_source_ptr source = (ppm_source_ptr) sinfo; + ppm_source_ptr source = (ppm_source_ptr)sinfo; register JSAMPROW ptr; register U_CHAR *bufferptr; register JSAMPLE *rescale = source->rescale; JDIMENSION col; unsigned int maxval 
= source->maxval; - if (! ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) + if (!ReadOK(source->pub.input_file, source->iobuffer, source->buffer_width)) ERREXIT(cinfo, JERR_INPUT_EOF); ptr = source->pub.buffer[0]; bufferptr = source->iobuffer; @@ -282,17 +536,17 @@ get_word_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) temp = UCH(*bufferptr++) << 8; temp |= UCH(*bufferptr++); if (temp > maxval) - ERREXIT(cinfo, JERR_PPM_TOOLARGE); + ERREXIT(cinfo, JERR_PPM_OUTOFRANGE); *ptr++ = rescale[temp]; temp = UCH(*bufferptr++) << 8; temp |= UCH(*bufferptr++); if (temp > maxval) - ERREXIT(cinfo, JERR_PPM_TOOLARGE); + ERREXIT(cinfo, JERR_PPM_OUTOFRANGE); *ptr++ = rescale[temp]; temp = UCH(*bufferptr++) << 8; temp |= UCH(*bufferptr++); if (temp > maxval) - ERREXIT(cinfo, JERR_PPM_TOOLARGE); + ERREXIT(cinfo, JERR_PPM_OUTOFRANGE); *ptr++ = rescale[temp]; } return 1; @@ -304,9 +558,9 @@ get_word_rgb_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(void) -start_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +start_input_ppm(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { - ppm_source_ptr source = (ppm_source_ptr) sinfo; + ppm_source_ptr source = (ppm_source_ptr)sinfo; int c; unsigned int w, h, maxval; boolean need_iobuffer, use_raw_buffer, need_rescale; @@ -337,8 +591,8 @@ start_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) ERREXIT(cinfo, JERR_PPM_NOT); cinfo->data_precision = BITS_IN_JSAMPLE; /* we always rescale data to this */ - cinfo->image_width = (JDIMENSION) w; - cinfo->image_height = (JDIMENSION) h; + cinfo->image_width = (JDIMENSION)w; + cinfo->image_height = (JDIMENSION)h; source->maxval = maxval; /* initialize flags to most common settings */ @@ -348,58 +602,99 @@ start_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) switch (c) { case '2': /* it's a text-format PGM file */ - cinfo->input_components = 1; - cinfo->in_color_space = JCS_GRAYSCALE; + if (cinfo->in_color_space == JCS_UNKNOWN) + 
cinfo->in_color_space = JCS_GRAYSCALE; TRACEMS2(cinfo, 1, JTRC_PGM_TEXT, w, h); - source->pub.get_pixel_rows = get_text_gray_row; + if (cinfo->in_color_space == JCS_GRAYSCALE) + source->pub.get_pixel_rows = get_text_gray_row; + else if (IsExtRGB(cinfo->in_color_space)) + source->pub.get_pixel_rows = get_text_gray_rgb_row; + else if (cinfo->in_color_space == JCS_CMYK) + source->pub.get_pixel_rows = get_text_gray_cmyk_row; + else + ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); need_iobuffer = FALSE; break; case '3': /* it's a text-format PPM file */ - cinfo->input_components = 3; - cinfo->in_color_space = JCS_RGB; + if (cinfo->in_color_space == JCS_UNKNOWN) + cinfo->in_color_space = JCS_EXT_RGB; TRACEMS2(cinfo, 1, JTRC_PPM_TEXT, w, h); - source->pub.get_pixel_rows = get_text_rgb_row; + if (IsExtRGB(cinfo->in_color_space)) + source->pub.get_pixel_rows = get_text_rgb_row; + else if (cinfo->in_color_space == JCS_CMYK) + source->pub.get_pixel_rows = get_text_rgb_cmyk_row; + else + ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); need_iobuffer = FALSE; break; case '5': /* it's a raw-format PGM file */ - cinfo->input_components = 1; - cinfo->in_color_space = JCS_GRAYSCALE; + if (cinfo->in_color_space == JCS_UNKNOWN) + cinfo->in_color_space = JCS_GRAYSCALE; TRACEMS2(cinfo, 1, JTRC_PGM, w, h); if (maxval > 255) { source->pub.get_pixel_rows = get_word_gray_row; - } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR)) { + } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR) && + cinfo->in_color_space == JCS_GRAYSCALE) { source->pub.get_pixel_rows = get_raw_row; use_raw_buffer = TRUE; need_rescale = FALSE; } else { - source->pub.get_pixel_rows = get_scaled_gray_row; + if (cinfo->in_color_space == JCS_GRAYSCALE) + source->pub.get_pixel_rows = get_scaled_gray_row; + else if (IsExtRGB(cinfo->in_color_space)) + source->pub.get_pixel_rows = get_gray_rgb_row; + else if (cinfo->in_color_space == JCS_CMYK) + source->pub.get_pixel_rows = get_gray_cmyk_row; + else + 
ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); } break; case '6': /* it's a raw-format PPM file */ - cinfo->input_components = 3; - cinfo->in_color_space = JCS_RGB; + if (cinfo->in_color_space == JCS_UNKNOWN) + cinfo->in_color_space = JCS_EXT_RGB; TRACEMS2(cinfo, 1, JTRC_PPM, w, h); if (maxval > 255) { source->pub.get_pixel_rows = get_word_rgb_row; - } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR)) { + } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR) && + (cinfo->in_color_space == JCS_EXT_RGB +#if RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 3 + || cinfo->in_color_space == JCS_RGB +#endif + )) { source->pub.get_pixel_rows = get_raw_row; use_raw_buffer = TRUE; need_rescale = FALSE; } else { - source->pub.get_pixel_rows = get_scaled_rgb_row; + if (IsExtRGB(cinfo->in_color_space)) + source->pub.get_pixel_rows = get_rgb_row; + else if (cinfo->in_color_space == JCS_CMYK) + source->pub.get_pixel_rows = get_rgb_cmyk_row; + else + ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); } break; } + if (IsExtRGB(cinfo->in_color_space)) + cinfo->input_components = rgb_pixelsize[cinfo->in_color_space]; + else if (cinfo->in_color_space == JCS_GRAYSCALE) + cinfo->input_components = 1; + else if (cinfo->in_color_space == JCS_CMYK) + cinfo->input_components = 4; + /* Allocate space for I/O buffer: 1 or 3 bytes or words/pixel. */ if (need_iobuffer) { - source->buffer_width = (size_t) w * cinfo->input_components * - ((maxval <= 255) ? sizeof(U_CHAR) : (2 * sizeof(U_CHAR))); + if (c == '6') + source->buffer_width = (size_t)w * 3 * + ((maxval <= 255) ? sizeof(U_CHAR) : (2 * sizeof(U_CHAR))); + else + source->buffer_width = (size_t)w * + ((maxval <= 255) ? 
sizeof(U_CHAR) : (2 * sizeof(U_CHAR))); source->iobuffer = (U_CHAR *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, source->buffer_width); } @@ -407,14 +702,14 @@ start_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) if (use_raw_buffer) { /* For unscaled raw-input case, we can just map it onto the I/O buffer. */ /* Synthesize a JSAMPARRAY pointer structure */ - source->pixrow = (JSAMPROW) source->iobuffer; - source->pub.buffer = & source->pixrow; + source->pixrow = (JSAMPROW)source->iobuffer; + source->pub.buffer = &source->pixrow; source->pub.buffer_height = 1; } else { /* Need to translate anyway, so make a separate sample buffer. */ source->pub.buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) w * cinfo->input_components, (JDIMENSION) 1); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (JDIMENSION)w * cinfo->input_components, (JDIMENSION)1); source->pub.buffer_height = 1; } @@ -424,13 +719,13 @@ start_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* On 16-bit-int machines we have to be careful of maxval = 65535 */ source->rescale = (JSAMPLE *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (size_t) (((long) maxval + 1L) * - sizeof(JSAMPLE))); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (size_t)(((long)maxval + 1L) * + sizeof(JSAMPLE))); half_maxval = maxval / 2; - for (val = 0; val <= (long) maxval; val++) { + for (val = 0; val <= (long)maxval; val++) { /* The multiplication here must be done in 32 bits to avoid overflow */ - source->rescale[val] = (JSAMPLE) ((val * MAXJSAMPLE + half_maxval) / + source->rescale[val] = (JSAMPLE)((val * MAXJSAMPLE + half_maxval) / maxval); } } @@ -442,7 +737,7 @@ start_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(void) -finish_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +finish_input_ppm(j_compress_ptr cinfo, 
cjpeg_source_ptr sinfo) { /* no work */ } @@ -453,19 +748,19 @@ finish_input_ppm (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ GLOBAL(cjpeg_source_ptr) -jinit_read_ppm (j_compress_ptr cinfo) +jinit_read_ppm(j_compress_ptr cinfo) { ppm_source_ptr source; /* Create module interface object */ source = (ppm_source_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - sizeof(ppm_source_struct)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + sizeof(ppm_source_struct)); /* Fill in method ptrs, except get_pixel_rows which start_input sets */ source->pub.start_input = start_input_ppm; source->pub.finish_input = finish_input_ppm; - return (cjpeg_source_ptr) source; + return (cjpeg_source_ptr)source; } #endif /* PPM_SUPPORTED */ diff --git a/rdrle.c b/rdrle.c index 226c528..b694514 100644 --- a/rdrle.c +++ b/rdrle.c @@ -81,12 +81,12 @@ typedef struct _rle_source_struct { */ METHODDEF(void) -start_input_rle (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +start_input_rle(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { - rle_source_ptr source = (rle_source_ptr) sinfo; + rle_source_ptr source = (rle_source_ptr)sinfo; JDIMENSION width, height; #ifdef PROGRESS_REPORT - cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress; #endif /* Use RLE library routine to get the header info */ @@ -118,7 +118,7 @@ start_input_rle (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) width = source->header.xmax - source->header.xmin + 1; height = source->header.ymax - source->header.ymin + 1; source->header.xmin = 0; /* realign horizontally */ - source->header.xmax = width-1; + source->header.xmax = width - 1; cinfo->image_width = width; cinfo->image_height = height; @@ -158,16 +158,16 @@ start_input_rle (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) * (GRAYSCALE scanlines don't need converting) */ if (source->visual != GRAYSCALE) { - source->rle_row = (rle_pixel**) 
(*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) width, (JDIMENSION) cinfo->input_components); + source->rle_row = (rle_pixel **)(*cinfo->mem->alloc_sarray) + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (JDIMENSION)width, (JDIMENSION)cinfo->input_components); } /* request a virtual array to hold the image */ source->image = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - (JDIMENSION) (width * source->header.ncolors), - (JDIMENSION) height, (JDIMENSION) 1); + ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION)(width * source->header.ncolors), + (JDIMENSION)height, (JDIMENSION)1); #ifdef PROGRESS_REPORT if (progress != NULL) { @@ -187,13 +187,13 @@ start_input_rle (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(JDIMENSION) -get_rle_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_rle_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { - rle_source_ptr source = (rle_source_ptr) sinfo; + rle_source_ptr source = (rle_source_ptr)sinfo; source->row--; source->pub.buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->image, source->row, (JDIMENSION) 1, FALSE); + ((j_common_ptr)cinfo, source->image, source->row, (JDIMENSION)1, FALSE); return 1; } @@ -205,9 +205,9 @@ get_rle_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(JDIMENSION) -get_pseudocolor_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_pseudocolor_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { - rle_source_ptr source = (rle_source_ptr) sinfo; + rle_source_ptr source = (rle_source_ptr)sinfo; JSAMPROW src_row, dest_row; JDIMENSION col; rle_map *colormap; @@ -217,13 +217,13 @@ get_pseudocolor_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) dest_row = source->pub.buffer[0]; source->row--; src_row = *(*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->image, source->row, (JDIMENSION) 1, FALSE); + ((j_common_ptr)cinfo, source->image, source->row, (JDIMENSION)1, FALSE); 
for (col = cinfo->image_width; col > 0; col--) { val = GETJSAMPLE(*src_row++); - *dest_row++ = (JSAMPLE) (colormap[val ] >> 8); - *dest_row++ = (JSAMPLE) (colormap[val + 256] >> 8); - *dest_row++ = (JSAMPLE) (colormap[val + 512] >> 8); + *dest_row++ = (JSAMPLE)(colormap[val ] >> 8); + *dest_row++ = (JSAMPLE)(colormap[val + 256] >> 8); + *dest_row++ = (JSAMPLE)(colormap[val + 512] >> 8); } return 1; @@ -241,16 +241,16 @@ get_pseudocolor_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(JDIMENSION) -load_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +load_image(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { - rle_source_ptr source = (rle_source_ptr) sinfo; + rle_source_ptr source = (rle_source_ptr)sinfo; JDIMENSION row, col; - JSAMPROW scanline, red_ptr, green_ptr, blue_ptr; + JSAMPROW scanline, red_ptr, green_ptr, blue_ptr; rle_pixel **rle_row; rle_map *colormap; char channel; #ifdef PROGRESS_REPORT - cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress; #endif colormap = source->header.cmap; @@ -265,7 +265,7 @@ load_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) if (progress != NULL) { progress->pub.pass_limit = cinfo->image_height; progress->pub.pass_counter = 0; - (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + (*progress->pub.progress_monitor) ((j_common_ptr)cinfo); } #endif @@ -274,13 +274,13 @@ load_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) case GRAYSCALE: case PSEUDOCOLOR: for (row = 0; row < cinfo->image_height; row++) { - rle_row = (rle_pixel **) (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->image, row, (JDIMENSION) 1, TRUE); + rle_row = (rle_pixel **)(*cinfo->mem->access_virt_sarray) + ((j_common_ptr)cinfo, source->image, row, (JDIMENSION)1, TRUE); rle_getrow(&source->header, rle_row); #ifdef PROGRESS_REPORT if (progress != NULL) { progress->pub.pass_counter++; - (*progress->pub.progress_monitor) ((j_common_ptr) 
cinfo); + (*progress->pub.progress_monitor) ((j_common_ptr)cinfo); } #endif } @@ -290,7 +290,7 @@ load_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) case TRUECOLOR: for (row = 0; row < cinfo->image_height; row++) { scanline = *(*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->image, row, (JDIMENSION) 1, TRUE); + ((j_common_ptr)cinfo, source->image, row, (JDIMENSION)1, TRUE); rle_row = source->rle_row; rle_getrow(&source->header, rle_row); @@ -304,7 +304,7 @@ load_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) #ifdef PROGRESS_REPORT if (progress != NULL) { progress->pub.pass_counter++; - (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + (*progress->pub.progress_monitor) ((j_common_ptr)cinfo); } #endif } @@ -313,7 +313,7 @@ load_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) case DIRECTCOLOR: for (row = 0; row < cinfo->image_height; row++) { scanline = *(*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->image, row, (JDIMENSION) 1, TRUE); + ((j_common_ptr)cinfo, source->image, row, (JDIMENSION)1, TRUE); rle_getrow(&source->header, rle_row); red_ptr = rle_row[0]; @@ -329,7 +329,7 @@ load_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) #ifdef PROGRESS_REPORT if (progress != NULL) { progress->pub.pass_counter++; - (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + (*progress->pub.progress_monitor) ((j_common_ptr)cinfo); } #endif } @@ -359,7 +359,7 @@ load_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(void) -finish_input_rle (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +finish_input_rle(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { /* no work */ } @@ -370,20 +370,20 @@ finish_input_rle (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ GLOBAL(cjpeg_source_ptr) -jinit_read_rle (j_compress_ptr cinfo) +jinit_read_rle(j_compress_ptr cinfo) { rle_source_ptr source; /* Create module interface object */ source = (rle_source_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, 
JPOOL_IMAGE, - sizeof(rle_source_struct)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + sizeof(rle_source_struct)); /* Fill in method ptrs */ source->pub.start_input = start_input_rle; source->pub.finish_input = finish_input_rle; source->pub.get_pixel_rows = load_image; - return (cjpeg_source_ptr) source; + return (cjpeg_source_ptr)source; } #endif /* RLE_SUPPORTED */ diff --git a/rdswitch.c b/rdswitch.c index 7d870c3..c50c33e 100644 --- a/rdswitch.c +++ b/rdswitch.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2010, D. R. Commander. + * Copyright (C) 2010, 2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -22,7 +22,7 @@ LOCAL(int) -text_getc (FILE *file) +text_getc(FILE *file) /* Read next char, skipping over any comments (# to end of line) */ /* A comment/newline sequence is returned as a newline */ { @@ -39,7 +39,7 @@ text_getc (FILE *file) LOCAL(boolean) -read_text_integer (FILE *file, long *result, int *termchar) +read_text_integer(FILE *file, long *result, int *termchar) /* Read an unsigned decimal integer from a file, store it in result */ /* Reads one trailing character after the integer; returns it in termchar */ { @@ -55,14 +55,14 @@ read_text_integer (FILE *file, long *result, int *termchar) } } while (isspace(ch)); - if (! isdigit(ch)) { + if (!isdigit(ch)) { *termchar = ch; return FALSE; } val = ch - '0'; while ((ch = text_getc(file)) != EOF) { - if (! 
isdigit(ch)) + if (!isdigit(ch)) break; val *= 10; val += ch - '0'; @@ -74,12 +74,11 @@ read_text_integer (FILE *file, long *result, int *termchar) #if JPEG_LIB_VERSION < 70 -static int q_scale_factor[NUM_QUANT_TBLS] = {100, 100, 100, 100}; +static int q_scale_factor[NUM_QUANT_TBLS] = { 100, 100, 100, 100 }; #endif GLOBAL(boolean) -read_quant_tables (j_compress_ptr cinfo, char *filename, - boolean force_baseline) +read_quant_tables(j_compress_ptr cinfo, char *filename, boolean force_baseline) /* Read a set of quantization tables from the specified file. * The file is plain ASCII text: decimal numbers with whitespace between. * Comments preceded by '#' may be included in the file. @@ -107,14 +106,14 @@ read_quant_tables (j_compress_ptr cinfo, char *filename, fclose(fp); return FALSE; } - table[0] = (unsigned int) val; + table[0] = (unsigned int)val; for (i = 1; i < DCTSIZE2; i++) { - if (! read_text_integer(fp, &val, &termchar)) { + if (!read_text_integer(fp, &val, &termchar)) { fprintf(stderr, "Invalid table data in file %s\n", filename); fclose(fp); return FALSE; } - table[i] = (unsigned int) val; + table[i] = (unsigned int)val; } #if JPEG_LIB_VERSION >= 70 jpeg_add_quant_table(cinfo, tblno, table, cinfo->q_scale_factor[tblno], @@ -140,14 +139,14 @@ read_quant_tables (j_compress_ptr cinfo, char *filename, #ifdef C_MULTISCAN_FILES_SUPPORTED LOCAL(boolean) -read_scan_integer (FILE *file, long *result, int *termchar) +read_scan_integer(FILE *file, long *result, int *termchar) /* Variant of read_text_integer that always looks for a non-space termchar; * this simplifies parsing of punctuation in scan scripts. */ { register int ch; - if (! 
read_text_integer(file, result, termchar)) + if (!read_text_integer(file, result, termchar)) return FALSE; ch = *termchar; while (ch != EOF && isspace(ch)) @@ -169,7 +168,7 @@ read_scan_integer (FILE *file, long *result, int *termchar) GLOBAL(boolean) -read_scan_script (j_compress_ptr cinfo, char *filename) +read_scan_script(j_compress_ptr cinfo, char *filename) /* Read a scan script from the specified text file. * Each entry in the file defines one scan to be emitted. * Entries are separated by semicolons ';'. @@ -206,7 +205,7 @@ read_scan_script (j_compress_ptr cinfo, char *filename) fclose(fp); return FALSE; } - scanptr->component_index[0] = (int) val; + scanptr->component_index[0] = (int)val; ncomps = 1; while (termchar == ' ') { if (ncomps >= MAX_COMPS_IN_SCAN) { @@ -215,29 +214,29 @@ read_scan_script (j_compress_ptr cinfo, char *filename) fclose(fp); return FALSE; } - if (! read_scan_integer(fp, &val, &termchar)) + if (!read_scan_integer(fp, &val, &termchar)) goto bogus; - scanptr->component_index[ncomps] = (int) val; + scanptr->component_index[ncomps] = (int)val; ncomps++; } scanptr->comps_in_scan = ncomps; if (termchar == ':') { - if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ') + if (!read_scan_integer(fp, &val, &termchar) || termchar != ' ') goto bogus; - scanptr->Ss = (int) val; - if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ') + scanptr->Ss = (int)val; + if (!read_scan_integer(fp, &val, &termchar) || termchar != ' ') goto bogus; - scanptr->Se = (int) val; - if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ') + scanptr->Se = (int)val; + if (!read_scan_integer(fp, &val, &termchar) || termchar != ' ') goto bogus; - scanptr->Ah = (int) val; - if (! 
read_scan_integer(fp, &val, &termchar)) + scanptr->Ah = (int)val; + if (!read_scan_integer(fp, &val, &termchar)) goto bogus; - scanptr->Al = (int) val; + scanptr->Al = (int)val; } else { /* set non-progressive parameters */ scanptr->Ss = 0; - scanptr->Se = DCTSIZE2-1; + scanptr->Se = DCTSIZE2 - 1; scanptr->Ah = 0; scanptr->Al = 0; } @@ -262,7 +261,7 @@ bogus: * but if you want to compress multiple images you'd want JPOOL_PERMANENT. */ scanptr = (jpeg_scan_info *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, scanno * sizeof(jpeg_scan_info)); MEMCOPY(scanptr, scans, scanno * sizeof(jpeg_scan_info)); cinfo->scan_info = scanptr; @@ -277,7 +276,8 @@ bogus: #if JPEG_LIB_VERSION < 70 -/* These are the sample quantization tables given in JPEG spec section K.1. +/* These are the sample quantization tables given in Annex K (Clause K.1) of + * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994. * The spec says that the values given produce "good" quality, and * when divided by 2, "very good" quality. */ @@ -304,18 +304,18 @@ static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = { LOCAL(void) -jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) +jpeg_default_qtables(j_compress_ptr cinfo, boolean force_baseline) { - jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, - q_scale_factor[0], force_baseline); - jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, - q_scale_factor[1], force_baseline); + jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, q_scale_factor[0], + force_baseline); + jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, q_scale_factor[1], + force_baseline); } #endif GLOBAL(boolean) -set_quality_ratings (j_compress_ptr cinfo, char *arg, boolean force_baseline) +set_quality_ratings(j_compress_ptr cinfo, char *arg, boolean force_baseline) /* Process a quality-ratings parameter string, of the form * N[,N,...] 
* If there are more q-table slots than parameters, the last value is replicated. @@ -355,7 +355,7 @@ set_quality_ratings (j_compress_ptr cinfo, char *arg, boolean force_baseline) GLOBAL(boolean) -set_quant_slots (j_compress_ptr cinfo, char *arg) +set_quant_slots(j_compress_ptr cinfo, char *arg) /* Process a quantization-table-selectors parameter string, of the form * N[,N,...] * If there are more components than parameters, the last value is replicated. @@ -374,7 +374,7 @@ set_quant_slots (j_compress_ptr cinfo, char *arg) return FALSE; if (val < 0 || val >= NUM_QUANT_TBLS) { fprintf(stderr, "JPEG quantization tables are numbered 0..%d\n", - NUM_QUANT_TBLS-1); + NUM_QUANT_TBLS - 1); return FALSE; } cinfo->comp_info[ci].quant_tbl_no = val; @@ -390,7 +390,7 @@ set_quant_slots (j_compress_ptr cinfo, char *arg) GLOBAL(boolean) -set_sample_factors (j_compress_ptr cinfo, char *arg) +set_sample_factors(j_compress_ptr cinfo, char *arg) /* Process a sample-factors parameter string, of the form * HxV[,HxV,...] * If there are more components than parameters, "1x1" is assumed for the rest. diff --git a/rdtarga.c b/rdtarga.c index b9bbd07..37bd286 100644 --- a/rdtarga.c +++ b/rdtarga.c @@ -3,8 +3,9 @@ * * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1996, Thomas G. Lane. - * It was modified by The libjpeg-turbo Project to include only code relevant - * to libjpeg-turbo. + * Modified 2017 by Guido Vollbeding. + * libjpeg-turbo Modifications: + * Copyright (C) 2018, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -29,19 +30,20 @@ #ifdef HAVE_UNSIGNED_CHAR typedef unsigned char U_CHAR; -#define UCH(x) ((int) (x)) +#define UCH(x) ((int)(x)) #else /* !HAVE_UNSIGNED_CHAR */ #ifdef __CHAR_UNSIGNED__ typedef char U_CHAR; -#define UCH(x) ((int) (x)) +#define UCH(x) ((int)(x)) #else typedef char U_CHAR; -#define UCH(x) ((int) (x) & 0xFF) +#define UCH(x) ((int)(x) & 0xFF) #endif #endif /* HAVE_UNSIGNED_CHAR */ -#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len))) +#define ReadOK(file, buffer, len) \ + (JFREAD(file, buffer, len) == ((size_t)(len))) /* Private version of data source object */ @@ -65,6 +67,7 @@ typedef struct _tga_source_struct { U_CHAR tga_pixel[4]; int pixel_size; /* Bytes per Targa pixel (1 to 4) */ + int cmap_length; /* colormap length */ /* State info for reading RLE-coded pixels; both counts must be init to 0 */ int block_count; /* # of pixels remaining in RLE block */ @@ -87,7 +90,7 @@ static const UINT8 c5to8bits[32] = { LOCAL(int) -read_byte (tga_source_ptr sinfo) +read_byte(tga_source_ptr sinfo) /* Read next byte from Targa file */ { register FILE *infile = sinfo->pub.input_file; @@ -100,7 +103,7 @@ read_byte (tga_source_ptr sinfo) LOCAL(void) -read_colormap (tga_source_ptr sinfo, int cmaplen, int mapentrysize) +read_colormap(tga_source_ptr sinfo, int cmaplen, int mapentrysize) /* Read the colormap from a Targa file */ { int i; @@ -110,9 +113,9 @@ read_colormap (tga_source_ptr sinfo, int cmaplen, int mapentrysize) ERREXIT(sinfo->cinfo, JERR_TGA_BADCMAP); for (i = 0; i < cmaplen; i++) { - sinfo->colormap[2][i] = (JSAMPLE) read_byte(sinfo); - sinfo->colormap[1][i] = (JSAMPLE) read_byte(sinfo); - sinfo->colormap[0][i] = (JSAMPLE) read_byte(sinfo); + sinfo->colormap[2][i] = (JSAMPLE)read_byte(sinfo); + sinfo->colormap[1][i] = (JSAMPLE)read_byte(sinfo); + sinfo->colormap[0][i] = (JSAMPLE)read_byte(sinfo); } } @@ -122,23 +125,21 @@ read_colormap (tga_source_ptr sinfo, int cmaplen, int mapentrysize) */ METHODDEF(void) 
-read_non_rle_pixel (tga_source_ptr sinfo) +read_non_rle_pixel(tga_source_ptr sinfo) /* Read one Targa pixel from the input file; no RLE expansion */ { - register FILE *infile = sinfo->pub.input_file; register int i; for (i = 0; i < sinfo->pixel_size; i++) { - sinfo->tga_pixel[i] = (U_CHAR) getc(infile); + sinfo->tga_pixel[i] = (U_CHAR)read_byte(sinfo); } } METHODDEF(void) -read_rle_pixel (tga_source_ptr sinfo) +read_rle_pixel(tga_source_ptr sinfo) /* Read one Targa pixel from the input file, expanding RLE data as needed */ { - register FILE *infile = sinfo->pub.input_file; register int i; /* Duplicate previously read pixel? */ @@ -160,7 +161,7 @@ read_rle_pixel (tga_source_ptr sinfo) /* Read next pixel */ for (i = 0; i < sinfo->pixel_size; i++) { - sinfo->tga_pixel[i] = (U_CHAR) getc(infile); + sinfo->tga_pixel[i] = (U_CHAR)read_byte(sinfo); } } @@ -173,35 +174,38 @@ read_rle_pixel (tga_source_ptr sinfo) METHODDEF(JDIMENSION) -get_8bit_gray_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_8bit_gray_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading 8-bit grayscale pixels */ { - tga_source_ptr source = (tga_source_ptr) sinfo; + tga_source_ptr source = (tga_source_ptr)sinfo; register JSAMPROW ptr; register JDIMENSION col; ptr = source->pub.buffer[0]; for (col = cinfo->image_width; col > 0; col--) { (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ - *ptr++ = (JSAMPLE) UCH(source->tga_pixel[0]); + *ptr++ = (JSAMPLE)UCH(source->tga_pixel[0]); } return 1; } METHODDEF(JDIMENSION) -get_8bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_8bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading 8-bit colormap indexes */ { - tga_source_ptr source = (tga_source_ptr) sinfo; + tga_source_ptr source = (tga_source_ptr)sinfo; register int t; register JSAMPROW ptr; register JDIMENSION col; register JSAMPARRAY colormap = source->colormap; + int cmaplen = source->cmap_length; ptr = 
source->pub.buffer[0]; for (col = cinfo->image_width; col > 0; col--) { (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ t = UCH(source->tga_pixel[0]); + if (t >= cmaplen) + ERREXIT(cinfo, JERR_TGA_BADPARMS); *ptr++ = colormap[0][t]; *ptr++ = colormap[1][t]; *ptr++ = colormap[2][t]; @@ -210,10 +214,10 @@ get_8bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) } METHODDEF(JDIMENSION) -get_16bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_16bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading 16-bit pixels */ { - tga_source_ptr source = (tga_source_ptr) sinfo; + tga_source_ptr source = (tga_source_ptr)sinfo; register int t; register JSAMPROW ptr; register JDIMENSION col; @@ -227,30 +231,30 @@ get_16bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) * The format of the 16-bit (LSB first) input word is * xRRRRRGGGGGBBBBB */ - ptr[2] = (JSAMPLE) c5to8bits[t & 0x1F]; + ptr[2] = (JSAMPLE)c5to8bits[t & 0x1F]; t >>= 5; - ptr[1] = (JSAMPLE) c5to8bits[t & 0x1F]; + ptr[1] = (JSAMPLE)c5to8bits[t & 0x1F]; t >>= 5; - ptr[0] = (JSAMPLE) c5to8bits[t & 0x1F]; + ptr[0] = (JSAMPLE)c5to8bits[t & 0x1F]; ptr += 3; } return 1; } METHODDEF(JDIMENSION) -get_24bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_24bit_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* This version is for reading 24-bit pixels */ { - tga_source_ptr source = (tga_source_ptr) sinfo; + tga_source_ptr source = (tga_source_ptr)sinfo; register JSAMPROW ptr; register JDIMENSION col; ptr = source->pub.buffer[0]; for (col = cinfo->image_width; col > 0; col--) { (*source->read_pixel) (source); /* Load next pixel into tga_pixel */ - *ptr++ = (JSAMPLE) UCH(source->tga_pixel[2]); /* change BGR to RGB order */ - *ptr++ = (JSAMPLE) UCH(source->tga_pixel[1]); - *ptr++ = (JSAMPLE) UCH(source->tga_pixel[0]); + *ptr++ = (JSAMPLE)UCH(source->tga_pixel[2]); /* change BGR to RGB order */ + *ptr++ = (JSAMPLE)UCH(source->tga_pixel[1]); + *ptr++ = 
(JSAMPLE)UCH(source->tga_pixel[0]); } return 1; } @@ -272,9 +276,9 @@ get_24bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(JDIMENSION) -get_memory_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +get_memory_row(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { - tga_source_ptr source = (tga_source_ptr) sinfo; + tga_source_ptr source = (tga_source_ptr)sinfo; JDIMENSION source_row; /* Compute row of source that maps to current_row of normal order */ @@ -284,8 +288,8 @@ get_memory_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* Fetch that row from virtual array */ source->pub.buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->whole_image, - source_row, (JDIMENSION) 1, FALSE); + ((j_common_ptr)cinfo, source->whole_image, + source_row, (JDIMENSION)1, FALSE); source->current_row++; return 1; @@ -299,21 +303,21 @@ get_memory_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(JDIMENSION) -preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +preload_image(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { - tga_source_ptr source = (tga_source_ptr) sinfo; + tga_source_ptr source = (tga_source_ptr)sinfo; JDIMENSION row; - cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress; /* Read the data into a virtual array in input-file row order. 
*/ for (row = 0; row < cinfo->image_height; row++) { if (progress != NULL) { - progress->pub.pass_counter = (long) row; - progress->pub.pass_limit = (long) cinfo->image_height; - (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + progress->pub.pass_counter = (long)row; + progress->pub.pass_limit = (long)cinfo->image_height; + (*progress->pub.progress_monitor) ((j_common_ptr)cinfo); } source->pub.buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, source->whole_image, row, (JDIMENSION) 1, TRUE); + ((j_common_ptr)cinfo, source->whole_image, row, (JDIMENSION)1, TRUE); (*source->get_pixel_rows) (cinfo, sinfo); } if (progress != NULL) @@ -332,18 +336,18 @@ preload_image (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(void) -start_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +start_input_tga(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { - tga_source_ptr source = (tga_source_ptr) sinfo; + tga_source_ptr source = (tga_source_ptr)sinfo; U_CHAR targaheader[18]; int idlen, cmaptype, subtype, flags, interlace_type, components; unsigned int width, height, maplen; boolean is_bottom_up; -#define GET_2B(offset) ((unsigned int) UCH(targaheader[offset]) + \ - (((unsigned int) UCH(targaheader[offset+1])) << 8)) +#define GET_2B(offset) ((unsigned int)UCH(targaheader[offset]) + \ + (((unsigned int)UCH(targaheader[offset + 1])) << 8)) - if (! ReadOK(source->pub.input_file, targaheader, 18)) + if (!ReadOK(source->pub.input_file, targaheader, 18)) ERREXIT(cinfo, JERR_INPUT_EOF); /* Pretend "15-bit" pixels are 16-bit --- we ignore attribute bit anyway */ @@ -425,10 +429,10 @@ start_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) if (is_bottom_up) { /* Create a virtual array to buffer the upside-down image. 
*/ source->whole_image = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - (JDIMENSION) width * components, (JDIMENSION) height, (JDIMENSION) 1); + ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION)width * components, (JDIMENSION)height, (JDIMENSION)1); if (cinfo->progress != NULL) { - cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress; progress->total_extra_passes++; /* count file input as separate pass */ } /* source->pub.buffer will point to the virtual array. */ @@ -438,27 +442,29 @@ start_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) /* Don't need a virtual array, but do need a one-row input buffer. */ source->whole_image = NULL; source->pub.buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) width * components, (JDIMENSION) 1); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + (JDIMENSION)width * components, (JDIMENSION)1); source->pub.buffer_height = 1; source->pub.get_pixel_rows = source->get_pixel_rows; } while (idlen--) /* Throw away ID field */ - (void) read_byte(source); + (void)read_byte(source); if (maplen > 0) { if (maplen > 256 || GET_2B(3) != 0) ERREXIT(cinfo, JERR_TGA_BADCMAP); /* Allocate space to store the colormap */ source->colormap = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, (JDIMENSION) maplen, (JDIMENSION) 3); + ((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)maplen, (JDIMENSION)3); + source->cmap_length = (int)maplen; /* and read it from the file */ - read_colormap(source, (int) maplen, UCH(targaheader[7])); + read_colormap(source, (int)maplen, UCH(targaheader[7])); } else { if (cmaptype) /* but you promised a cmap! 
*/ ERREXIT(cinfo, JERR_TGA_BADPARMS); source->colormap = NULL; + source->cmap_length = 0; } cinfo->input_components = components; @@ -473,7 +479,7 @@ start_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ METHODDEF(void) -finish_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) +finish_input_tga(j_compress_ptr cinfo, cjpeg_source_ptr sinfo) { /* no work */ } @@ -484,20 +490,20 @@ finish_input_tga (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) */ GLOBAL(cjpeg_source_ptr) -jinit_read_targa (j_compress_ptr cinfo) +jinit_read_targa(j_compress_ptr cinfo) { tga_source_ptr source; /* Create module interface object */ source = (tga_source_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - sizeof(tga_source_struct)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + sizeof(tga_source_struct)); source->cinfo = cinfo; /* make back link for subroutines */ /* Fill in method ptrs, except get_pixel_rows which start_input sets */ source->pub.start_input = start_input_tga; source->pub.finish_input = finish_input_tga; - return (cjpeg_source_ptr) source; + return (cjpeg_source_ptr)source; } #endif /* TARGA_SUPPORTED */ diff --git a/release/Distribution.xml b/release/Distribution.xml.in similarity index 63% rename from release/Distribution.xml rename to release/Distribution.xml.in index ee73ab0..e1f79ee 100644 --- a/release/Distribution.xml +++ b/release/Distribution.xml.in @@ -1,6 +1,6 @@ - libjpeg-turbo + @CMAKE_PROJECT_NAME@ @@ -12,13 +12,13 @@ - + - - + + libjpeg-turbo.pkg + id="@PKGID@">@PKGNAME@.pkg diff --git a/release/ReadMe.txt b/release/ReadMe.txt index 7fb8d0f..cf9012a 100644 --- a/release/ReadMe.txt +++ b/release/ReadMe.txt @@ -1,4 +1,4 @@ -libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression on x86, x86-64, ARM, and PowerPC systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. 
On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines. In many cases, the performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs. +libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines. In many cases, the performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs. libjpeg-turbo implements both the traditional libjpeg API as well as the less powerful but more straightforward TurboJPEG API. libjpeg-turbo also features colorspace extensions that allow it to compress from/decompress to 32-bit and big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java interface. 
diff --git a/release/deb-control.tmpl b/release/deb-control.in similarity index 82% rename from release/deb-control.tmpl rename to release/deb-control.in index 681721d..08131c6 100644 --- a/release/deb-control.tmpl +++ b/release/deb-control.in @@ -1,16 +1,17 @@ Package: {__PKGNAME} -Version: {__VERSION}-{__BUILD} +Version: @VERSION@-@BUILD@ Section: misc Priority: optional Architecture: {__ARCH} Essential: no -Maintainer: The libjpeg-turbo Project -Homepage: http://www.libjpeg-turbo.org +Maintainer: @PKGVENDOR@ <@PKGEMAIL@> +Homepage: @PKGURL@ Installed-Size: {__SIZE} Description: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, - NEON, AltiVec) to accelerate baseline JPEG compression and decompression on - x86, x86-64, ARM, and PowerPC systems. On such systems, libjpeg-turbo is + AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression + on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG + compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines. 
In many cases, the diff --git a/release/installer.nsi.in b/release/installer.nsi.in new file mode 100755 index 0000000..44419fa --- /dev/null +++ b/release/installer.nsi.in @@ -0,0 +1,191 @@ +!include x64.nsh +Name "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@" +OutFile "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}@INST_NAME@.exe" +InstallDir "@INST_DIR@" + +SetCompressor bzip2 + +Page directory +Page instfiles + +UninstPage uninstConfirm +UninstPage instfiles + +Section "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ (required)" +!ifdef WIN64 + ${If} ${RunningX64} + ${DisableX64FSRedirection} + ${Endif} +!endif + SectionIn RO +!ifdef GCC + IfFileExists $SYSDIR/libturbojpeg.dll exists 0 +!else + IfFileExists $SYSDIR/turbojpeg.dll exists 0 +!endif + goto notexists + exists: +!ifdef GCC + MessageBox MB_OK "An existing version of the @CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ is already installed. Please uninstall it first." +!else + MessageBox MB_OK "An existing version of the @CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ or the TurboJPEG SDK is already installed. Please uninstall it first." 
+!endif + quit + + notexists: + SetOutPath $SYSDIR +!ifdef GCC + File "@CMAKE_CURRENT_BINARY_DIR@\libturbojpeg.dll" +!else + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}turbojpeg.dll" +!endif + SetOutPath $INSTDIR\bin +!ifdef GCC + File "@CMAKE_CURRENT_BINARY_DIR@\libturbojpeg.dll" +!else + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}turbojpeg.dll" +!endif +!ifdef GCC + File "@CMAKE_CURRENT_BINARY_DIR@\libjpeg-@SO_MAJOR_VERSION@.dll" +!else + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}jpeg@SO_MAJOR_VERSION@.dll" +!endif + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}cjpeg.exe" + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}djpeg.exe" + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}jpegtran.exe" + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}tjbench.exe" + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}rdjpgcom.exe" + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}wrjpgcom.exe" + SetOutPath $INSTDIR\lib +!ifdef GCC + File "@CMAKE_CURRENT_BINARY_DIR@\libturbojpeg.dll.a" + File "@CMAKE_CURRENT_BINARY_DIR@\libturbojpeg.a" + File "@CMAKE_CURRENT_BINARY_DIR@\libjpeg.dll.a" + File "@CMAKE_CURRENT_BINARY_DIR@\libjpeg.a" +!else + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}turbojpeg.lib" + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}turbojpeg-static.lib" + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}jpeg.lib" + File "@CMAKE_CURRENT_BINARY_DIR@\${BUILDDIR}jpeg-static.lib" +!endif + SetOutPath $INSTDIR\lib\pkgconfig + File "@CMAKE_CURRENT_BINARY_DIR@\pkgscripts\libjpeg.pc" + File "@CMAKE_CURRENT_BINARY_DIR@\pkgscripts\libturbojpeg.pc" +!ifdef JAVA + SetOutPath $INSTDIR\classes + File "@CMAKE_CURRENT_BINARY_DIR@\java\turbojpeg.jar" +!endif + SetOutPath $INSTDIR\include + File "@CMAKE_CURRENT_BINARY_DIR@\jconfig.h" + File "@CMAKE_CURRENT_SOURCE_DIR@\jerror.h" + File "@CMAKE_CURRENT_SOURCE_DIR@\jmorecfg.h" + File "@CMAKE_CURRENT_SOURCE_DIR@\jpeglib.h" + File "@CMAKE_CURRENT_SOURCE_DIR@\turbojpeg.h" + SetOutPath $INSTDIR\doc + File "@CMAKE_CURRENT_SOURCE_DIR@\README.ijg" + File 
"@CMAKE_CURRENT_SOURCE_DIR@\README.md" + File "@CMAKE_CURRENT_SOURCE_DIR@\LICENSE.md" + File "@CMAKE_CURRENT_SOURCE_DIR@\example.txt" + File "@CMAKE_CURRENT_SOURCE_DIR@\libjpeg.txt" + File "@CMAKE_CURRENT_SOURCE_DIR@\structure.txt" + File "@CMAKE_CURRENT_SOURCE_DIR@\usage.txt" + File "@CMAKE_CURRENT_SOURCE_DIR@\wizard.txt" + File "@CMAKE_CURRENT_SOURCE_DIR@\tjexample.c" + File "@CMAKE_CURRENT_SOURCE_DIR@\java\TJExample.java" +!ifdef GCC + SetOutPath $INSTDIR\man\man1 + File "@CMAKE_CURRENT_SOURCE_DIR@\cjpeg.1" + File "@CMAKE_CURRENT_SOURCE_DIR@\djpeg.1" + File "@CMAKE_CURRENT_SOURCE_DIR@\jpegtran.1" + File "@CMAKE_CURRENT_SOURCE_DIR@\rdjpgcom.1" + File "@CMAKE_CURRENT_SOURCE_DIR@\wrjpgcom.1" +!endif + + WriteRegStr HKLM "SOFTWARE\@INST_REG_NAME@ @VERSION@" "Install_Dir" "$INSTDIR" + + WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "DisplayName" "@CMAKE_PROJECT_NAME@ SDK v@VERSION@ for @INST_PLATFORM@" + WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "UninstallString" '"$INSTDIR\uninstall_@VERSION@.exe"' + WriteRegDWORD HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "NoModify" 1 + WriteRegDWORD HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "NoRepair" 1 + WriteUninstaller "uninstall_@VERSION@.exe" +SectionEnd + +Section "Uninstall" +!ifdef WIN64 + ${If} ${RunningX64} + ${DisableX64FSRedirection} + ${Endif} +!endif + + SetShellVarContext all + + DeleteRegKey HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" + DeleteRegKey HKLM "SOFTWARE\@INST_REG_NAME@ @VERSION@" + +!ifdef GCC + Delete $INSTDIR\bin\libjpeg-@SO_MAJOR_VERSION@.dll + Delete $INSTDIR\bin\libturbojpeg.dll + Delete $SYSDIR\libturbojpeg.dll + Delete $INSTDIR\lib\libturbojpeg.dll.a + Delete $INSTDIR\lib\libturbojpeg.a + Delete $INSTDIR\lib\libjpeg.dll.a + Delete $INSTDIR\lib\libjpeg.a +!else + Delete 
$INSTDIR\bin\jpeg@SO_MAJOR_VERSION@.dll + Delete $INSTDIR\bin\turbojpeg.dll + Delete $SYSDIR\turbojpeg.dll + Delete $INSTDIR\lib\jpeg.lib + Delete $INSTDIR\lib\jpeg-static.lib + Delete $INSTDIR\lib\turbojpeg.lib + Delete $INSTDIR\lib\turbojpeg-static.lib +!endif + Delete $INSTDIR\lib\pkgconfig\libjpeg.pc + Delete $INSTDIR\lib\pkgconfig\libturbojpeg.pc +!ifdef JAVA + Delete $INSTDIR\classes\turbojpeg.jar +!endif + Delete $INSTDIR\bin\cjpeg.exe + Delete $INSTDIR\bin\djpeg.exe + Delete $INSTDIR\bin\jpegtran.exe + Delete $INSTDIR\bin\tjbench.exe + Delete $INSTDIR\bin\rdjpgcom.exe + Delete $INSTDIR\bin\wrjpgcom.exe + Delete $INSTDIR\include\jconfig.h + Delete $INSTDIR\include\jerror.h + Delete $INSTDIR\include\jmorecfg.h + Delete $INSTDIR\include\jpeglib.h + Delete $INSTDIR\include\turbojpeg.h + Delete $INSTDIR\uninstall_@VERSION@.exe + Delete $INSTDIR\doc\README.ijg + Delete $INSTDIR\doc\README.md + Delete $INSTDIR\doc\LICENSE.md + Delete $INSTDIR\doc\example.txt + Delete $INSTDIR\doc\libjpeg.txt + Delete $INSTDIR\doc\structure.txt + Delete $INSTDIR\doc\usage.txt + Delete $INSTDIR\doc\wizard.txt + Delete $INSTDIR\doc\tjexample.c + Delete $INSTDIR\doc\TJExample.java +!ifdef GCC + Delete $INSTDIR\man\man1\cjpeg.1 + Delete $INSTDIR\man\man1\djpeg.1 + Delete $INSTDIR\man\man1\jpegtran.1 + Delete $INSTDIR\man\man1\rdjpgcom.1 + Delete $INSTDIR\man\man1\wrjpgcom.1 +!endif + + RMDir "$INSTDIR\include" + RMDir "$INSTDIR\lib\pkgconfig" + RMDir "$INSTDIR\lib" + RMDir "$INSTDIR\doc" +!ifdef GCC + RMDir "$INSTDIR\man\man1" + RMDir "$INSTDIR\man" +!endif +!ifdef JAVA + RMDir "$INSTDIR\classes" +!endif + RMDir "$INSTDIR\bin" + RMDir "$INSTDIR" + +SectionEnd diff --git a/release/libjpeg-turbo.nsi.in b/release/libjpeg-turbo.nsi.in deleted file mode 100755 index f458b81..0000000 --- a/release/libjpeg-turbo.nsi.in +++ /dev/null @@ -1,162 +0,0 @@ -!include x64.nsh -Name "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@" -OutFile "@CMAKE_BINARY_DIR@\${BUILDDIR}@INST_NAME@.exe" -InstallDir 
@INST_DIR@ - -SetCompressor bzip2 - -Page directory -Page instfiles - -UninstPage uninstConfirm -UninstPage instfiles - -Section "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ (required)" -!ifdef WIN64 - ${If} ${RunningX64} - ${DisableX64FSRedirection} - ${Endif} -!endif - SectionIn RO -!ifdef GCC - IfFileExists $SYSDIR/libturbojpeg.dll exists 0 -!else - IfFileExists $SYSDIR/turbojpeg.dll exists 0 -!endif - goto notexists - exists: -!ifdef GCC - MessageBox MB_OK "An existing version of the @CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ is already installed. Please uninstall it first." -!else - MessageBox MB_OK "An existing version of the @CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ or the TurboJPEG SDK is already installed. Please uninstall it first." -!endif - quit - - notexists: - SetOutPath $SYSDIR -!ifdef GCC - File "@CMAKE_BINARY_DIR@\libturbojpeg.dll" -!else - File "@CMAKE_BINARY_DIR@\${BUILDDIR}turbojpeg.dll" -!endif - SetOutPath $INSTDIR\bin -!ifdef GCC - File "@CMAKE_BINARY_DIR@\libturbojpeg.dll" -!else - File "@CMAKE_BINARY_DIR@\${BUILDDIR}turbojpeg.dll" -!endif -!ifdef GCC - File "/oname=libjpeg-@DLL_VERSION@.dll" "@CMAKE_BINARY_DIR@\sharedlib\libjpeg-*.dll" -!else - File "@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}jpeg@DLL_VERSION@.dll" -!endif - File "@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}cjpeg.exe" - File "@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}djpeg.exe" - File "@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}jpegtran.exe" - File "@CMAKE_BINARY_DIR@\${BUILDDIR}tjbench.exe" - File "@CMAKE_BINARY_DIR@\${BUILDDIR}rdjpgcom.exe" - File "@CMAKE_BINARY_DIR@\${BUILDDIR}wrjpgcom.exe" - SetOutPath $INSTDIR\lib -!ifdef GCC - File "@CMAKE_BINARY_DIR@\libturbojpeg.dll.a" - File "@CMAKE_BINARY_DIR@\libturbojpeg.a" - File "@CMAKE_BINARY_DIR@\sharedlib\libjpeg.dll.a" - File "@CMAKE_BINARY_DIR@\libjpeg.a" -!else - File "@CMAKE_BINARY_DIR@\${BUILDDIR}turbojpeg.lib" - File "@CMAKE_BINARY_DIR@\${BUILDDIR}turbojpeg-static.lib" - File 
"@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}jpeg.lib" - File "@CMAKE_BINARY_DIR@\${BUILDDIR}jpeg-static.lib" -!endif -!ifdef JAVA - SetOutPath $INSTDIR\classes - File "@CMAKE_BINARY_DIR@\java\${BUILDDIR}turbojpeg.jar" -!endif - SetOutPath $INSTDIR\include - File "@CMAKE_BINARY_DIR@\jconfig.h" - File "@CMAKE_SOURCE_DIR@\jerror.h" - File "@CMAKE_SOURCE_DIR@\jmorecfg.h" - File "@CMAKE_SOURCE_DIR@\jpeglib.h" - File "@CMAKE_SOURCE_DIR@\turbojpeg.h" - SetOutPath $INSTDIR\doc - File "@CMAKE_SOURCE_DIR@\README.ijg" - File "@CMAKE_SOURCE_DIR@\README.md" - File "@CMAKE_SOURCE_DIR@\LICENSE.md" - File "@CMAKE_SOURCE_DIR@\example.c" - File "@CMAKE_SOURCE_DIR@\libjpeg.txt" - File "@CMAKE_SOURCE_DIR@\structure.txt" - File "@CMAKE_SOURCE_DIR@\usage.txt" - File "@CMAKE_SOURCE_DIR@\wizard.txt" - - WriteRegStr HKLM "SOFTWARE\@INST_REG_NAME@ @VERSION@" "Install_Dir" "$INSTDIR" - - WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "DisplayName" "@CMAKE_PROJECT_NAME@ SDK v@VERSION@ for @INST_PLATFORM@" - WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "UninstallString" '"$INSTDIR\uninstall_@VERSION@.exe"' - WriteRegDWORD HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "NoModify" 1 - WriteRegDWORD HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "NoRepair" 1 - WriteUninstaller "uninstall_@VERSION@.exe" -SectionEnd - -Section "Uninstall" -!ifdef WIN64 - ${If} ${RunningX64} - ${DisableX64FSRedirection} - ${Endif} -!endif - - SetShellVarContext all - - DeleteRegKey HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" - DeleteRegKey HKLM "SOFTWARE\@INST_REG_NAME@ @VERSION@" - -!ifdef GCC - Delete $INSTDIR\bin\libjpeg-@DLL_VERSION@.dll - Delete $INSTDIR\bin\libturbojpeg.dll - Delete $SYSDIR\libturbojpeg.dll - Delete $INSTDIR\lib\libturbojpeg.dll.a" - Delete $INSTDIR\lib\libturbojpeg.a" - Delete 
$INSTDIR\lib\libjpeg.dll.a" - Delete $INSTDIR\lib\libjpeg.a" -!else - Delete $INSTDIR\bin\jpeg@DLL_VERSION@.dll - Delete $INSTDIR\bin\turbojpeg.dll - Delete $SYSDIR\turbojpeg.dll - Delete $INSTDIR\lib\jpeg.lib - Delete $INSTDIR\lib\jpeg-static.lib - Delete $INSTDIR\lib\turbojpeg.lib - Delete $INSTDIR\lib\turbojpeg-static.lib -!endif -!ifdef JAVA - Delete $INSTDIR\classes\turbojpeg.jar -!endif - Delete $INSTDIR\bin\cjpeg.exe - Delete $INSTDIR\bin\djpeg.exe - Delete $INSTDIR\bin\jpegtran.exe - Delete $INSTDIR\bin\tjbench.exe - Delete $INSTDIR\bin\rdjpgcom.exe - Delete $INSTDIR\bin\wrjpgcom.exe - Delete $INSTDIR\include\jconfig.h" - Delete $INSTDIR\include\jerror.h" - Delete $INSTDIR\include\jmorecfg.h" - Delete $INSTDIR\include\jpeglib.h" - Delete $INSTDIR\include\turbojpeg.h" - Delete $INSTDIR\uninstall_@VERSION@.exe - Delete $INSTDIR\doc\README.ijg - Delete $INSTDIR\doc\README.md - Delete $INSTDIR\doc\LICENSE.md - Delete $INSTDIR\doc\example.c - Delete $INSTDIR\doc\libjpeg.txt - Delete $INSTDIR\doc\structure.txt - Delete $INSTDIR\doc\usage.txt - Delete $INSTDIR\doc\wizard.txt - - RMDir "$INSTDIR\include" - RMDir "$INSTDIR\lib" - RMDir "$INSTDIR\doc" -!ifdef JAVA - RMDir "$INSTDIR\classes" -!endif - RMDir "$INSTDIR\bin" - RMDir "$INSTDIR" - -SectionEnd diff --git a/release/libjpeg-turbo.spec.in b/release/libjpeg-turbo.spec.in deleted file mode 100644 index e4e4b9c..0000000 --- a/release/libjpeg-turbo.spec.in +++ /dev/null @@ -1,164 +0,0 @@ -# Path under which libjpeg-turbo should be installed -%define _prefix %{__prefix} - -# Path under which executables should be installed -%define _bindir %{__bindir} - -# Path under which Java classes and man pages should be installed -%define _datadir %{__datadir} - -# Path under which docs should be installed -%define _docdir /usr/share/doc/%{name}-%{version} - -# Path under which headers should be installed -%define _includedir %{__includedir} - -%if "%{?__isa_bits:1}" == "1" -%define _bits %{__isa_bits} -%else -# RPM < 4.6 
-%if "%{_lib}" == "lib64" -%define _bits 64 -%else -%define _bits 32 -%endif -%endif - -%if "%{_bits}" == "64" -%define _libdir %{_exec_prefix}/lib64 -%else -%if "%{_prefix}" == "/opt/libjpeg-turbo" -%define _libdir %{_exec_prefix}/lib32 -%endif -%endif - -# Path under which man pages should be installed -%define _mandir %{__mandir} - -Summary: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs -Name: @PKGNAME@ -Version: @VERSION@ -Vendor: The libjpeg-turbo Project -URL: http://www.libjpeg-turbo.org -Group: System Environment/Libraries -#-->Source0: http://prdownloads.sourceforge.net/libjpeg-turbo/libjpeg-turbo-%{version}.tar.gz -Release: @BUILD@ -License: BSD-style -BuildRoot: %{_blddir}/%{name}-buildroot-%{version}-%{release} -Prereq: /sbin/ldconfig -%if "%{_bits}" == "64" -Provides: %{name} = %{version}-%{release}, @PACKAGE_NAME@ = %{version}-%{release}, libturbojpeg.so()(64bit) -%else -Provides: %{name} = %{version}-%{release}, @PACKAGE_NAME@ = %{version}-%{release}, libturbojpeg.so -%endif - -%description -libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, -NEON, AltiVec) to accelerate baseline JPEG compression and decompression on -x86, x86-64, ARM, and PowerPC systems. On such systems, libjpeg-turbo is -generally 2-6x as fast as libjpeg, all else being equal. On other types of -systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by -virtue of its highly-optimized Huffman coding routines. In many cases, the -performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs. - -libjpeg-turbo implements both the traditional libjpeg API as well as the less -powerful but more straightforward TurboJPEG API. libjpeg-turbo also features -colorspace extensions that allow it to compress from/decompress to 32-bit and -big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java -interface. 
- -libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated -derivative of libjpeg v6b developed by Miyasaka Masaru. The TigerVNC and -VirtualGL projects made numerous enhancements to the codec in 2009, and in -early 2010, libjpeg-turbo spun off into an independent project, with the goal -of making high-speed JPEG compression/decompression technology available to a -broader range of users and developers. - -#-->%prep -#-->%setup -q -n libjpeg-turbo-%{version} - -#-->%build -#-->./configure prefix=%{_prefix} bindir=%{_bindir} datadir=%{_datadir} \ -#--> docdir=%{_docdir} includedir=%{_includedir} libdir=%{_libdir} \ -#--> mandir=%{_mandir} JPEG_LIB_VERSION=@JPEG_LIB_VERSION@ \ -#--> SO_MAJOR_VERSION=@SO_MAJOR_VERSION@ SO_MINOR_VERSION=@SO_MINOR_VERSION@ \ -#--> --with-pic @RPM_CONFIG_ARGS@ -#-->export NUMCPUS=`grep -c '^processor' /proc/cpuinfo` -#-->make -j$NUMCPUS --load-average=$NUMCPUS DESTDIR=$RPM_BUILD_ROOT - -%install - -rm -rf $RPM_BUILD_ROOT -make install DESTDIR=$RPM_BUILD_ROOT docdir=%{_docdir} exampledir=%{_docdir} -rm -f $RPM_BUILD_ROOT%{_libdir}/*.la -/sbin/ldconfig -n $RPM_BUILD_ROOT%{_libdir} - -#-->%if 0 - -LJT_LIBDIR=%{__libdir} -if [ ! "$LJT_LIBDIR" = "%{_libdir}" ]; then - echo ERROR: libjpeg-turbo must be configured with libdir=%{_libdir} when generating an in-tree RPM for this architecture. 
- exit 1 -fi - -#-->%endif - -LJT_DOCDIR=%{__docdir} -if [ "%{_prefix}" = "/opt/libjpeg-turbo" -a "$LJT_DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then - ln -fs %{_docdir} $RPM_BUILD_ROOT/$LJT_DOCDIR -fi - -%post -p /sbin/ldconfig - -%postun -p /sbin/ldconfig - -%clean -rm -rf $RPM_BUILD_ROOT - -%files -%defattr(-,root,root) -%dir %{_docdir} -%doc %{_docdir}/* -%dir %{_prefix} -%if "%{_prefix}" == "/opt/libjpeg-turbo" && "%{_docdir}" != "%{_prefix}/doc" - %{_prefix}/doc -%endif -%dir %{_bindir} -%{_bindir}/cjpeg -%{_bindir}/djpeg -%{_bindir}/jpegtran -%{_bindir}/tjbench -%{_bindir}/rdjpgcom -%{_bindir}/wrjpgcom -%dir %{_libdir} -%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@.@SO_AGE@.@SO_MINOR_VERSION@ -%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@ -%{_libdir}/libjpeg.so -%{_libdir}/libjpeg.a -%{_libdir}/pkgconfig -%{_libdir}/pkgconfig/libjpeg.pc -%{_libdir}/libturbojpeg.so.0.1.0 -%{_libdir}/libturbojpeg.so.0 -%{_libdir}/libturbojpeg.so -%{_libdir}/libturbojpeg.a -%{_libdir}/pkgconfig/libturbojpeg.pc -%dir %{_includedir} -%{_includedir}/jconfig.h -%{_includedir}/jerror.h -%{_includedir}/jmorecfg.h -%{_includedir}/jpeglib.h -%{_includedir}/turbojpeg.h -%dir %{_mandir} -%dir %{_mandir}/man1 -%{_mandir}/man1/cjpeg.1* -%{_mandir}/man1/djpeg.1* -%{_mandir}/man1/jpegtran.1* -%{_mandir}/man1/rdjpgcom.1* -%{_mandir}/man1/wrjpgcom.1* -%if "%{_prefix}" != "%{_datadir}" - %dir %{_datadir} -%endif -@JAVA_RPM_CONTENTS_1@ -@JAVA_RPM_CONTENTS_2@ - -%changelog diff --git a/release/libjpeg.pc.in b/release/libjpeg.pc.in index 40795f7..74fb7fc 100644 --- a/release/libjpeg.pc.in +++ b/release/libjpeg.pc.in @@ -1,10 +1,10 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=@CMAKE_INSTALL_PREFIX@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: libjpeg Description: A SIMD-accelerated JPEG codec that provides the libjpeg API -Version: @PACKAGE_VERSION@ +Version: @VERSION@ Libs: 
-L${libdir} -ljpeg Cflags: -I${includedir} diff --git a/release/libturbojpeg.pc.in b/release/libturbojpeg.pc.in index 7d4b656..81a0063 100644 --- a/release/libturbojpeg.pc.in +++ b/release/libturbojpeg.pc.in @@ -1,10 +1,10 @@ -prefix=@prefix@ -exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=@CMAKE_INSTALL_PREFIX@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: libturbojpeg Description: A SIMD-accelerated JPEG codec that provides the TurboJPEG API -Version: @PACKAGE_VERSION@ +Version: @VERSION@ Libs: -L${libdir} -lturbojpeg Cflags: -I${includedir} diff --git a/release/makecygwinpkg.in b/release/makecygwinpkg.in index f303546..b7f353e 100755 --- a/release/makecygwinpkg.in +++ b/release/makecygwinpkg.in @@ -15,27 +15,51 @@ onexit() fi } -PACKAGE_NAME=@PKGNAME@ +safedirmove () +{ + if [ "$1" = "$2" ]; then + return 0 + fi + if [ "$1" = "" -o ! -d "$1" ]; then + echo safedirmove: source dir $1 is not valid + return 1 + fi + if [ "$2" = "" -o -e "$2" ]; then + echo safedirmove: dest dir $2 is not valid + return 1 + fi + if [ "$3" = "" -o -e "$3" ]; then + echo safedirmove: tmp dir $3 is not valid + return 1 + fi + mkdir -p $3 + mv $1/* $3/ + rmdir $1 + mkdir -p $2 + mv $3/* $2/ + rmdir $3 + return 0 +} + +PKGNAME=@PKGNAME@ VERSION=@VERSION@ BUILD=@BUILD@ -SRCDIR=@abs_top_srcdir@ -PREFIX=%{__prefix} -DOCDIR=%{__docdir} -LIBDIR=%{__libdir} +PREFIX=@CMAKE_INSTALL_PREFIX@ +DOCDIR=@CMAKE_INSTALL_FULL_DOCDIR@ +LIBDIR=@CMAKE_INSTALL_FULL_LIBDIR@ umask 022 -rm -f $PACKAGE_NAME-$VERSION-$BUILD.tar.bz2 +rm -f $PKGNAME-$VERSION-$BUILD.tar.bz2 TMPDIR=`mktemp -d /tmp/ljtbuild.XXXXXX` __PWD=`pwd` -make install DESTDIR=$TMPDIR/pkg docdir=/usr/share/doc/$PACKAGE_NAME-$VERSION \ - exampledir=/usr/share/doc/$PACKAGE_NAME-$VERSION -rm $TMPDIR/pkg$LIBDIR/*.la -if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then - ln -fs /usr/share/doc/$PACKAGE_NAME-$VERSION 
$TMPDIR/pkg$DOCDIR +make install DESTDIR=$TMPDIR/pkg +if [ "$PREFIX" = "@CMAKE_INSTALL_DEFAULT_PREFIX@" -a "$DOCDIR" = "@CMAKE_INSTALL_DEFAULT_PREFIX@/doc" ]; then + safedirmove $TMPDIR/pkg$DOCDIR $TMPDIR/pkg/usr/share/doc/$PKGNAME-$VERSION $TMPDIR/__tmpdoc + ln -fs /usr/share/doc/$PKGNAME-$VERSION $TMPDIR/pkg$DOCDIR fi cd $TMPDIR/pkg -tar cfj ../$PACKAGE_NAME-$VERSION-$BUILD.tar.bz2 * +tar cfj ../$PKGNAME-$VERSION-$BUILD.tar.bz2 * cd $__PWD mv $TMPDIR/*.tar.bz2 . diff --git a/release/makedpkg.in b/release/makedpkg.in index 80cc89b..77836dd 100644 --- a/release/makedpkg.in +++ b/release/makedpkg.in @@ -21,56 +21,89 @@ uid() id | cut -f2 -d = | cut -f1 -d \(; } +safedirmove () +{ + if [ "$1" = "$2" ]; then + return 0 + fi + if [ "$1" = "" -o ! -d "$1" ]; then + echo safedirmove: source dir $1 is not valid + return 1 + fi + if [ "$2" = "" -o -e "$2" ]; then + echo safedirmove: dest dir $2 is not valid + return 1 + fi + if [ "$3" = "" -o -e "$3" ]; then + echo safedirmove: tmp dir $3 is not valid + return 1 + fi + mkdir -p $3 + mv $1/* $3/ + rmdir $1 + mkdir -p $2 + mv $3/* $2/ + rmdir $3 + return 0 +} + makedeb() { SUPPLEMENT=$1 - DIRNAME=$PACKAGE_NAME + DIRNAME=$PKGNAME if [ $SUPPLEMENT = 1 ]; then - PACKAGE_NAME=$PACKAGE_NAME\32 + PKGNAME=$PKGNAME\32 DEBARCH=amd64 fi umask 022 - rm -f $PACKAGE_NAME\_$VERSION\_$DEBARCH.deb - TMPDIR=`mktemp -d /tmp/$PACKAGE_NAME-build.XXXXXX` + rm -f $PKGNAME\_$VERSION\_$DEBARCH.deb + TMPDIR=`mktemp -d /tmp/$PKGNAME-build.XXXXXX` mkdir $TMPDIR/DEBIAN if [ $SUPPLEMENT = 1 ]; then - make install DESTDIR=$TMPDIR bindir=/dummy/bin datadir=/dummy/data \ - docdir=/dummy/doc includedir=/dummy/include mandir=/dummy/man - rm -f $TMPDIR$LIBDIR/*.la - rm -rf $TMPDIR/dummy + make install DESTDIR=$TMPDIR + rm -rf $TMPDIR$BINDIR + if [ "$DATAROOTDIR" != "$PREFIX" ]; then + rm -rf $TMPDIR$DATAROOTDIR + fi + if [ "$JAVADIR" != "" ]; then + rm -rf $TMPDIR$JAVADIR + fi + rm -rf $TMPDIR$DOCDIR + rm -rf $TMPDIR$INCLUDEDIR + rm -rf $TMPDIR$MANDIR else - 
make install DESTDIR=$TMPDIR docdir=/usr/share/doc/$DIRNAME-$VERSION \ - exampledir=/usr/share/doc/$DIRNAME-$VERSION - rm -f $TMPDIR$LIBDIR/*.la - if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then + make install DESTDIR=$TMPDIR + if [ "$PREFIX" = "@CMAKE_INSTALL_DEFAULT_PREFIX@" -a "$DOCDIR" = "@CMAKE_INSTALL_DEFAULT_PREFIX@/doc" ]; then + safedirmove $TMPDIR/$DOCDIR $TMPDIR/usr/share/doc/$PKGNAME-$VERSION $TMPDIR/__tmpdoc ln -fs /usr/share/doc/$DIRNAME-$VERSION $TMPDIR$DOCDIR fi fi SIZE=`du -s $TMPDIR | cut -f1` - (cat $SRCDIR/release/deb-control.tmpl | sed s/{__PKGNAME}/$PACKAGE_NAME/g \ - | sed s/{__VERSION}/$VERSION/g | sed s/{__BUILD}/$BUILD/g \ + (cat pkgscripts/deb-control | sed s/{__PKGNAME}/$PKGNAME/g \ | sed s/{__ARCH}/$DEBARCH/g | sed s/{__SIZE}/$SIZE/g \ > $TMPDIR/DEBIAN/control) - /sbin/ldconfig -n $TMPDIR$LIBDIR $SUDO chown -Rh root:root $TMPDIR/* - dpkg -b $TMPDIR $PACKAGE_NAME\_$VERSION\_$DEBARCH.deb + dpkg -b $TMPDIR $PKGNAME\_$VERSION\_$DEBARCH.deb } -PACKAGE_NAME=@PKGNAME@ +PKGNAME=@PKGNAME@ VERSION=@VERSION@ -BUILD=@BUILD@ DEBARCH=@DEBARCH@ -SRCDIR=@abs_top_srcdir@ -PREFIX=%{__prefix} -DOCDIR=%{__docdir} -LIBDIR=%{__libdir} +PREFIX=@CMAKE_INSTALL_PREFIX@ +BINDIR=@CMAKE_INSTALL_FULL_BINDIR@ +DATAROOTDIR=@CMAKE_INSTALL_FULL_DATAROOTDIR@ +DOCDIR=@CMAKE_INSTALL_FULL_DOCDIR@ +INCLUDEDIR=@CMAKE_INSTALL_FULL_INCLUDEDIR@ +JAVADIR=@CMAKE_INSTALL_FULL_JAVADIR@ +LIBDIR=@CMAKE_INSTALL_FULL_LIBDIR@ +MANDIR=@CMAKE_INSTALL_FULL_MANDIR@ if [ ! `uid` -eq 0 ]; then SUDO=sudo diff --git a/release/makemacpkg.in b/release/makemacpkg.in index 7163757..b0a2e23 100644 --- a/release/makemacpkg.in +++ b/release/makemacpkg.in @@ -15,76 +15,63 @@ onexit() fi } +safedirmove () +{ + if [ "$1" = "$2" ]; then + return 0 + fi + if [ "$1" = "" -o ! 
-d "$1" ]; then + echo safedirmove: source dir $1 is not valid + return 1 + fi + if [ "$2" = "" -o -e "$2" ]; then + echo safedirmove: dest dir $2 is not valid + return 1 + fi + if [ "$3" = "" -o -e "$3" ]; then + echo safedirmove: tmp dir $3 is not valid + return 1 + fi + mkdir -p $3 + mv $1/* $3/ + rmdir $1 + mkdir -p $2 + mv $3/* $2/ + rmdir $3 + return 0 +} + usage() { - echo "$0 [-build32 [32-bit build dir]] [-buildarmv6 [ARMv6 build dir]] [-buildarmv7 [ARMv7 build dir]] [-buildarmv7s [ARMv7s build dir] [-buildarmv8 [ARMv8 build dir]] [-lipo [path to lipo]]" + echo "$0 [universal] [-lipo [path to lipo]]" exit 1 } -PACKAGE_NAME=@PKGNAME@ +UNIVERSAL=0 + +PKGNAME=@PKGNAME@ VERSION=@VERSION@ BUILD=@BUILD@ -SRCDIR=@abs_top_srcdir@ -BUILDDIR32=@abs_top_srcdir@/osxx86 -BUILD32=0 -BUILDDIRARMV6=@abs_top_srcdir@/iosarmv6 -BUILDARMV6=0 -BUILDDIRARMV7=@abs_top_srcdir@/iosarmv7 -BUILDARMV7=0 -BUILDDIRARMV7S=@abs_top_srcdir@/iosarmv7s -BUILDARMV7S=0 -BUILDDIRARMV8=@abs_top_srcdir@/iosarmv8 -BUILDARMV8=0 +SRCDIR=@CMAKE_CURRENT_SOURCE_DIR@ +BUILDDIR32=@OSX_32BIT_BUILD@ +BUILDDIRARMV7=@IOS_ARMV7_BUILD@ +BUILDDIRARMV7S=@IOS_ARMV7S_BUILD@ +BUILDDIRARMV8=@IOS_ARMV8_BUILD@ WITH_JAVA=@WITH_JAVA@ LIPO=lipo -PREFIX=%{__prefix} -BINDIR=%{__bindir} -DOCDIR=%{__docdir} -LIBDIR=%{__libdir} +PREFIX=@CMAKE_INSTALL_PREFIX@ +BINDIR=@CMAKE_INSTALL_FULL_BINDIR@ +DOCDIR=@CMAKE_INSTALL_FULL_DOCDIR@ +LIBDIR=@CMAKE_INSTALL_FULL_LIBDIR@ + +LIBJPEG_DSO_NAME=libjpeg.@SO_MAJOR_VERSION@.@SO_AGE@.@SO_MINOR_VERSION@.dylib +TURBOJPEG_DSO_NAME=libturbojpeg.@TURBOJPEG_SO_VERSION@.dylib while [ $# -gt 0 ]; do case $1 in - -h*) usage 0 ;; - -build32) - BUILD32=1 - if [ $# -gt 1 ]; then - if [[ ! "$2" =~ -.* ]]; then - BUILDDIR32=$2; shift - fi - fi - ;; - -buildarmv6) - BUILDARMV6=1 - if [ $# -gt 1 ]; then - if [[ ! "$2" =~ -.* ]]; then - BUILDDIRARMV6=$2; shift - fi - fi - ;; - -buildarmv7) - BUILDARMV7=1 - if [ $# -gt 1 ]; then - if [[ ! 
"$2" =~ -.* ]]; then - BUILDDIRARMV7=$2; shift - fi - fi - ;; - -buildarmv7s) - BUILDARMV7S=1 - if [ $# -gt 1 ]; then - if [[ ! "$2" =~ -.* ]]; then - BUILDDIRARMV7S=$2; shift - fi - fi - ;; - -buildarmv8) - BUILDARMV8=1 - if [ $# -gt 1 ]; then - if [[ ! "$2" =~ -.* ]]; then - BUILDDIRARMV8=$2; shift - fi - fi + -h*) + usage 0 ;; -lipo) if [ $# -gt 1 ]; then @@ -93,27 +80,31 @@ while [ $# -gt 0 ]; do fi fi ;; + universal) + UNIVERSAL=1 + ;; esac shift done -if [ -f $PACKAGE_NAME-$VERSION.dmg ]; then - rm -f $PACKAGE_NAME-$VERSION.dmg +if [ -f $PKGNAME-$VERSION.dmg ]; then + rm -f $PKGNAME-$VERSION.dmg fi umask 022 -TMPDIR=`mktemp -d /tmp/$PACKAGE_NAME-build.XXXXXX` +TMPDIR=`mktemp -d /tmp/$PKGNAME-build.XXXXXX` PKGROOT=$TMPDIR/pkg/Package_Root mkdir -p $PKGROOT -make install DESTDIR=$PKGROOT docdir=/Library/Documentation/$PACKAGE_NAME \ - exampledir=/Library/Documentation/$PACKAGE_NAME -rm -f $PKGROOT$LIBDIR/*.la -if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then - ln -fs /Library/Documentation/$PACKAGE_NAME $PKGROOT$DOCDIR +make install DESTDIR=$PKGROOT + +if [ "$PREFIX" = "@CMAKE_INSTALL_DEFAULT_PREFIX@" -a "$DOCDIR" = "@CMAKE_INSTALL_DEFAULT_PREFIX@/doc" ]; then + mkdir -p $PKGROOT/Library/Documentation + safedirmove $PKGROOT$DOCDIR $PKGROOT/Library/Documentation/$PKGNAME $TMPDIR/__tmpdoc + ln -fs /Library/Documentation/$PKGNAME $PKGROOT$DOCDIR fi -if [ $BUILD32 = 1 ]; then +if [ $UNIVERSAL = 1 -a "$BUILDDIR32" != "" ]; then if [ ! -d $BUILDDIR32 ]; then echo ERROR: 32-bit build directory $BUILDDIR32 does not exist exit 1 @@ -126,27 +117,18 @@ if [ $BUILD32 = 1 ]; then pushd $BUILDDIR32 make install DESTDIR=$TMPDIR/dist.x86 popd - if [ ! -h $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \ - ! 
-h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then - $LIPO -create \ - -arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \ - -arch x86_64 $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \ - -output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib - elif [ ! -h $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \ - ! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then - $LIPO -create \ - -arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \ - -arch x86_64 $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \ - -output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib - fi + $LIPO -create \ + -arch i386 $TMPDIR/dist.x86/$LIBDIR/$LIBJPEG_DSO_NAME \ + -arch x86_64 $PKGROOT/$LIBDIR/$LIBJPEG_DSO_NAME \ + -output $PKGROOT/$LIBDIR/$LIBJPEG_DSO_NAME $LIPO -create \ -arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.a \ -arch x86_64 $PKGROOT/$LIBDIR/libjpeg.a \ -output $PKGROOT/$LIBDIR/libjpeg.a $LIPO -create \ - -arch i386 $TMPDIR/dist.x86/$LIBDIR/libturbojpeg.0.dylib \ - -arch x86_64 $PKGROOT/$LIBDIR/libturbojpeg.0.dylib \ - -output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib + -arch i386 $TMPDIR/dist.x86/$LIBDIR/$TURBOJPEG_DSO_NAME \ + -arch x86_64 $PKGROOT/$LIBDIR/$TURBOJPEG_DSO_NAME \ + -output $PKGROOT/$LIBDIR/$TURBOJPEG_DSO_NAME $LIPO -create \ -arch i386 $TMPDIR/dist.x86/$LIBDIR/libturbojpeg.a \ -arch x86_64 $PKGROOT/$LIBDIR/libturbojpeg.a \ @@ -175,272 +157,88 @@ if [ $BUILD32 = 1 ]; then -arch i386 $TMPDIR/dist.x86/$BINDIR/wrjpgcom \ -arch x86_64 $PKGROOT/$BINDIR/wrjpgcom \ -output $PKGROOT/$BINDIR/wrjpgcom - fi -if [ $BUILDARMV6 = 1 ]; then - if [ ! -d $BUILDDIRARMV6 ]; then - echo ERROR: ARMv6 build directory $BUILDDIRARMV6 does not exist - exit 1 - fi - if [ ! 
-f $BUILDDIRARMV6/Makefile ]; then - echo ERROR: ARMv6 build directory $BUILDDIRARMV6 is not configured - exit 1 - fi - mkdir -p $TMPDIR/dist.armv6 - pushd $BUILDDIRARMV6 - make install DESTDIR=$TMPDIR/dist.armv6 - popd - if [ ! -h $TMPDIR/dist.armv6/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \ - ! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \ - -arch arm $TMPDIR/dist.armv6/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \ - -output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib - elif [ ! -h $TMPDIR/dist.armv6/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \ - ! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \ - -arch arm $TMPDIR/dist.armv6/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \ - -output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib - fi - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.a \ - -arch arm $TMPDIR/dist.armv6/$LIBDIR/libjpeg.a \ - -output $PKGROOT/$LIBDIR/libjpeg.a - $LIPO -create \ - $PKGROOT/$LIBDIR/libturbojpeg.0.dylib \ - -arch arm $TMPDIR/dist.armv6/$LIBDIR/libturbojpeg.0.dylib \ - -output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib - $LIPO -create \ - $PKGROOT/$LIBDIR/libturbojpeg.a \ - -arch arm $TMPDIR/dist.armv6/$LIBDIR/libturbojpeg.a \ - -output $PKGROOT/$LIBDIR/libturbojpeg.a - $LIPO -create \ - $PKGROOT/$BINDIR/cjpeg \ - -arch arm $TMPDIR/dist.armv6/$BINDIR/cjpeg \ - -output $PKGROOT/$BINDIR/cjpeg - $LIPO -create \ - $PKGROOT/$BINDIR/djpeg \ - -arch arm $TMPDIR/dist.armv6/$BINDIR/djpeg \ - -output $PKGROOT/$BINDIR/djpeg - $LIPO -create \ - $PKGROOT/$BINDIR/jpegtran \ - -arch arm $TMPDIR/dist.armv6/$BINDIR/jpegtran \ - -output $PKGROOT/$BINDIR/jpegtran - $LIPO -create \ - $PKGROOT/$BINDIR/tjbench \ - -arch arm $TMPDIR/dist.armv6/$BINDIR/tjbench \ - -output 
$PKGROOT/$BINDIR/tjbench - $LIPO -create \ - $PKGROOT/$BINDIR/rdjpgcom \ - -arch arm $TMPDIR/dist.armv6/$BINDIR/rdjpgcom \ - -output $PKGROOT/$BINDIR/rdjpgcom - $LIPO -create \ - $PKGROOT/$BINDIR/wrjpgcom \ - -arch arm $TMPDIR/dist.armv6/$BINDIR/wrjpgcom \ - -output $PKGROOT/$BINDIR/wrjpgcom -fi +install_ios() +{ + BUILDDIR=$1 + ARCHNAME=$2 + DIRNAME=$3 + LIPOARCH=$4 -if [ $BUILDARMV7 = 1 ]; then - if [ ! -d $BUILDDIRARMV7 ]; then - echo ERROR: ARMv7 build directory $BUILDDIRARMV7 does not exist + if [ ! -d $BUILDDIR ]; then + echo ERROR: $ARCHNAME build directory $BUILDDIR does not exist exit 1 fi - if [ ! -f $BUILDDIRARMV7/Makefile ]; then - echo ERROR: ARMv7 build directory $BUILDDIRARMV7 is not configured + if [ ! -f $BUILDDIR/Makefile ]; then + echo ERROR: $ARCHNAME build directory $BUILDDIR is not configured exit 1 fi - mkdir -p $TMPDIR/dist.armv7 - pushd $BUILDDIRARMV7 - make install DESTDIR=$TMPDIR/dist.armv7 + mkdir -p $TMPDIR/dist.$DIRNAME + pushd $BUILDDIR + make install DESTDIR=$TMPDIR/dist.$DIRNAME popd - if [ ! -h $TMPDIR/dist.armv7/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \ - ! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \ - -arch arm $TMPDIR/dist.armv7/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \ - -output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib - elif [ ! -h $TMPDIR/dist.armv7/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \ - ! 
-h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \ - -arch arm $TMPDIR/dist.armv7/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \ - -output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib - fi + $LIPO -create \ + $PKGROOT/$LIBDIR/$LIBJPEG_DSO_NAME \ + -arch $LIPOARCH $TMPDIR/dist.$DIRNAME/$LIBDIR/$LIBJPEG_DSO_NAME \ + -output $PKGROOT/$LIBDIR/$LIBJPEG_DSO_NAME $LIPO -create \ $PKGROOT/$LIBDIR/libjpeg.a \ - -arch arm $TMPDIR/dist.armv7/$LIBDIR/libjpeg.a \ + -arch $LIPOARCH $TMPDIR/dist.$DIRNAME/$LIBDIR/libjpeg.a \ -output $PKGROOT/$LIBDIR/libjpeg.a $LIPO -create \ - $PKGROOT/$LIBDIR/libturbojpeg.0.dylib \ - -arch arm $TMPDIR/dist.armv7/$LIBDIR/libturbojpeg.0.dylib \ - -output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib + $PKGROOT/$LIBDIR/$TURBOJPEG_DSO_NAME \ + -arch $LIPOARCH $TMPDIR/dist.$DIRNAME/$LIBDIR/$TURBOJPEG_DSO_NAME \ + -output $PKGROOT/$LIBDIR/$TURBOJPEG_DSO_NAME $LIPO -create \ $PKGROOT/$LIBDIR/libturbojpeg.a \ - -arch arm $TMPDIR/dist.armv7/$LIBDIR/libturbojpeg.a \ + -arch $LIPOARCH $TMPDIR/dist.$DIRNAME/$LIBDIR/libturbojpeg.a \ -output $PKGROOT/$LIBDIR/libturbojpeg.a $LIPO -create \ $PKGROOT/$BINDIR/cjpeg \ - -arch arm $TMPDIR/dist.armv7/$BINDIR/cjpeg \ + -arch $LIPOARCH $TMPDIR/dist.$DIRNAME/$BINDIR/cjpeg \ -output $PKGROOT/$BINDIR/cjpeg $LIPO -create \ $PKGROOT/$BINDIR/djpeg \ - -arch arm $TMPDIR/dist.armv7/$BINDIR/djpeg \ + -arch $LIPOARCH $TMPDIR/dist.$DIRNAME/$BINDIR/djpeg \ -output $PKGROOT/$BINDIR/djpeg $LIPO -create \ $PKGROOT/$BINDIR/jpegtran \ - -arch arm $TMPDIR/dist.armv7/$BINDIR/jpegtran \ + -arch $LIPOARCH $TMPDIR/dist.$DIRNAME/$BINDIR/jpegtran \ -output $PKGROOT/$BINDIR/jpegtran $LIPO -create \ $PKGROOT/$BINDIR/tjbench \ - -arch arm $TMPDIR/dist.armv7/$BINDIR/tjbench \ + -arch $LIPOARCH $TMPDIR/dist.$DIRNAME/$BINDIR/tjbench \ -output $PKGROOT/$BINDIR/tjbench $LIPO -create \ 
$PKGROOT/$BINDIR/rdjpgcom \ - -arch arm $TMPDIR/dist.armv7/$BINDIR/rdjpgcom \ + -arch $LIPOARCH $TMPDIR/dist.$DIRNAME/$BINDIR/rdjpgcom \ -output $PKGROOT/$BINDIR/rdjpgcom $LIPO -create \ $PKGROOT/$BINDIR/wrjpgcom \ - -arch arm $TMPDIR/dist.armv7/$BINDIR/wrjpgcom \ + -arch $LIPOARCH $TMPDIR/dist.$DIRNAME/$BINDIR/wrjpgcom \ -output $PKGROOT/$BINDIR/wrjpgcom +} + +if [ $UNIVERSAL = 1 -a "$BUILDDIRARMV7" != "" ]; then + install_ios $BUILDDIRARMV7 ARMv7 armv7 arm fi -if [ $BUILDARMV7S = 1 ]; then - if [ ! -d $BUILDDIRARMV7S ]; then - echo ERROR: ARMv7s build directory $BUILDDIRARMV7S does not exist - exit 1 - fi - if [ ! -f $BUILDDIRARMV7S/Makefile ]; then - echo ERROR: ARMv7s build directory $BUILDDIRARMV7S is not configured - exit 1 - fi - mkdir -p $TMPDIR/dist.armv7s - pushd $BUILDDIRARMV7S - make install DESTDIR=$TMPDIR/dist.armv7s - popd - if [ ! -h $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \ - ! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \ - -arch arm $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \ - -output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib - elif [ ! -h $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \ - ! 
-h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \ - -arch arm $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \ - -output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib - fi - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.a \ - -arch arm $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.a \ - -output $PKGROOT/$LIBDIR/libjpeg.a - $LIPO -create \ - $PKGROOT/$LIBDIR/libturbojpeg.0.dylib \ - -arch arm $TMPDIR/dist.armv7s/$LIBDIR/libturbojpeg.0.dylib \ - -output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib - $LIPO -create \ - $PKGROOT/$LIBDIR/libturbojpeg.a \ - -arch arm $TMPDIR/dist.armv7s/$LIBDIR/libturbojpeg.a \ - -output $PKGROOT/$LIBDIR/libturbojpeg.a - $LIPO -create \ - $PKGROOT/$BINDIR/cjpeg \ - -arch arm $TMPDIR/dist.armv7s/$BINDIR/cjpeg \ - -output $PKGROOT/$BINDIR/cjpeg - $LIPO -create \ - $PKGROOT/$BINDIR/djpeg \ - -arch arm $TMPDIR/dist.armv7s/$BINDIR/djpeg \ - -output $PKGROOT/$BINDIR/djpeg - $LIPO -create \ - $PKGROOT/$BINDIR/jpegtran \ - -arch arm $TMPDIR/dist.armv7s/$BINDIR/jpegtran \ - -output $PKGROOT/$BINDIR/jpegtran - $LIPO -create \ - $PKGROOT/$BINDIR/tjbench \ - -arch arm $TMPDIR/dist.armv7s/$BINDIR/tjbench \ - -output $PKGROOT/$BINDIR/tjbench - $LIPO -create \ - $PKGROOT/$BINDIR/rdjpgcom \ - -arch arm $TMPDIR/dist.armv7s/$BINDIR/rdjpgcom \ - -output $PKGROOT/$BINDIR/rdjpgcom - $LIPO -create \ - $PKGROOT/$BINDIR/wrjpgcom \ - -arch arm $TMPDIR/dist.armv7s/$BINDIR/wrjpgcom \ - -output $PKGROOT/$BINDIR/wrjpgcom +if [ $UNIVERSAL = 1 -a "$BUILDDIRARMV7S" != "" ]; then + install_ios $BUILDDIRARMV7S ARMv7s armv7s arm fi -if [ $BUILDARMV8 = 1 ]; then - if [ ! -d $BUILDDIRARMV8 ]; then - echo ERROR: ARMv8 build directory $BUILDDIRARMV8 does not exist - exit 1 - fi - if [ ! 
-f $BUILDDIRARMV8/Makefile ]; then - echo ERROR: ARMv8 build directory $BUILDDIRARMV8 is not configured - exit 1 - fi - mkdir -p $TMPDIR/dist.armv8 - pushd $BUILDDIRARMV8 - make install DESTDIR=$TMPDIR/dist.armv8 - popd - if [ ! -h $TMPDIR/dist.armv8/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \ - ! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \ - -arch arm64 $TMPDIR/dist.armv8/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \ - -output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib - elif [ ! -h $TMPDIR/dist.armv8/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \ - ! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \ - -arch arm64 $TMPDIR/dist.armv8/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \ - -output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib - fi - $LIPO -create \ - $PKGROOT/$LIBDIR/libjpeg.a \ - -arch arm64 $TMPDIR/dist.armv8/$LIBDIR/libjpeg.a \ - -output $PKGROOT/$LIBDIR/libjpeg.a - $LIPO -create \ - $PKGROOT/$LIBDIR/libturbojpeg.0.dylib \ - -arch arm64 $TMPDIR/dist.armv8/$LIBDIR/libturbojpeg.0.dylib \ - -output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib - $LIPO -create \ - $PKGROOT/$LIBDIR/libturbojpeg.a \ - -arch arm64 $TMPDIR/dist.armv8/$LIBDIR/libturbojpeg.a \ - -output $PKGROOT/$LIBDIR/libturbojpeg.a - $LIPO -create \ - $PKGROOT/$BINDIR/cjpeg \ - -arch arm64 $TMPDIR/dist.armv8/$BINDIR/cjpeg \ - -output $PKGROOT/$BINDIR/cjpeg - $LIPO -create \ - $PKGROOT/$BINDIR/djpeg \ - -arch arm64 $TMPDIR/dist.armv8/$BINDIR/djpeg \ - -output $PKGROOT/$BINDIR/djpeg - $LIPO -create \ - $PKGROOT/$BINDIR/jpegtran \ - -arch arm64 $TMPDIR/dist.armv8/$BINDIR/jpegtran \ - -output $PKGROOT/$BINDIR/jpegtran - $LIPO -create \ - $PKGROOT/$BINDIR/tjbench \ - -arch arm64 $TMPDIR/dist.armv8/$BINDIR/tjbench \ - -output 
$PKGROOT/$BINDIR/tjbench - $LIPO -create \ - $PKGROOT/$BINDIR/rdjpgcom \ - -arch arm64 $TMPDIR/dist.armv8/$BINDIR/rdjpgcom \ - -output $PKGROOT/$BINDIR/rdjpgcom - $LIPO -create \ - $PKGROOT/$BINDIR/wrjpgcom \ - -arch arm64 $TMPDIR/dist.armv8/$BINDIR/wrjpgcom \ - -output $PKGROOT/$BINDIR/wrjpgcom +if [ $UNIVERSAL = 1 -a "$BUILDDIRARMV8" != "" ]; then + install_ios $BUILDDIRARMV8 ARMv8 armv8 arm64 fi -install_name_tool -id $LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -install_name_tool -id $LIBDIR/libturbojpeg.0.dylib $PKGROOT/$LIBDIR/libturbojpeg.0.dylib +install_name_tool -id $LIBDIR/$LIBJPEG_DSO_NAME $PKGROOT/$LIBDIR/$LIBJPEG_DSO_NAME +install_name_tool -id $LIBDIR/$TURBOJPEG_DSO_NAME $PKGROOT/$LIBDIR/$TURBOJPEG_DSO_NAME if [ $WITH_JAVA = 1 ]; then - ln -fs libturbojpeg.0.dylib $PKGROOT/$LIBDIR/libturbojpeg.jnilib + ln -fs $TURBOJPEG_DSO_NAME $PKGROOT/$LIBDIR/libturbojpeg.jnilib fi -if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$LIBDIR" = "/opt/libjpeg-turbo/lib" ]; then +if [ "$PREFIX" = "@CMAKE_INSTALL_DEFAULT_PREFIX@" -a "$LIBDIR" = "@CMAKE_INSTALL_DEFAULT_PREFIX@/lib" ]; then if [ ! 
-h $PKGROOT/$PREFIX/lib32 ]; then ln -fs lib $PKGROOT/$PREFIX/lib32 fi @@ -458,13 +256,13 @@ find $PKGROOT -type f | while read file; do xattr -c $file; done cp $SRCDIR/release/License.rtf $SRCDIR/release/Welcome.rtf $SRCDIR/release/ReadMe.txt $TMPDIR/pkg/ mkdir $TMPDIR/dmg -pkgbuild --root $PKGROOT --version $VERSION.$BUILD \ - --identifier com.libjpeg-turbo.libjpeg-turbo $TMPDIR/pkg/$PACKAGE_NAME.pkg -productbuild --distribution $SRCDIR/release/Distribution.xml \ +pkgbuild --root $PKGROOT --version $VERSION.$BUILD --identifier @PKGID@ \ + $TMPDIR/pkg/$PKGNAME.pkg +productbuild --distribution pkgscripts/Distribution.xml \ --package-path $TMPDIR/pkg/ --resources $TMPDIR/pkg/ \ - $TMPDIR/dmg/$PACKAGE_NAME.pkg -hdiutil create -fs HFS+ -volname $PACKAGE_NAME-$VERSION \ - -srcfolder "$TMPDIR/dmg" $TMPDIR/$PACKAGE_NAME-$VERSION.dmg -cp $TMPDIR/$PACKAGE_NAME-$VERSION.dmg . + $TMPDIR/dmg/$PKGNAME.pkg +hdiutil create -fs HFS+ -volname $PKGNAME-$VERSION \ + -srcfolder "$TMPDIR/dmg" $TMPDIR/$PKGNAME-$VERSION.dmg +cp $TMPDIR/$PKGNAME-$VERSION.dmg . exit diff --git a/release/makerpm.in b/release/makerpm.in new file mode 100644 index 0000000..fc3b1d4 --- /dev/null +++ b/release/makerpm.in @@ -0,0 +1,30 @@ +#!/bin/sh + +set -u +set -e +trap onexit INT +trap onexit TERM +trap onexit EXIT + +TMPDIR= + +onexit() +{ + if [ ! 
"$TMPDIR" = "" ]; then + rm -rf $TMPDIR + fi +} + +if [ -f @PKGNAME@-@VERSION@.@RPMARCH@.rpm ]; then + rm -f @PKGNAME@-@VERSION@.@RPMARCH@.rpm +fi + +umask 022 +TMPDIR=`mktemp -d /tmp/@CMAKE_PROJECT_NAME@-build.XXXXXX` + +mkdir -p $TMPDIR/RPMS +ln -fs `pwd` $TMPDIR/BUILD +rpmbuild -bb --define "_blddir $TMPDIR/buildroot" --define "_topdir $TMPDIR" \ + --target @RPMARCH@ pkgscripts/rpm.spec; \ +cp $TMPDIR/RPMS/@RPMARCH@/@PKGNAME@-@VERSION@-@BUILD@.@RPMARCH@.rpm \ + @PKGNAME@-@VERSION@.@RPMARCH@.rpm diff --git a/release/makesrpm.in b/release/makesrpm.in new file mode 100644 index 0000000..84c39d4 --- /dev/null +++ b/release/makesrpm.in @@ -0,0 +1,48 @@ +#!/bin/sh + +set -u +set -e +trap onexit INT +trap onexit TERM +trap onexit EXIT + +TMPDIR= + +onexit() +{ + if [ ! "$TMPDIR" = "" ]; then + rm -rf $TMPDIR + fi +} + +PKGNAME=@PKGNAME@ +PROJECT=@CMAKE_PROJECT_NAME@ +VERSION=@VERSION@ +BUILD=@BUILD@ + +if [ -f $PKGNAME-$VERSION.src.rpm ]; then + rm -f $PKGNAME-$VERSION.src.rpm +fi + +umask 022 +TMPDIR=`mktemp -d /tmp/$PKGNAME-build.XXXXXX` + +mkdir -p $TMPDIR/RPMS +mkdir -p $TMPDIR/SRPMS +mkdir -p $TMPDIR/BUILD +mkdir -p $TMPDIR/SOURCES +mkdir -p $TMPDIR/SPECS + +if [ ! -f $PROJECT-$VERSION.tar.gz ]; then + echo "ERROR: $PROJECT-$VERSION.tar.gz does not exist." +fi + +cp $PROJECT-$VERSION.tar.gz $TMPDIR/SOURCES/$PROJECT-$VERSION.tar.gz + +cat pkgscripts/rpm.spec | sed s/%{_blddir}/%{_tmppath}/g \ + | sed s/#--\>//g > $TMPDIR/SPECS/$PKGNAME.spec + +rpmbuild -bs --define "_topdir $TMPDIR" $TMPDIR/SPECS/$PKGNAME.spec +mv $TMPDIR/SRPMS/$PKGNAME-$VERSION-$BUILD.src.rpm $PKGNAME-$VERSION.src.rpm + +exit diff --git a/release/maketarball.in b/release/maketarball.in new file mode 100644 index 0000000..00a9c7e --- /dev/null +++ b/release/maketarball.in @@ -0,0 +1,51 @@ +#!/bin/sh + +set -u +set -e +trap onexit INT +trap onexit TERM +trap onexit EXIT + +TMPDIR= +SUDO= + +onexit() +{ + if [ ! 
"$TMPDIR" = "" ]; then + rm -rf $TMPDIR + fi +} + +uid() +{ + id | cut -f2 -d = | cut -f1 -d \(; +} + +PKGNAME=@PKGNAME@ +VERSION=@VERSION@ +ARCH=@CPU_TYPE@ +OS=@CMAKE_SYSTEM_NAME@ +PREFIX=@CMAKE_INSTALL_PREFIX@ + +umask 022 +rm -f $PKGNAME-$VERSION-$OS-$ARCH.tar.bz2 +TMPDIR=`mktemp -d /tmp/$PKGNAME-build.XXXXXX` +mkdir -p $TMPDIR/install + +make install DESTDIR=$TMPDIR/install +echo tartest >$TMPDIR/tartest +GNUTAR=0 +BSDTAR=0 +tar cf $TMPDIR/tartest.tar --owner=root --group=root -C $TMPDIR tartest >/dev/null 2>&1 && GNUTAR=1 +if [ "$GNUTAR" = "1" ]; then + tar cf - --owner=root --group=root -C $TMPDIR/install .$PREFIX | bzip2 -c >$PKGNAME-$VERSION-$OS-$ARCH.tar.bz2 +else + tar cf $TMPDIR/tartest.tar --uid 0 --gid 0 -C $TMPDIR tartest >/dev/null 2>&1 && BSDTAR=1 + if [ "$BSDTAR" = "1" ]; then + tar cf - --uid=0 --gid=0 -C $TMPDIR/install .$PREFIX | bzip2 -c >$PKGNAME-$VERSION-$OS-$ARCH.tar.bz2 + else + tar cf - -C $TMPDIR/install .$PREFIX | bzip2 -c >$PKGNAME-$VERSION-$OS-$ARCH.tar.bz2 + fi +fi + +exit diff --git a/release/rpm.spec.in b/release/rpm.spec.in new file mode 100644 index 0000000..a22c5e1 --- /dev/null +++ b/release/rpm.spec.in @@ -0,0 +1,221 @@ +%define _prefix @CMAKE_INSTALL_PREFIX@ +%define _bindir @CMAKE_INSTALL_FULL_BINDIR@ +%define _datarootdir @CMAKE_INSTALL_FULL_DATAROOTDIR@ +%define _docdir %{_defaultdocdir}/%{name}-%{version} +%define _includedir @CMAKE_INSTALL_FULL_INCLUDEDIR@ +%define _javadir @CMAKE_INSTALL_FULL_JAVADIR@ +%define _mandir @CMAKE_INSTALL_FULL_MANDIR@ +%define _enable_static @ENABLE_STATIC@ +%define _enable_shared @ENABLE_SHARED@ +%define _with_turbojpeg @WITH_TURBOJPEG@ +%define _with_java @WITH_JAVA@ + +%if "%{?__isa_bits:1}" == "1" +%define _bits %{__isa_bits} +%else +# RPM < 4.6 +%if "%{_lib}" == "lib64" +%define _bits 64 +%else +%define _bits 32 +%endif +%endif + +#-->%if 1 +%if "%{_bits}" == "64" +%define _libdir %{_exec_prefix}/lib64 +%else +%if "%{_prefix}" == "/opt/libjpeg-turbo" +%define _libdir %{_exec_prefix}/lib32 
+%endif +%endif +#-->%else +%define _libdir @CMAKE_INSTALL_FULL_LIBDIR@ +#-->%endif + +Summary: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs +Name: @PKGNAME@ +Version: @VERSION@ +Vendor: @PKGVENDOR@ +URL: @PKGURL@ +Group: System Environment/Libraries +#-->Source0: http://prdownloads.sourceforge.net/@CMAKE_PROJECT_NAME@/@CMAKE_PROJECT_NAME@-%{version}.tar.gz +Release: @BUILD@ +License: BSD-style +BuildRoot: %{_blddir}/%{name}-buildroot-%{version}-%{release} +Prereq: /sbin/ldconfig +%if "%{_bits}" == "64" +Provides: %{name} = %{version}-%{release}, @CMAKE_PROJECT_NAME@ = %{version}-%{release}, libturbojpeg.so()(64bit) +%else +Provides: %{name} = %{version}-%{release}, @CMAKE_PROJECT_NAME@ = %{version}-%{release}, libturbojpeg.so +%endif + +%description +libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, +AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression +on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG +compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is +generally 2-6x as fast as libjpeg, all else being equal. On other types of +systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by +virtue of its highly-optimized Huffman coding routines. In many cases, the +performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs. + +libjpeg-turbo implements both the traditional libjpeg API as well as the less +powerful but more straightforward TurboJPEG API. libjpeg-turbo also features +colorspace extensions that allow it to compress from/decompress to 32-bit and +big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java +interface. + +libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated +derivative of libjpeg v6b developed by Miyasaka Masaru. 
The TigerVNC and +VirtualGL projects made numerous enhancements to the codec in 2009, and in +early 2010, libjpeg-turbo spun off into an independent project, with the goal +of making high-speed JPEG compression/decompression technology available to a +broader range of users and developers. + +#-->%prep +#-->%setup -q -n @CMAKE_PROJECT_NAME@-%{version} + +#-->%build +#-->cmake -G"Unix Makefiles" -DCMAKE_BUILD_TYPE=@CMAKE_BUILD_TYPE@ \ +#--> -DBUILD=%{release} \ +#--> -DCMAKE_INSTALL_BINDIR=%{_bindir} \ +#--> -DCMAKE_INSTALL_DATAROOTDIR=%{_datarootdir} \ +#--> -DCMAKE_INSTALL_DOCDIR=%{_docdir} \ +#--> -DCMAKE_INSTALL_INCLUDEDIR=%{_includedir} \ +#--> -DCMAKE_INSTALL_JAVADIR=%{_javadir} \ +#--> -DCMAKE_INSTALL_LIBDIR=%{_libdir} \ +#--> -DCMAKE_INSTALL_MANDIR=%{_mandir} \ +#--> -DCMAKE_INSTALL_PREFIX=%{_prefix} \ +#--> -DCMAKE_POSITION_INDEPENDENT_CODE=@CMAKE_POSITION_INDEPENDENT_CODE@ \ +#--> -DENABLE_SHARED=@ENABLE_SHARED@ -DENABLE_STATIC=@ENABLE_STATIC@ \ +#--> -DSO_MAJOR_VERSION=@SO_MAJOR_VERSION@ \ +#--> -DSO_MINOR_VERSION=@SO_MINOR_VERSION@ \ +#--> -DJPEG_LIB_VERSION=@JPEG_LIB_VERSION@ \ +#--> -DREQUIRE_SIMD=@REQUIRE_SIMD@ \ +#--> -DWITH_12BIT=@WITH_12BIT@ -DWITH_ARITH_DEC=@WITH_ARITH_DEC@ \ +#--> -DWITH_ARITH_ENC=@WITH_ARITH_ENC@ -DWITH_JAVA=@WITH_JAVA@ \ +#--> -DWITH_JPEG7=@WITH_JPEG7@ -DWITH_JPEG8=@WITH_JPEG8@ \ +#--> -DWITH_MEM_SRCDST=@WITH_MEM_SRCDST@ -DWITH_SIMD=@WITH_SIMD@ \ +#--> -DWITH_TURBOJPEG=@WITH_TURBOJPEG@ . +#-->make DESTDIR=$RPM_BUILD_ROOT + +%install + +rm -rf $RPM_BUILD_ROOT +make install DESTDIR=$RPM_BUILD_ROOT +/sbin/ldconfig -n $RPM_BUILD_ROOT%{_libdir} + +#-->%if 0 + +# This is only needed to support in-tree RPM generation via 'make rpm'. When +# building from a SRPM, we control where things are installed via CMake +# variables. + +safedirmove () +{ + if [ "$1" = "$2" ]; then + return 0 + fi + if [ "$1" = "" -o ! 
-d "$1" ]; then + echo safedirmove: source dir $1 is not valid + return 1 + fi + if [ "$2" = "" -o -e "$2" ]; then + echo safedirmove: dest dir $2 is not valid + return 1 + fi + if [ "$3" = "" -o -e "$3" ]; then + echo safedirmove: tmp dir $3 is not valid + return 1 + fi + mkdir -p $3 + mv $1/* $3/ + rmdir $1 + mkdir -p $2 + mv $3/* $2/ + rmdir $3 + return 0 +} + +LJT_DOCDIR=@CMAKE_INSTALL_FULL_DOCDIR@ +if [ ! "$LJT_DOCDIR" = "%{_docdir}" ]; then + safedirmove $RPM_BUILD_ROOT/$LJT_DOCDIR $RPM_BUILD_ROOT/%{_docdir} $RPM_BUILD_ROOT/__tmpdoc +fi + +#-->%endif + +LJT_DOCDIR=@CMAKE_INSTALL_FULL_DOCDIR@ +if [ "%{_prefix}" = "@CMAKE_INSTALL_DEFAULT_PREFIX@" -a "$LJT_DOCDIR" = "@CMAKE_INSTALL_DEFAULT_PREFIX@/doc" ]; then + ln -fs %{_docdir} $RPM_BUILD_ROOT/$LJT_DOCDIR +fi + +%post -p /sbin/ldconfig + +%postun -p /sbin/ldconfig + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root) +%dir %{_docdir} +%doc %{_docdir}/* +%dir %{_prefix} +%if "%{_prefix}" == "@CMAKE_INSTALL_DEFAULT_PREFIX@" && "%{_docdir}" != "%{_prefix}/doc" + %{_prefix}/doc +%endif +%dir %{_bindir} +%{_bindir}/cjpeg +%{_bindir}/djpeg +%{_bindir}/jpegtran +%if "%{_with_turbojpeg}" == "1" + %{_bindir}/tjbench +%endif +%{_bindir}/rdjpgcom +%{_bindir}/wrjpgcom +%dir %{_libdir} +%if "%{_enable_shared}" == "1" + %{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@.@SO_AGE@.@SO_MINOR_VERSION@ + %{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@ + %{_libdir}/libjpeg.so +%endif +%if "%{_enable_static}" == "1" + %{_libdir}/libjpeg.a +%endif +%{_libdir}/pkgconfig +%{_libdir}/pkgconfig/libjpeg.pc +%if "%{_with_turbojpeg}" == "1" + %if "%{_enable_shared}" == "1" || "%{_with_java}" == "1" + %{_libdir}/libturbojpeg.so.@TURBOJPEG_SO_VERSION@ + %{_libdir}/libturbojpeg.so.@TURBOJPEG_SO_MAJOR_VERSION@ + %{_libdir}/libturbojpeg.so + %endif + %if "%{_enable_static}" == "1" + %{_libdir}/libturbojpeg.a + %endif + %{_libdir}/pkgconfig/libturbojpeg.pc +%endif +%dir %{_includedir} +%{_includedir}/jconfig.h +%{_includedir}/jerror.h 
+%{_includedir}/jmorecfg.h +%{_includedir}/jpeglib.h +%if "%{_with_turbojpeg}" == "1" + %{_includedir}/turbojpeg.h +%endif +%dir %{_mandir} +%dir %{_mandir}/man1 +%{_mandir}/man1/cjpeg.1* +%{_mandir}/man1/djpeg.1* +%{_mandir}/man1/jpegtran.1* +%{_mandir}/man1/rdjpgcom.1* +%{_mandir}/man1/wrjpgcom.1* +%if "%{_prefix}" != "%{_datarootdir}" + %dir %{_datarootdir} +%endif +%if "%{_with_java}" == "1" + %dir %{_javadir} + %{_javadir}/turbojpeg.jar +%endif +%changelog diff --git a/release/uninstall.in b/release/uninstall.in index 6cd1f86..cf1ba77 100644 --- a/release/uninstall.in +++ b/release/uninstall.in @@ -31,15 +31,15 @@ if [ ! "`id -u`" = "0" ]; then exit -1 fi -PACKAGE=@PKGNAME@ -MACPACKAGE=com.$PACKAGE.$PACKAGE -RECEIPT=/Library/Receipts/$PACKAGE.pkg +PKGNAME=@PKGNAME@ +PKGID=@PKGID@ +RECEIPT=/Library/Receipts/$PKGNAME.pkg LSBOM= if [ -d $RECEIPT ]; then LSBOM='lsbom -s -f -l '$RECEIPT'/Contents/Archive.bom' else - LSBOM='pkgutil --files '$MACPACKAGE + LSBOM='pkgutil --files '$PKGID fi mylsbom() @@ -56,12 +56,13 @@ done popd echo Removing package directories ... 
-PREFIX=%{__prefix} -BINDIR=%{__bindir} -DATADIR=%{__datadir} -INCLUDEDIR=%{__includedir} -LIBDIR=%{__libdir} -MANDIR=%{__mandir} +PREFIX=@CMAKE_INSTALL_PREFIX@ +BINDIR=@CMAKE_INSTALL_FULL_BINDIR@ +DATAROOTDIR=@CMAKE_INSTALL_FULL_DATAROOTDIR@ +INCLUDEDIR=@CMAKE_INSTALL_FULL_INCLUDEDIR@ +JAVADIR=@CMAKE_INSTALL_FULL_JAVADIR@ +LIBDIR=@CMAKE_INSTALL_FULL_LIBDIR@ +MANDIR=@CMAKE_INSTALL_FULL_MANDIR@ if [ -d $BINDIR ]; then rmdir $BINDIR 2>&1 || EXITSTATUS=-1 @@ -75,7 +76,7 @@ fi if [ -d $INCLUDEDIR ]; then rmdir $INCLUDEDIR 2>&1 || EXITSTATUS=-1 fi -if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$LIBDIR" = "/opt/libjpeg-turbo/lib" ]; then +if [ "$PREFIX" = "@CMAKE_INSTALL_DEFAULT_PREFIX@" -a "$LIBDIR" = "@CMAKE_INSTALL_DEFAULT_PREFIX@/lib" ]; then if [ -h $LIBDIR\32 ]; then rm $LIBDIR\32 2>&1 || EXITSTATUS=-1 fi @@ -89,24 +90,24 @@ fi if [ -d $MANDIR ]; then rmdir $MANDIR 2>&1 || EXITSTATUS=-1 fi -if [ -d $DATADIR/classes ]; then - rmdir $DATADIR/classes 2>&1 || EXITSTATUS=-1 +if [ -d $JAVADIR ]; then + rmdir $JAVADIR 2>&1 || EXITSTATUS=-1 fi -if [ -d $DATADIR -a "$DATADIR" != "$PREFIX" ]; then - rmdir $DATADIR 2>&1 || EXITSTATUS=-1 +if [ -d $DATAROOTDIR -a "$DATAROOTDIR" != "$PREFIX" ]; then + rmdir $DATAROOTDIR 2>&1 || EXITSTATUS=-1 fi -if [ "$PREFIX" = "/opt/libjpeg-turbo" -a -h "$PREFIX/doc" ]; then +if [ "$PREFIX" = "@CMAKE_INSTALL_DEFAULT_PREFIX@" -a -h "$PREFIX/doc" ]; then rm $PREFIX/doc 2>&1 || EXITSTATUS=-1 fi rmdir $PREFIX 2>&1 || EXITSTATUS=-1 -rmdir /Library/Documentation/$PACKAGE 2>&1 || EXITSTATUS=-1 +rmdir /Library/Documentation/$PKGNAME 2>&1 || EXITSTATUS=-1 if [ -d $RECEIPT ]; then echo Removing package receipt ... rm -r $RECEIPT 2>&1 || EXITSTATUS=-1 else - echo Forgetting package $MACPACKAGE ... - pkgutil --forget $MACPACKAGE + echo Forgetting package $PKGID ... 
+ pkgutil --forget $PKGID fi exit $EXITSTATUS diff --git a/sharedlib/CMakeLists.txt b/sharedlib/CMakeLists.txt index d423cce..2bab832 100755 --- a/sharedlib/CMakeLists.txt +++ b/sharedlib/CMakeLists.txt @@ -5,6 +5,10 @@ # better yet, provide a friendly way of configuring a Windows target to use the # static C library. +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/..) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/..) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/..) + if(MSVC) # Build all configurations against shared C library foreach(var CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE @@ -16,38 +20,54 @@ if(MSVC) endif() foreach(src ${JPEG_SOURCES}) - set(JPEG_SRCS ${JPEG_SRCS} ${CMAKE_SOURCE_DIR}/${src}) + set(JPEG_SRCS ${JPEG_SRCS} ../${src}) endforeach() -if(WITH_SIMD) +if(WITH_SIMD AND MSVC_IDE) # This tells CMake that the "source" files haven't been generated yet set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1) endif() -if(WITH_MEM_SRCDST AND NOT WITH_JPEG8) - add_library(jpeg SHARED ${JPEG_SRCS} ${SIMD_OBJS} - ${CMAKE_SOURCE_DIR}/win/jpeg${DLL_VERSION}-memsrcdst.def) -else() - add_library(jpeg SHARED ${JPEG_SRCS} ${SIMD_OBJS} - ${CMAKE_SOURCE_DIR}/win/jpeg${DLL_VERSION}.def) +if(WIN32) + if(WITH_MEM_SRCDST) + set(DEFFILE ../win/jpeg${SO_MAJOR_VERSION}-memsrcdst.def) + else() + set(DEFFILE ../win/jpeg${SO_MAJOR_VERSION}.def) + endif() +endif() +add_library(jpeg SHARED ${JPEG_SRCS} ${DEFFILE} $<TARGET_OBJECTS:simd> + ${SIMD_OBJS}) + +set_target_properties(jpeg PROPERTIES SOVERSION ${SO_MAJOR_VERSION} + VERSION ${SO_MAJOR_VERSION}.${SO_AGE}.${SO_MINOR_VERSION}) +if(APPLE AND (NOT CMAKE_OSX_DEPLOYMENT_TARGET OR + CMAKE_OSX_DEPLOYMENT_TARGET VERSION_GREATER 10.4)) + if(NOT CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG) + set(CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG "-Wl,-rpath,") + endif() + set_target_properties(jpeg PROPERTIES MACOSX_RPATH 1) +endif() +if(MAPFLAG) + set_target_properties(jpeg PROPERTIES + LINK_FLAGS 
"${MAPFLAG}${CMAKE_CURRENT_BINARY_DIR}/../libjpeg.map") endif() -set_target_properties(jpeg PROPERTIES SOVERSION ${DLL_VERSION} - VERSION ${FULLVERSION}) if(MSVC) - set_target_properties(jpeg PROPERTIES SUFFIX ${DLL_VERSION}.dll) -elseif(MINGW OR CYGWIN) - set_target_properties(jpeg PROPERTIES SUFFIX -${DLL_VERSION}.dll) -endif(MSVC) -if(WITH_SIMD) - add_dependencies(jpeg simd) + set_target_properties(jpeg PROPERTIES SUFFIX ${SO_MAJOR_VERSION}.dll) + # The jsimd_*.c file is built using /MT, so this prevents a linker warning. + set_target_properties(jpeg PROPERTIES LINK_FLAGS "/NODEFAULTLIB:LIBCMT /NODEFAULTLIB:LIBCMTD") +elseif(MINGW) + set_target_properties(jpeg PROPERTIES SUFFIX -${SO_MAJOR_VERSION}.dll) endif() +if(WIN32) + set(USE_SETMODE "-DUSE_SETMODE") +endif() if(WITH_12BIT) - set(COMPILE_FLAGS "-DGIF_SUPPORTED -DPPM_SUPPORTED -DUSE_SETMODE") + set(COMPILE_FLAGS "-DGIF_SUPPORTED -DPPM_SUPPORTED ${USE_SETMODE}") else() - set(COMPILE_FLAGS "-DBMP_SUPPORTED -DGIF_SUPPORTED -DPPM_SUPPORTED -DTARGA_SUPPORTED -DUSE_SETMODE") - set(CJPEG_BMP_SOURCES ../rdbmp.c ../rdtarga.c) - set(DJPEG_BMP_SOURCES ../wrbmp.c ../wrtarga.c) + set(COMPILE_FLAGS "-DBMP_SUPPORTED -DGIF_SUPPORTED -DPPM_SUPPORTED -DTARGA_SUPPORTED ${USE_SETMODE}") + set(CJPEG_BMP_SOURCES ../rdbmp.c ../rdtarga.c) + set(DJPEG_BMP_SOURCES ../wrbmp.c ../wrtarga.c) endif() add_executable(cjpeg ../cjpeg.c ../cdjpeg.c ../rdgif.c ../rdppm.c @@ -62,12 +82,12 @@ target_link_libraries(djpeg jpeg) add_executable(jpegtran ../jpegtran.c ../cdjpeg.c ../rdswitch.c ../transupp.c) target_link_libraries(jpegtran jpeg) -set_property(TARGET jpegtran PROPERTY COMPILE_FLAGS "-DUSE_SETMODE") +set_property(TARGET jpegtran PROPERTY COMPILE_FLAGS "${USE_SETMODE}") add_executable(jcstest ../jcstest.c) target_link_libraries(jcstest jpeg) install(TARGETS jpeg cjpeg djpeg jpegtran - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin) + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION 
${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/simd/CMakeLists.txt b/simd/CMakeLists.txt index 6e898d8..8dbd7f1 100755 --- a/simd/CMakeLists.txt +++ b/simd/CMakeLists.txt @@ -1,81 +1,374 @@ -if(NOT DEFINED NASM) - find_program(NASM NAMES nasm yasm DOC "Path to NASM/YASM executable") +macro(simd_fail message) + if(REQUIRE_SIMD) + message(FATAL_ERROR "${message}.") + else() + message(WARNING "${message}. Performance will suffer.") + set(WITH_SIMD 0 PARENT_SCOPE) + endif() +endmacro() + + +############################################################################### +# x86[-64] (NASM) +############################################################################### + +if(CPU_TYPE STREQUAL "x86_64" OR CPU_TYPE STREQUAL "i386") + +set(CMAKE_ASM_NASM_FLAGS_DEBUG_INIT "-g") +set(CMAKE_ASM_NASM_FLAGS_RELWITHDEBINFO_INIT "-g") + +# Allow the location of the NASM executable to be specified using the ASM_NASM +# environment variable. This should happen automatically, but unfortunately +# enable_language(ASM_NASM) doesn't parse the ASM_NASM environment variable +# until after CMAKE_ASM_NASM_COMPILER has been populated with the results of +# searching for NASM or YASM in the PATH. +if(NOT DEFINED CMAKE_ASM_NASM_COMPILER AND DEFINED ENV{ASM_NASM}) + set(CMAKE_ASM_NASM_COMPILER $ENV{ASM_NASM}) endif() -message(STATUS "NASM = ${NASM}") -if(SIMD_X86_64) - set(NAFLAGS -fwin64 -DWIN64 -D__x86_64__) -else() +if(CPU_TYPE STREQUAL "x86_64") + if(CYGWIN) + set(CMAKE_ASM_NASM_OBJECT_FORMAT win64) + endif() +elseif(CPU_TYPE STREQUAL "i386") if(BORLAND) - set(NAFLAGS -fobj -DOBJ32) - else() - set(NAFLAGS -fwin32 -DWIN32) + set(CMAKE_ASM_NASM_OBJECT_FORMAT obj) + elseif(CYGWIN) + set(CMAKE_ASM_NASM_OBJECT_FORMAT win32) endif() endif() -set(NAFLAGS ${NAFLAGS} -I${CMAKE_SOURCE_DIR}/win/ -I${CMAKE_CURRENT_SOURCE_DIR}/) -# This only works if building from the command line. 
There is currently no way -# to set a variable's value based on the build type when using the MSVC IDE. -if(CMAKE_BUILD_TYPE STREQUAL "Debug" - OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - set(NAFLAGS ${NAFLAGS} -g) +enable_language(ASM_NASM) +message(STATUS "CMAKE_ASM_NASM_COMPILER = ${CMAKE_ASM_NASM_COMPILER}") + +if(CMAKE_ASM_NASM_OBJECT_FORMAT MATCHES "macho*") + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DMACHO") +elseif(CMAKE_ASM_NASM_OBJECT_FORMAT MATCHES "elf*") + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DELF") + set(CMAKE_ASM_NASM_DEBUG_FORMAT "dwarf2") +endif() +if(CPU_TYPE STREQUAL "x86_64") + if(WIN32 OR CYGWIN) + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DWIN64") + endif() + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -D__x86_64__") +elseif(CPU_TYPE STREQUAL "i386") + if(BORLAND) + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DOBJ32") + elseif(WIN32 OR CYGWIN) + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DWIN32") + endif() +endif() + +message(STATUS "CMAKE_ASM_NASM_OBJECT_FORMAT = ${CMAKE_ASM_NASM_OBJECT_FORMAT}") + +if(NOT CMAKE_ASM_NASM_OBJECT_FORMAT) + simd_fail("SIMD extensions disabled: could not determine NASM object format") + return() +endif() + +get_filename_component(CMAKE_ASM_NASM_COMPILER_TYPE + "${CMAKE_ASM_NASM_COMPILER}" NAME_WE) +if(CMAKE_ASM_NASM_COMPILER_TYPE MATCHES "yasm") + foreach(var CMAKE_ASM_NASM_FLAGS_DEBUG CMAKE_ASM_NASM_FLAGS_RELWITHDEBINFO) + if(${var} STREQUAL "-g") + if(CMAKE_ASM_NASM_DEBUG_FORMAT) + set_property(CACHE ${var} PROPERTY VALUE "-g ${CMAKE_ASM_NASM_DEBUG_FORMAT}") + else() + set_property(CACHE ${var} PROPERTY VALUE "") + endif() + endif() + endforeach() +endif() + +if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED)) + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DPIC") +endif() + +string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) +set(EFFECTIVE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} 
${CMAKE_ASM_NASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}") +message(STATUS "CMAKE_ASM_NASM_FLAGS = ${EFFECTIVE_ASM_NASM_FLAGS}") + +set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -I\"${CMAKE_CURRENT_SOURCE_DIR}/nasm/\" -I\"${CMAKE_CURRENT_SOURCE_DIR}/${CPU_TYPE}/\"") + +set(GREP grep) +if(CMAKE_SYSTEM_NAME STREQUAL "SunOS") + set(GREP ggrep) endif() +add_custom_target(jsimdcfg COMMAND + ${CMAKE_C_COMPILER} -E -I${CMAKE_BINARY_DIR} -I${CMAKE_CURRENT_BINARY_DIR} + -I${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/nasm/jsimdcfg.inc.h | + ${GREP} -E '^[\;%]|^\ %' | sed 's%_cpp_protection_%%' | + sed 's@% define@%define@g' >${CMAKE_CURRENT_SOURCE_DIR}/nasm/jsimdcfg.inc) -if(SIMD_X86_64) - set(SIMD_BASENAMES jfdctflt-sse-64 jccolor-sse2-64 jcgray-sse2-64 - jchuff-sse2-64 jcsample-sse2-64 jdcolor-sse2-64 jdmerge-sse2-64 - jdsample-sse2-64 jfdctfst-sse2-64 jfdctint-sse2-64 jidctflt-sse2-64 - jidctfst-sse2-64 jidctint-sse2-64 jidctred-sse2-64 jquantf-sse2-64 - jquanti-sse2-64) - message(STATUS "Building x86_64 SIMD extensions") +if(CPU_TYPE STREQUAL "x86_64") + set(SIMD_SOURCES x86_64/jsimdcpu.asm x86_64/jfdctflt-sse.asm + x86_64/jccolor-sse2.asm x86_64/jcgray-sse2.asm x86_64/jchuff-sse2.asm + x86_64/jcphuff-sse2.asm x86_64/jcsample-sse2.asm x86_64/jdcolor-sse2.asm + x86_64/jdmerge-sse2.asm x86_64/jdsample-sse2.asm x86_64/jfdctfst-sse2.asm + x86_64/jfdctint-sse2.asm x86_64/jidctflt-sse2.asm x86_64/jidctfst-sse2.asm + x86_64/jidctint-sse2.asm x86_64/jidctred-sse2.asm x86_64/jquantf-sse2.asm + x86_64/jquanti-sse2.asm + x86_64/jccolor-avx2.asm x86_64/jcgray-avx2.asm x86_64/jcsample-avx2.asm + x86_64/jdcolor-avx2.asm x86_64/jdmerge-avx2.asm x86_64/jdsample-avx2.asm + x86_64/jfdctint-avx2.asm x86_64/jidctint-avx2.asm x86_64/jquanti-avx2.asm) else() - set(SIMD_BASENAMES jsimdcpu jfdctflt-3dn jidctflt-3dn jquant-3dn jccolor-mmx - jcgray-mmx jcsample-mmx jdcolor-mmx jdmerge-mmx jdsample-mmx jfdctfst-mmx - jfdctint-mmx jidctfst-mmx jidctint-mmx jidctred-mmx jquant-mmx 
jfdctflt-sse - jidctflt-sse jquant-sse jccolor-sse2 jcgray-sse2 jchuff-sse2 jcsample-sse2 - jdcolor-sse2 jdmerge-sse2 jdsample-sse2 jfdctfst-sse2 jfdctint-sse2 - jidctflt-sse2 jidctfst-sse2 jidctint-sse2 jidctred-sse2 jquantf-sse2 - jquanti-sse2) - message(STATUS "Building i386 SIMD extensions") + set(SIMD_SOURCES i386/jsimdcpu.asm i386/jfdctflt-3dn.asm + i386/jidctflt-3dn.asm i386/jquant-3dn.asm + i386/jccolor-mmx.asm i386/jcgray-mmx.asm i386/jcsample-mmx.asm + i386/jdcolor-mmx.asm i386/jdmerge-mmx.asm i386/jdsample-mmx.asm + i386/jfdctfst-mmx.asm i386/jfdctint-mmx.asm i386/jidctfst-mmx.asm + i386/jidctint-mmx.asm i386/jidctred-mmx.asm i386/jquant-mmx.asm + i386/jfdctflt-sse.asm i386/jidctflt-sse.asm i386/jquant-sse.asm + i386/jccolor-sse2.asm i386/jcgray-sse2.asm i386/jchuff-sse2.asm + i386/jcphuff-sse2.asm i386/jcsample-sse2.asm i386/jdcolor-sse2.asm + i386/jdmerge-sse2.asm i386/jdsample-sse2.asm i386/jfdctfst-sse2.asm + i386/jfdctint-sse2.asm i386/jidctflt-sse2.asm i386/jidctfst-sse2.asm + i386/jidctint-sse2.asm i386/jidctred-sse2.asm i386/jquantf-sse2.asm + i386/jquanti-sse2.asm + i386/jccolor-avx2.asm i386/jcgray-avx2.asm i386/jcsample-avx2.asm + i386/jdcolor-avx2.asm i386/jdmerge-avx2.asm i386/jdsample-avx2.asm + i386/jfdctint-avx2.asm i386/jidctint-avx2.asm i386/jquanti-avx2.asm) endif() if(MSVC_IDE) set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}") -else() - set(OBJDIR ${CMAKE_CURRENT_BINARY_DIR}) + string(REGEX REPLACE " " ";" CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS}") endif() -file(GLOB INC_FILES *.inc) +file(GLOB INC_FILES nasm/*.inc) -foreach(file ${SIMD_BASENAMES}) - set(DEPFILE "") - set(SIMD_SRC ${CMAKE_CURRENT_SOURCE_DIR}/${file}.asm) +foreach(file ${SIMD_SOURCES}) + set(OBJECT_DEPENDS "") if(${file} MATCHES jccolor) - set(DEPFILE ${file}) - string(REGEX REPLACE "jccolor" "jccolext" DEPFILE ${DEPFILE}) - set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + string(REGEX REPLACE "jccolor" "jccolext" DEPFILE ${file}) + 
set(OBJECT_DEPENDS ${OBJECT_DEPENDS} + ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}) endif() if(${file} MATCHES jcgray) - set(DEPFILE ${file}) - string(REGEX REPLACE "jcgray" "jcgryext" DEPFILE ${DEPFILE}) - set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + string(REGEX REPLACE "jcgray" "jcgryext" DEPFILE ${file}) + set(OBJECT_DEPENDS ${OBJECT_DEPENDS} + ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}) endif() if(${file} MATCHES jdcolor) - set(DEPFILE ${file}) - string(REGEX REPLACE "jdcolor" "jdcolext" DEPFILE ${DEPFILE}) - set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + string(REGEX REPLACE "jdcolor" "jdcolext" DEPFILE ${file}) + set(OBJECT_DEPENDS ${OBJECT_DEPENDS} + ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}) endif() if(${file} MATCHES jdmerge) - set(DEPFILE ${file}) - string(REGEX REPLACE "jdmerge" "jdmrgext" DEPFILE ${DEPFILE}) - set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm) + string(REGEX REPLACE "jdmerge" "jdmrgext" DEPFILE ${file}) + set(OBJECT_DEPENDS ${OBJECT_DEPENDS} + ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}) + endif() + set(OBJECT_DEPENDS ${OBJECT_DEPENDS} ${INC_FILES}) + if(MSVC_IDE) + # The CMake Visual Studio generators do not work properly with the ASM_NASM + # language, so we have to go rogue here and use a custom command like we + # did in prior versions of libjpeg-turbo. (This is why we can't have nice + # things.) 
+ string(REGEX REPLACE "${CPU_TYPE}/" "" filename ${file}) + set(SIMD_OBJ ${OBJDIR}/${filename}.obj) + add_custom_command(OUTPUT ${SIMD_OBJ} DEPENDS ${file} ${OBJECT_DEPENDS} + COMMAND ${CMAKE_ASM_NASM_COMPILER} -f${CMAKE_ASM_NASM_OBJECT_FORMAT} + ${CMAKE_ASM_NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/${file} + -o${SIMD_OBJ}) + set(SIMD_OBJS ${SIMD_OBJS} ${SIMD_OBJ}) + else() + set_source_files_properties(${file} PROPERTIES OBJECT_DEPENDS + "${OBJECT_DEPENDS}") endif() - set(SIMD_OBJ ${OBJDIR}/${file}.obj) - add_custom_command(OUTPUT ${SIMD_OBJ} - DEPENDS ${SIMD_SRC} ${DEPFILE} ${INC_FILES} - COMMAND ${NASM} ${NAFLAGS} ${SIMD_SRC} -o${SIMD_OBJ}) - set(SIMD_OBJS ${SIMD_OBJS} ${SIMD_OBJ}) endforeach() -set(SIMD_OBJS ${SIMD_OBJS} PARENT_SCOPE) -add_custom_target(simd DEPENDS ${SIMD_OBJS}) +if(MSVC_IDE) + set(SIMD_OBJS ${SIMD_OBJS} PARENT_SCOPE) + add_library(simd OBJECT ${CPU_TYPE}/jsimd.c) + add_custom_target(simd-objs DEPENDS ${SIMD_OBJS}) + add_dependencies(simd simd-objs) +else() + add_library(simd OBJECT ${SIMD_SOURCES} ${CPU_TYPE}/jsimd.c) +endif() +if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED)) + set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1) +endif() + + +############################################################################### +# ARM (GAS) +############################################################################### + +elseif(CPU_TYPE STREQUAL "arm64" OR CPU_TYPE STREQUAL "arm") + +enable_language(ASM) + +set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_ASM_FLAGS}") + +string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) +set(EFFECTIVE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CMAKE_ASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}") +message(STATUS "CMAKE_ASM_FLAGS = ${EFFECTIVE_ASM_FLAGS}") + +# Test whether we need gas-preprocessor.pl +if(CPU_TYPE STREQUAL "arm") + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/gastest.S " + .text + .fpu neon + .arch armv7a + .object_arch armv4 + .arm + pld [r0] + vmovn.u16 d0, q0") +else() + file(WRITE 
${CMAKE_CURRENT_BINARY_DIR}/gastest.S " + .text + MYVAR .req x0 + movi v0.16b, #100 + mov MYVAR, #100 + .unreq MYVAR") +endif() + +separate_arguments(CMAKE_ASM_FLAGS_SEP UNIX_COMMAND "${CMAKE_ASM_FLAGS}") + +execute_process(COMMAND ${CMAKE_ASM_COMPILER} ${CMAKE_ASM_FLAGS_SEP} + -x assembler-with-cpp -c ${CMAKE_CURRENT_BINARY_DIR}/gastest.S + RESULT_VARIABLE RESULT OUTPUT_VARIABLE OUTPUT ERROR_VARIABLE ERROR) +if(NOT RESULT EQUAL 0) + message(STATUS "GAS appears to be broken. Trying gas-preprocessor.pl ...") + execute_process(COMMAND gas-preprocessor.pl ${CMAKE_ASM_COMPILER} + ${CMAKE_ASM_FLAGS_SEP} -x assembler-with-cpp -c + ${CMAKE_CURRENT_BINARY_DIR}/gastest.S + RESULT_VARIABLE RESULT OUTPUT_VARIABLE OUTPUT ERROR_VARIABLE ERROR) + if(NOT RESULT EQUAL 0) + simd_fail("SIMD extensions disabled: GAS is not working properly") + return() + else() + message(STATUS "Using gas-preprocessor.pl") + configure_file(gas-preprocessor.in gas-preprocessor @ONLY) + set(CMAKE_ASM_COMPILER ${CMAKE_CURRENT_BINARY_DIR}/gas-preprocessor) + endif() +else() + message(STATUS "GAS is working properly") +endif() + +file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/gastest.S) + +add_library(simd OBJECT ${CPU_TYPE}/jsimd_neon.S ${CPU_TYPE}/jsimd.c) + +if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED) + set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1) +endif() + + +############################################################################### +# MIPS (GAS) +############################################################################### + +elseif(CPU_TYPE STREQUAL "mips" OR CPU_TYPE STREQUAL "mipsel") + +enable_language(ASM) + +string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) +set(EFFECTIVE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CMAKE_ASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}") +message(STATUS "CMAKE_ASM_FLAGS = ${EFFECTIVE_ASM_FLAGS}") + +set(CMAKE_REQUIRED_FLAGS -mdspr2) + +check_c_source_compiles(" + #if !(defined(__mips__) && __mips_isa_rev >= 2) + #error MIPS DSPr2 is currently only 
available on MIPS32r2 platforms. + #endif + int main(void) { + int c = 0, a = 0, b = 0; + __asm__ __volatile__ ( + \"precr.qb.ph %[c], %[a], %[b]\" + : [c] \"=r\" (c) + : [a] \"r\" (a), [b] \"r\" (b) + ); + return c; + }" HAVE_DSPR2) + +unset(CMAKE_REQUIRED_FLAGS) + +if(NOT HAVE_DSPR2) + simd_fail("SIMD extensions not available for this CPU") + return() +endif() + +add_library(simd OBJECT mips/jsimd_dspr2.S mips/jsimd.c) + +if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED) + set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1) +endif() + +############################################################################### +# Loongson (Intrinsics) +############################################################################### + +elseif(CPU_TYPE STREQUAL "loongson") + +set(SIMD_SOURCES loongson/jccolor-mmi.c loongson/jcsample-mmi.c + loongson/jdcolor-mmi.c loongson/jdsample-mmi.c loongson/jfdctint-mmi.c + loongson/jidctint-mmi.c loongson/jquanti-mmi.c) + +if(CMAKE_COMPILER_IS_GNUCC) + foreach(file ${SIMD_SOURCES}) + set_property(SOURCE ${file} APPEND_STRING PROPERTY COMPILE_FLAGS + " -fno-strict-aliasing") + endforeach() +endif() + +add_library(simd OBJECT ${SIMD_SOURCES} loongson/jsimd.c) + +if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED) + set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1) +endif() + +############################################################################### +# PowerPC (Intrinsics) +############################################################################### + +elseif(CPU_TYPE STREQUAL "powerpc") + +set(CMAKE_REQUIRED_FLAGS -maltivec) + +check_c_source_compiles(" + #include <altivec.h> + int main(void) { + __vector int vi = { 0, 0, 0, 0 }; + int i[4]; + vec_st(vi, 0, i); + return i[0]; + }" HAVE_ALTIVEC) + +unset(CMAKE_REQUIRED_FLAGS) + +if(NOT HAVE_ALTIVEC) + simd_fail("SIMD extensions not available for this CPU (PowerPC SPE)") + return() +endif() + +set(SIMD_SOURCES powerpc/jccolor-altivec.c powerpc/jcgray-altivec.c
+ powerpc/jcsample-altivec.c powerpc/jdcolor-altivec.c + powerpc/jdmerge-altivec.c powerpc/jdsample-altivec.c + powerpc/jfdctfst-altivec.c powerpc/jfdctint-altivec.c + powerpc/jidctfst-altivec.c powerpc/jidctint-altivec.c + powerpc/jquanti-altivec.c) + +set_source_files_properties(${SIMD_SOURCES} PROPERTIES + COMPILE_FLAGS -maltivec) + +add_library(simd OBJECT ${SIMD_SOURCES} powerpc/jsimd.c) + +if(CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED) + set_target_properties(simd PROPERTIES POSITION_INDEPENDENT_CODE 1) +endif() + + +############################################################################### +# None +############################################################################### + +else() + +simd_fail("SIMD extensions not available for this CPU (${CMAKE_SYSTEM_PROCESSOR})") + +endif() # CPU_TYPE diff --git a/simd/Makefile.am b/simd/Makefile.am deleted file mode 100644 index b8660d1..0000000 --- a/simd/Makefile.am +++ /dev/null @@ -1,102 +0,0 @@ -noinst_LTLIBRARIES = libsimd.la - -BUILT_SOURCES = jsimdcfg.inc - -EXTRA_DIST = nasm_lt.sh CMakeLists.txt \ - jccolext-mmx.asm jcgryext-mmx.asm jdcolext-mmx.asm jdmrgext-mmx.asm \ - jccolext-sse2.asm jcgryext-sse2.asm jdcolext-sse2.asm jdmrgext-sse2.asm \ - jccolext-sse2-64.asm jcgryext-sse2-64.asm jdcolext-sse2-64.asm \ - jdmrgext-sse2-64.asm jccolext-altivec.c jcgryext-altivec.c \ - jdcolext-altivec.c jdmrgext-altivec.c - -if SIMD_X86_64 - -libsimd_la_SOURCES = jsimd_x86_64.c jsimd.h jsimdcfg.inc.h jsimdext.inc \ - jcolsamp.inc jdct.inc jpeg_nbits_table.inc jfdctflt-sse-64.asm \ - jccolor-sse2-64.asm jcgray-sse2-64.asm jchuff-sse2-64.asm \ - jcsample-sse2-64.asm jdcolor-sse2-64.asm jdmerge-sse2-64.asm \ - jdsample-sse2-64.asm jfdctfst-sse2-64.asm jfdctint-sse2-64.asm \ - jidctflt-sse2-64.asm jidctfst-sse2-64.asm jidctint-sse2-64.asm \ - jidctred-sse2-64.asm jquantf-sse2-64.asm jquanti-sse2-64.asm - -jccolor-sse2-64.lo: jccolext-sse2-64.asm -jcgray-sse2-64.lo: jcgryext-sse2-64.asm -jdcolor-sse2-64.lo: 
jdcolext-sse2-64.asm -jdmerge-sse2-64.lo: jdmrgext-sse2-64.asm - -endif - -if SIMD_I386 - -libsimd_la_SOURCES = jsimd_i386.c jsimd.h jsimdcfg.inc.h jsimdext.inc \ - jcolsamp.inc jdct.inc jpeg_nbits_table.inc jsimdcpu.asm \ - jfdctflt-3dn.asm jidctflt-3dn.asm jquant-3dn.asm \ - jccolor-mmx.asm jcgray-mmx.asm jcsample-mmx.asm \ - jdcolor-mmx.asm jdmerge-mmx.asm jdsample-mmx.asm \ - jfdctfst-mmx.asm jfdctint-mmx.asm jidctfst-mmx.asm \ - jidctint-mmx.asm jidctred-mmx.asm jquant-mmx.asm \ - jfdctflt-sse.asm jidctflt-sse.asm jquant-sse.asm \ - jccolor-sse2.asm jcgray-sse2.asm jchuff-sse2.asm \ - jcsample-sse2.asm jdcolor-sse2.asm jdmerge-sse2.asm \ - jdsample-sse2.asm jfdctfst-sse2.asm jfdctint-sse2.asm \ - jidctflt-sse2.asm jidctfst-sse2.asm jidctint-sse2.asm \ - jidctred-sse2.asm jquantf-sse2.asm jquanti-sse2.asm - -jccolor-mmx.lo: jccolext-mmx.asm -jcgray.-mmx.lo: jcgryext-mmx.asm -jdcolor-mmx.lo: jdcolext-mmx.asm -jdmerge-mmx.lo: jdmrgext-mmx.asm -jccolor-sse2.lo: jccolext-sse2.asm -jcgray-sse2.lo: jcgryext-sse2.asm -jdcolor-sse2.lo: jdcolext-sse2.asm -jdmerge-sse2.lo: jdmrgext-sse2.asm - -endif - -if SIMD_ARM - -libsimd_la_SOURCES = jsimd_arm.c jsimd_arm_neon.S - -endif - -if SIMD_ARM_64 - -libsimd_la_SOURCES = jsimd_arm64.c jsimd_arm64_neon.S - -endif - -if SIMD_MIPS - -libsimd_la_SOURCES = jsimd_mips.c jsimd_mips_dspr2_asm.h jsimd_mips_dspr2.S - -endif - -if SIMD_POWERPC - -noinst_LTLIBRARIES += libsimd_altivec.la - -libsimd_altivec_la_SOURCES = \ - jccolor-altivec.c jcgray-altivec.c jcsample-altivec.c \ - jdcolor-altivec.c jdmerge-altivec.c jdsample-altivec.c \ - jfdctfst-altivec.c jfdctint-altivec.c \ - jidctfst-altivec.c jidctint-altivec.c \ - jquanti-altivec.c -libsimd_altivec_la_CFLAGS = -maltivec - -jccolor-altivec.lo: jccolext-altivec.c -jcgray-altivec.lo: jcgryext-altivec.c -jdcolor-altivec.lo: jdcolext-altivec.c -jdmerge-altivec.lo: jdmrgext-altivec.c - -libsimd_la_SOURCES = jsimd_powerpc.c jsimd_altivec.h jcsample.h -libsimd_la_LIBADD = 
libsimd_altivec.la - -endif - -AM_CPPFLAGS = -I$(top_srcdir) - -.asm.lo: - $(AM_V_GEN) $(LIBTOOL) $(AM_V_lt) --mode=compile --tag NASM $(srcdir)/nasm_lt.sh $(AM_V_lt) $(NASM) $(NAFLAGS) -I$(srcdir) -I. $< -o $@ - -jsimdcfg.inc: $(srcdir)/jsimdcfg.inc.h ../jpeglib.h ../jconfig.h ../jmorecfg.h - $(AM_V_GEN) $(CPP) -I$(top_builddir) -I$(top_builddir)/simd $(srcdir)/jsimdcfg.inc.h | $(EGREP) "^[\;%]|^\ %" | sed 's%_cpp_protection_%%' | sed 's@% define@%define@g' > $@ diff --git a/simd/jsimd_arm.c b/simd/arm/jsimd.c similarity index 52% rename from simd/jsimd_arm.c rename to simd/arm/jsimd.c index 0b955cd..0fb8197 100644 --- a/simd/jsimd_arm.c +++ b/simd/arm/jsimd.c @@ -3,8 +3,8 @@ * * Copyright 2009 Pierre Ossman for Cendio AB * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies). - * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander. - * Copyright (C) 2015-2016, Matthieu Darbois. + * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander. + * Copyright (C) 2015-2016, 2018, Matthieu Darbois. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. 
@@ -16,12 +16,12 @@ */ #define JPEG_INTERNALS -#include "../jinclude.h" -#include "../jpeglib.h" +#include "../../jinclude.h" +#include "../../jpeglib.h" +#include "../../jsimd.h" +#include "../../jdct.h" +#include "../../jsimddct.h" #include "../jsimd.h" -#include "../jdct.h" -#include "../jsimddct.h" -#include "jsimd.h" #include <stdio.h> #include <string.h> @@ -32,12 +32,13 @@ static unsigned int simd_huffman = 1; #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) -#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) +#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) LOCAL(int) -check_feature (char *buffer, char *feature) +check_feature(char *buffer, char *feature) { char *p; + if (*feature == 0) return 0; if (strncmp(buffer, "Features", 8) != 0) @@ -63,10 +64,11 @@ check_feature (char *buffer, char *feature) } LOCAL(int) -parse_proc_cpuinfo (int bufsize) +parse_proc_cpuinfo(int bufsize) { char *buffer = (char *)malloc(bufsize); FILE *fd; + simd_support = 0; if (!buffer) @@ -82,7 +84,7 @@ parse_proc_cpuinfo (int bufsize) return 0; } if (check_feature(buffer, "neon")) - simd_support |= JSIMD_ARM_NEON; + simd_support |= JSIMD_NEON; } fclose(fd); } @@ -98,9 +100,11 @@ parse_proc_cpuinfo (int bufsize) * FIXME: This code is racy under a multi-threaded environment.
*/ LOCAL(void) -init_simd (void) +init_simd(void) { +#ifndef NO_GETENV char *env = NULL; +#endif #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) int bufsize = 1024; /* an initial guess for the line buffer size limit */ #endif @@ -111,7 +115,7 @@ init_simd (void) simd_support = 0; #if defined(__ARM_NEON__) - simd_support |= JSIMD_ARM_NEON; + simd_support |= JSIMD_NEON; #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) /* We still have a chance to use NEON regardless of globally used * -mcpu/-mfpu options passed to gcc by performing runtime detection via @@ -123,20 +127,22 @@ init_simd (void) } #endif +#ifndef NO_GETENV /* Force different settings through environment variables */ env = getenv("JSIMD_FORCENEON"); if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support = JSIMD_ARM_NEON; + simd_support = JSIMD_NEON; env = getenv("JSIMD_FORCENONE"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_support = 0; env = getenv("JSIMD_NOHUFFENC"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_huffman = 0; +#endif } GLOBAL(int) -jsimd_can_rgb_ycc (void) +jsimd_can_rgb_ycc(void) { init_simd(); @@ -148,22 +154,20 @@ jsimd_can_rgb_ycc (void) if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_rgb_gray (void) +jsimd_can_rgb_gray(void) { - init_simd(); - return 0; } GLOBAL(int) -jsimd_can_ycc_rgb (void) +jsimd_can_ycc_rgb(void) { init_simd(); @@ -175,14 +179,14 @@ jsimd_can_ycc_rgb (void) if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_ycc_rgb565 (void) +jsimd_can_ycc_rgb565(void) { init_simd(); @@ -192,174 +196,160 @@ jsimd_can_ycc_rgb565 (void) if (sizeof(JDIMENSION) != 4) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 
1; return 0; } GLOBAL(void) -jsimd_rgb_ycc_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - - switch(cinfo->in_color_space) { - case JCS_EXT_RGB: - neonfct=jsimd_extrgb_ycc_convert_neon; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - neonfct=jsimd_extrgbx_ycc_convert_neon; - break; - case JCS_EXT_BGR: - neonfct=jsimd_extbgr_ycc_convert_neon; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - neonfct=jsimd_extbgrx_ycc_convert_neon; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - neonfct=jsimd_extxbgr_ycc_convert_neon; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - neonfct=jsimd_extxrgb_ycc_convert_neon; - break; - default: - neonfct=jsimd_extrgb_ycc_convert_neon; - break; +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + neonfct = jsimd_extrgb_ycc_convert_neon; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + neonfct = jsimd_extrgbx_ycc_convert_neon; + break; + case JCS_EXT_BGR: + neonfct = jsimd_extbgr_ycc_convert_neon; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + neonfct = jsimd_extbgrx_ycc_convert_neon; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + neonfct = jsimd_extxbgr_ycc_convert_neon; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + neonfct = jsimd_extxrgb_ycc_convert_neon; + break; + default: + neonfct = jsimd_extrgb_ycc_convert_neon; + break; } neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); } GLOBAL(void) -jsimd_rgb_gray_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, 
JDIMENSION output_row, + int num_rows) { } GLOBAL(void) -jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ - void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - neonfct=jsimd_ycc_extrgb_convert_neon; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - neonfct=jsimd_ycc_extrgbx_convert_neon; - break; - case JCS_EXT_BGR: - neonfct=jsimd_ycc_extbgr_convert_neon; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - neonfct=jsimd_ycc_extbgrx_convert_neon; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - neonfct=jsimd_ycc_extxbgr_convert_neon; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - neonfct=jsimd_ycc_extxrgb_convert_neon; - break; - default: - neonfct=jsimd_ycc_extrgb_convert_neon; - break; +jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ + void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + neonfct = jsimd_ycc_extrgb_convert_neon; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + neonfct = jsimd_ycc_extrgbx_convert_neon; + break; + case JCS_EXT_BGR: + neonfct = jsimd_ycc_extbgr_convert_neon; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + neonfct = jsimd_ycc_extbgrx_convert_neon; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + neonfct = jsimd_ycc_extxbgr_convert_neon; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + neonfct = jsimd_ycc_extxrgb_convert_neon; + break; + default: + neonfct = jsimd_ycc_extrgb_convert_neon; + break; } neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); } GLOBAL(void) -jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE 
input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, output_buf, num_rows); } GLOBAL(int) -jsimd_can_h2v2_downsample (void) +jsimd_can_h2v2_downsample(void) { - init_simd(); - return 0; } GLOBAL(int) -jsimd_can_h2v1_downsample (void) +jsimd_can_h2v1_downsample(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { } GLOBAL(void) -jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { } GLOBAL(int) -jsimd_can_h2v2_upsample (void) +jsimd_can_h2v2_upsample(void) { - init_simd(); - return 0; } GLOBAL(int) -jsimd_can_h2v1_upsample (void) +jsimd_can_h2v1_upsample(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_h2v2_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(void) -jsimd_h2v1_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(int) -jsimd_can_h2v2_fancy_upsample (void) +jsimd_can_h2v2_fancy_upsample(void) { - init_simd(); - return 0; } GLOBAL(int) -jsimd_can_h2v1_fancy_upsample (void) +jsimd_can_h2v1_fancy_upsample(void) { init_simd(); @@ -369,25 +359,21 @@ jsimd_can_h2v1_fancy_upsample (void) if (sizeof(JDIMENSION) 
!= 4) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(void) -jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(void) -jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor, compptr->downsampled_width, input_data, @@ -395,39 +381,31 @@ jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, } GLOBAL(int) -jsimd_can_h2v2_merged_upsample (void) +jsimd_can_h2v2_merged_upsample(void) { - init_simd(); - return 0; } GLOBAL(int) -jsimd_can_h2v1_merged_upsample (void) +jsimd_can_h2v1_merged_upsample(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { } GLOBAL(void) -jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { } GLOBAL(int) -jsimd_can_convsamp (void) +jsimd_can_convsamp(void) { init_simd(); @@ -441,43 +419,39 @@ jsimd_can_convsamp (void) if (sizeof(DCTELEM) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_convsamp_float (void) +jsimd_can_convsamp_float(void) { - 
init_simd(); - return 0; } GLOBAL(void) -jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM *workspace) +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) { jsimd_convsamp_neon(sample_data, start_col, workspace); } GLOBAL(void) -jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT *workspace) +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) { } GLOBAL(int) -jsimd_can_fdct_islow (void) +jsimd_can_fdct_islow(void) { - init_simd(); - return 0; } GLOBAL(int) -jsimd_can_fdct_ifast (void) +jsimd_can_fdct_ifast(void) { init_simd(); @@ -487,38 +461,36 @@ jsimd_can_fdct_ifast (void) if (sizeof(DCTELEM) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_fdct_float (void) +jsimd_can_fdct_float(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_fdct_islow (DCTELEM *data) +jsimd_fdct_islow(DCTELEM *data) { } GLOBAL(void) -jsimd_fdct_ifast (DCTELEM *data) +jsimd_fdct_ifast(DCTELEM *data) { jsimd_fdct_ifast_neon(data); } GLOBAL(void) -jsimd_fdct_float (FAST_FLOAT *data) +jsimd_fdct_float(FAST_FLOAT *data) { } GLOBAL(int) -jsimd_can_quantize (void) +jsimd_can_quantize(void) { init_simd(); @@ -530,35 +502,32 @@ jsimd_can_quantize (void) if (sizeof(DCTELEM) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_quantize_float (void) +jsimd_can_quantize_float(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, - DCTELEM *workspace) +jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) { jsimd_quantize_neon(coef_block, divisors, workspace); } GLOBAL(void) -jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, - FAST_FLOAT *workspace) +jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT 
*workspace) { } GLOBAL(int) -jsimd_can_idct_2x2 (void) +jsimd_can_idct_2x2(void) { init_simd(); @@ -574,14 +543,14 @@ jsimd_can_idct_2x2 (void) if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_idct_4x4 (void) +jsimd_can_idct_4x4(void) { init_simd(); @@ -597,32 +566,30 @@ jsimd_can_idct_4x4 (void) if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(void) -jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { - jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, - output_col); + jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col); } GLOBAL(void) -jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { - jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, - output_col); + jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col); } GLOBAL(int) -jsimd_can_idct_islow (void) +jsimd_can_idct_islow(void) { init_simd(); @@ -638,14 +605,14 @@ jsimd_can_idct_islow (void) if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_idct_ifast (void) +jsimd_can_idct_ifast(void) { init_simd(); @@ -663,47 +630,45 @@ jsimd_can_idct_ifast (void) if (IFAST_SCALE_BITS != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } 
GLOBAL(int) -jsimd_can_idct_float (void) +jsimd_can_idct_float(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col); } GLOBAL(void) -jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col); } GLOBAL(void) -jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(int) -jsimd_can_huff_encode_one_block (void) +jsimd_can_huff_encode_one_block(void) { init_simd(); @@ -712,17 +677,44 @@ jsimd_can_huff_encode_one_block (void) if (sizeof(JCOEF) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON && simd_huffman) + if (simd_support & JSIMD_NEON && simd_huffman) return 1; return 0; } -GLOBAL(JOCTET*) -jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, - int last_dc_val, c_derived_tbl *dctbl, - c_derived_tbl *actbl) +GLOBAL(JOCTET *) +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) { return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val, dctbl, actbl); } + +GLOBAL(int) +jsimd_can_encode_mcu_AC_first_prepare(void) +{ + return 0; +} + +GLOBAL(void) 
+jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *zerobits) +{ +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_refine_prepare(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits) +{ + return 0; +} diff --git a/simd/jsimd_arm_neon.S b/simd/arm/jsimd_neon.S similarity index 91% rename from simd/jsimd_arm_neon.S rename to simd/arm/jsimd_neon.S index cd26127..af929fe 100644 --- a/simd/jsimd_arm_neon.S +++ b/simd/arm/jsimd_neon.S @@ -2,12 +2,12 @@ * ARMv7 NEON optimizations for libjpeg-turbo * * Copyright (C) 2009-2011, Nokia Corporation and/or its subsidiary(-ies). - * All Rights Reserved. - * Author: Siarhei Siamashka + * All Rights Reserved. + * Author: Siarhei Siamashka * Copyright (C) 2014, Siarhei Siamashka. All Rights Reserved. * Copyright (C) 2014, Linaro Limited. All Rights Reserved. * Copyright (C) 2015, D. R. Commander. All Rights Reserved. - * Copyright (C) 2015-2016, Matthieu Darbois. All Rights Reserved. + * Copyright (C) 2015-2016, 2018, Matthieu Darbois. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -38,7 +38,7 @@ .syntax unified -#define RESPECT_STRICT_ALIGNMENT 1 +#define RESPECT_STRICT_ALIGNMENT 1 /*****************************************************************************/ @@ -46,6 +46,7 @@ /* Supplementary macro for setting function attributes */ .macro asm_function fname #ifdef __APPLE__ + .private_extern _\fname .globl _\fname _\fname: #else @@ -67,7 +68,7 @@ _\fname: .endm -#define CENTERJSAMPLE 128 +#define CENTERJSAMPLE 128 /*****************************************************************************/ @@ -75,115 +76,114 @@ _\fname: * Perform dequantization and inverse DCT on one block of coefficients. 
* * GLOBAL(void) - * jsimd_idct_islow_neon (void *dct_table, JCOEFPTR coef_block, - * JSAMPARRAY output_buf, JDIMENSION output_col) + * jsimd_idct_islow_neon(void *dct_table, JCOEFPTR coef_block, + * JSAMPARRAY output_buf, JDIMENSION output_col) */ -#define FIX_0_298631336 (2446) -#define FIX_0_390180644 (3196) -#define FIX_0_541196100 (4433) -#define FIX_0_765366865 (6270) -#define FIX_0_899976223 (7373) -#define FIX_1_175875602 (9633) -#define FIX_1_501321110 (12299) -#define FIX_1_847759065 (15137) -#define FIX_1_961570560 (16069) -#define FIX_2_053119869 (16819) -#define FIX_2_562915447 (20995) -#define FIX_3_072711026 (25172) - -#define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560) -#define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644) -#define FIX_0_541196100_MINUS_1_847759065 (FIX_0_541196100 - FIX_1_847759065) -#define FIX_3_072711026_MINUS_2_562915447 (FIX_3_072711026 - FIX_2_562915447) -#define FIX_0_298631336_MINUS_0_899976223 (FIX_0_298631336 - FIX_0_899976223) -#define FIX_1_501321110_MINUS_0_899976223 (FIX_1_501321110 - FIX_0_899976223) -#define FIX_2_053119869_MINUS_2_562915447 (FIX_2_053119869 - FIX_2_562915447) -#define FIX_0_541196100_PLUS_0_765366865 (FIX_0_541196100 + FIX_0_765366865) +#define FIX_0_298631336 (2446) +#define FIX_0_390180644 (3196) +#define FIX_0_541196100 (4433) +#define FIX_0_765366865 (6270) +#define FIX_0_899976223 (7373) +#define FIX_1_175875602 (9633) +#define FIX_1_501321110 (12299) +#define FIX_1_847759065 (15137) +#define FIX_1_961570560 (16069) +#define FIX_2_053119869 (16819) +#define FIX_2_562915447 (20995) +#define FIX_3_072711026 (25172) + +#define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560) +#define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644) +#define FIX_0_541196100_MINUS_1_847759065 (FIX_0_541196100 - FIX_1_847759065) +#define FIX_3_072711026_MINUS_2_562915447 (FIX_3_072711026 - FIX_2_562915447) +#define 
FIX_0_298631336_MINUS_0_899976223 (FIX_0_298631336 - FIX_0_899976223) +#define FIX_1_501321110_MINUS_0_899976223 (FIX_1_501321110 - FIX_0_899976223) +#define FIX_2_053119869_MINUS_2_562915447 (FIX_2_053119869 - FIX_2_562915447) +#define FIX_0_541196100_PLUS_0_765366865 (FIX_0_541196100 + FIX_0_765366865) /* * Reference SIMD-friendly 1-D ISLOW iDCT C implementation. * Uses some ideas from the comments in 'simd/jiss2int-64.asm' */ -#define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7) \ -{ \ - DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \ - JLONG q1, q2, q3, q4, q5, q6, q7; \ - JLONG tmp11_plus_tmp2, tmp11_minus_tmp2; \ - \ - /* 1-D iDCT input data */ \ - row0 = xrow0; \ - row1 = xrow1; \ - row2 = xrow2; \ - row3 = xrow3; \ - row4 = xrow4; \ - row5 = xrow5; \ - row6 = xrow6; \ - row7 = xrow7; \ - \ - q5 = row7 + row3; \ - q4 = row5 + row1; \ - q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) + \ - MULTIPLY(q4, FIX_1_175875602); \ - q7 = MULTIPLY(q5, FIX_1_175875602) + \ - MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644); \ - q2 = MULTIPLY(row2, FIX_0_541196100) + \ - MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \ - q4 = q6; \ - q3 = ((JLONG) row0 - (JLONG) row4) << 13; \ - q6 += MULTIPLY(row5, -FIX_2_562915447) + \ - MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \ - /* now we can use q1 (reloadable constants have been used up) */ \ - q1 = q3 + q2; \ - q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) + \ - MULTIPLY(row1, -FIX_0_899976223); \ - q5 = q7; \ - q1 = q1 + q6; \ - q7 += MULTIPLY(row7, -FIX_0_899976223) + \ - MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223); \ - \ - /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */ \ - tmp11_plus_tmp2 = q1; \ - row1 = 0; \ - \ - q1 = q1 - q6; \ - q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) + \ - MULTIPLY(row3, -FIX_2_562915447); \ - q1 = q1 - q6; \ - q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) + \ - MULTIPLY(row6, FIX_0_541196100); 
\ - q3 = q3 - q2; \ - \ - /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \ - tmp11_minus_tmp2 = q1; \ - \ - q1 = ((JLONG) row0 + (JLONG) row4) << 13; \ - q2 = q1 + q6; \ - q1 = q1 - q6; \ - \ - /* pick up the results */ \ - tmp0 = q4; \ - tmp1 = q5; \ - tmp2 = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2; \ - tmp3 = q7; \ - tmp10 = q2; \ - tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2; \ - tmp12 = q3; \ - tmp13 = q1; \ +#define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7) { \ + DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \ + JLONG q1, q2, q3, q4, q5, q6, q7; \ + JLONG tmp11_plus_tmp2, tmp11_minus_tmp2; \ + \ + /* 1-D iDCT input data */ \ + row0 = xrow0; \ + row1 = xrow1; \ + row2 = xrow2; \ + row3 = xrow3; \ + row4 = xrow4; \ + row5 = xrow5; \ + row6 = xrow6; \ + row7 = xrow7; \ + \ + q5 = row7 + row3; \ + q4 = row5 + row1; \ + q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) + \ + MULTIPLY(q4, FIX_1_175875602); \ + q7 = MULTIPLY(q5, FIX_1_175875602) + \ + MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644); \ + q2 = MULTIPLY(row2, FIX_0_541196100) + \ + MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \ + q4 = q6; \ + q3 = ((JLONG)row0 - (JLONG)row4) << 13; \ + q6 += MULTIPLY(row5, -FIX_2_562915447) + \ + MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \ + /* now we can use q1 (reloadable constants have been used up) */ \ + q1 = q3 + q2; \ + q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) + \ + MULTIPLY(row1, -FIX_0_899976223); \ + q5 = q7; \ + q1 = q1 + q6; \ + q7 += MULTIPLY(row7, -FIX_0_899976223) + \ + MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223); \ + \ + /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */ \ + tmp11_plus_tmp2 = q1; \ + row1 = 0; \ + \ + q1 = q1 - q6; \ + q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) + \ + MULTIPLY(row3, -FIX_2_562915447); \ + q1 = q1 - q6; \ + q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) + \ + MULTIPLY(row6, 
FIX_0_541196100); \ + q3 = q3 - q2; \ + \ + /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \ + tmp11_minus_tmp2 = q1; \ + \ + q1 = ((JLONG)row0 + (JLONG)row4) << 13; \ + q2 = q1 + q6; \ + q1 = q1 - q6; \ + \ + /* pick up the results */ \ + tmp0 = q4; \ + tmp1 = q5; \ + tmp2 = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2; \ + tmp3 = q7; \ + tmp10 = q2; \ + tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2; \ + tmp12 = q3; \ + tmp13 = q1; \ } -#define XFIX_0_899976223 d0[0] -#define XFIX_0_541196100 d0[1] -#define XFIX_2_562915447 d0[2] -#define XFIX_0_298631336_MINUS_0_899976223 d0[3] -#define XFIX_1_501321110_MINUS_0_899976223 d1[0] -#define XFIX_2_053119869_MINUS_2_562915447 d1[1] -#define XFIX_0_541196100_PLUS_0_765366865 d1[2] -#define XFIX_1_175875602 d1[3] -#define XFIX_1_175875602_MINUS_0_390180644 d2[0] -#define XFIX_0_541196100_MINUS_1_847759065 d2[1] -#define XFIX_3_072711026_MINUS_2_562915447 d2[2] -#define XFIX_1_175875602_MINUS_1_961570560 d2[3] +#define XFIX_0_899976223 d0[0] +#define XFIX_0_541196100 d0[1] +#define XFIX_2_562915447 d0[2] +#define XFIX_0_298631336_MINUS_0_899976223 d0[3] +#define XFIX_1_501321110_MINUS_0_899976223 d1[0] +#define XFIX_2_053119869_MINUS_2_562915447 d1[1] +#define XFIX_0_541196100_PLUS_0_765366865 d1[2] +#define XFIX_1_175875602 d1[3] +#define XFIX_1_175875602_MINUS_0_390180644 d2[0] +#define XFIX_0_541196100_MINUS_1_847759065 d2[1] +#define XFIX_3_072711026_MINUS_2_562915447 d2[2] +#define XFIX_1_175875602_MINUS_1_961570560 d2[3] .balign 16 jsimd_idct_islow_neon_consts: @@ -695,10 +695,10 @@ asm_function jsimd_idct_islow_neon * per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions. 
*/ -#define XFIX_1_082392200 d0[0] -#define XFIX_1_414213562 d0[1] -#define XFIX_1_847759065 d0[2] -#define XFIX_2_613125930 d0[3] +#define XFIX_1_082392200 d0[0] +#define XFIX_1_414213562 d0[1] +#define XFIX_1_847759065 d0[2] +#define XFIX_2_613125930 d0[3] .balign 16 jsimd_idct_ifast_neon_consts: @@ -923,35 +923,35 @@ asm_function jsimd_idct_ifast_neon #define CONST_BITS 13 -#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ -#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ -#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ -#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ -#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ -#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ -#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ -#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ -#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ -#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ -#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ -#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ -#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ -#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ +#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ +#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ +#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ +#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ +#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ +#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ +#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ +#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ +#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ +#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ +#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ +#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ +#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ +#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ .balign 16 jsimd_idct_4x4_neon_consts: - .short FIX_1_847759065 /* 
d0[0] */ - .short -FIX_0_765366865 /* d0[1] */ - .short -FIX_0_211164243 /* d0[2] */ - .short FIX_1_451774981 /* d0[3] */ - .short -FIX_2_172734803 /* d1[0] */ - .short FIX_1_061594337 /* d1[1] */ - .short -FIX_0_509795579 /* d1[2] */ - .short -FIX_0_601344887 /* d1[3] */ - .short FIX_0_899976223 /* d2[0] */ - .short FIX_2_562915447 /* d2[1] */ - .short 1 << (CONST_BITS+1) /* d2[2] */ - .short 0 /* d2[3] */ + .short FIX_1_847759065 /* d0[0] */ + .short -FIX_0_765366865 /* d0[1] */ + .short -FIX_0_211164243 /* d0[2] */ + .short FIX_1_451774981 /* d0[3] */ + .short -FIX_2_172734803 /* d1[0] */ + .short FIX_1_061594337 /* d1[1] */ + .short -FIX_0_509795579 /* d1[2] */ + .short -FIX_0_601344887 /* d1[3] */ + .short FIX_0_899976223 /* d2[0] */ + .short FIX_2_562915447 /* d2[1] */ + .short 1 << (CONST_BITS + 1) /* d2[2] */ + .short 0 /* d2[3] */ .macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29 vmull.s16 q14, \x4, d2[2] @@ -1994,10 +1994,10 @@ asm_function jsimd_convsamp_neon * rid of a bunch of VLD1.16 instructions */ -#define XFIX_0_382683433 d0[0] -#define XFIX_0_541196100 d0[1] -#define XFIX_0_707106781 d0[2] -#define XFIX_1_306562965 d0[3] +#define XFIX_0_382683433 d0[0] +#define XFIX_0_541196100 d0[1] +#define XFIX_0_707106781 d0[2] +#define XFIX_1_306562965 d0[3] .balign 16 jsimd_fdct_ifast_neon_consts: @@ -2107,8 +2107,8 @@ asm_function jsimd_fdct_ifast_neon /* * GLOBAL(void) - * jsimd_quantize_neon (JCOEFPTR coef_block, DCTELEM *divisors, - * DCTELEM *workspace); + * jsimd_quantize_neon(JCOEFPTR coef_block, DCTELEM *divisors, + * DCTELEM *workspace); * * Note: the code uses 2 stage pipelining in order to improve instructions * scheduling and eliminate stalls (this provides ~15% better @@ -2208,10 +2208,10 @@ asm_function jsimd_quantize_neon /* * GLOBAL(void) - * jsimd_h2v1_fancy_upsample_neon (int max_v_samp_factor, - * JDIMENSION downsampled_width, - * JSAMPARRAY input_data, - * JSAMPARRAY *output_data_ptr); + * 
jsimd_h2v1_fancy_upsample_neon(int max_v_samp_factor, + * JDIMENSION downsampled_width, + * JSAMPARRAY input_data, + * JSAMPARRAY *output_data_ptr); * * Note: the use of unaligned writes is the main remaining bottleneck in * this code, which can be potentially solved to get up to tens @@ -2444,10 +2444,10 @@ asm_function jsimd_h2v1_fancy_upsample_neon /*****************************************************************************/ /* - * GLOBAL(JOCTET*) - * jsimd_huff_encode_one_block (working_state *state, JOCTET *buffer, - * JCOEFPTR block, int last_dc_val, - * c_derived_tbl *dctbl, c_derived_tbl *actbl) + * GLOBAL(JOCTET *) + * jsimd_huff_encode_one_block(working_state *state, JOCTET *buffer, + * JCOEFPTR block, int last_dc_val, + * c_derived_tbl *dctbl, c_derived_tbl *actbl) * */ @@ -2731,7 +2731,7 @@ asm_function jsimd_huff_encode_one_block_neon ldr r11, [r0, #0x8] /* r11 = put_buffer */ ldr r4, [r0, #0xc] /* r4 = put_bits */ ldrh r2, [r6, #-128] /* r2 = nbits */ - ldrh r3, [r6] /* r3 = temp2 & (((JLONG) 1)< for Cendio AB * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies). - * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander. - * Copyright (C) 2015-2016, Matthieu Darbois. + * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander. + * Copyright (C) 2015-2016, 2018, Matthieu Darbois. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. 
@@ -16,20 +16,20 @@ */ #define JPEG_INTERNALS -#include "../jinclude.h" -#include "../jpeglib.h" +#include "../../jinclude.h" +#include "../../jpeglib.h" +#include "../../jsimd.h" +#include "../../jdct.h" +#include "../../jsimddct.h" #include "../jsimd.h" -#include "../jdct.h" -#include "../jsimddct.h" -#include "jsimd.h" #include #include #include -#define JSIMD_FASTLD3 1 -#define JSIMD_FASTST3 2 -#define JSIMD_FASTTBL 4 +#define JSIMD_FASTLD3 1 +#define JSIMD_FASTST3 2 +#define JSIMD_FASTTBL 4 static unsigned int simd_support = ~0; static unsigned int simd_huffman = 1; @@ -38,12 +38,13 @@ static unsigned int simd_features = JSIMD_FASTLD3 | JSIMD_FASTST3 | #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) -#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) +#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) LOCAL(int) -check_cpuinfo (char *buffer, const char *field, char *value) +check_cpuinfo(char *buffer, const char *field, char *value) { char *p; + if (*value == 0) return 0; if (strncmp(buffer, field, strlen(field)) != 0) @@ -69,7 +70,7 @@ check_cpuinfo (char *buffer, const char *field, char *value) } LOCAL(int) -parse_proc_cpuinfo (int bufsize) +parse_proc_cpuinfo(int bufsize) { char *buffer = (char *)malloc(bufsize); FILE *fd; @@ -119,9 +120,11 @@ parse_proc_cpuinfo (int bufsize) LOCAL(void) -init_simd (void) +init_simd(void) { +#ifndef NO_GETENV char *env = NULL; +#endif #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) int bufsize = 1024; /* an initial guess for the line buffer size limit */ #endif @@ -131,7 +134,7 @@ init_simd (void) simd_support = 0; - simd_support |= JSIMD_ARM_NEON; + simd_support |= JSIMD_NEON; #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) while (!parse_proc_cpuinfo(bufsize)) { bufsize *= 2; @@ -140,10 +143,11 @@ init_simd (void) } #endif +#ifndef NO_GETENV /* Force different settings through environment variables */ env = getenv("JSIMD_FORCENEON"); if ((env != 
NULL) && (strcmp(env, "1") == 0)) - simd_support = JSIMD_ARM_NEON; + simd_support = JSIMD_NEON; env = getenv("JSIMD_FORCENONE"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_support = 0; @@ -160,10 +164,11 @@ init_simd (void) simd_features |= JSIMD_FASTST3; if ((env != NULL) && (strcmp(env, "0") == 0)) simd_features &= ~JSIMD_FASTST3; +#endif } GLOBAL(int) -jsimd_can_rgb_ycc (void) +jsimd_can_rgb_ycc(void) { init_simd(); @@ -175,22 +180,20 @@ jsimd_can_rgb_ycc (void) if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_rgb_gray (void) +jsimd_can_rgb_gray(void) { - init_simd(); - return 0; } GLOBAL(int) -jsimd_can_ycc_rgb (void) +jsimd_can_ycc_rgb(void) { init_simd(); @@ -202,14 +205,14 @@ jsimd_can_ycc_rgb (void) if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_ycc_rgb565 (void) +jsimd_can_ycc_rgb565(void) { init_simd(); @@ -219,124 +222,124 @@ jsimd_can_ycc_rgb565 (void) if (sizeof(JDIMENSION) != 4) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(void) -jsimd_rgb_ycc_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - - switch(cinfo->in_color_space) { - case JCS_EXT_RGB: - if (simd_features & JSIMD_FASTLD3) - neonfct=jsimd_extrgb_ycc_convert_neon; - else - neonfct=jsimd_extrgb_ycc_convert_neon_slowld3; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - neonfct=jsimd_extrgbx_ycc_convert_neon; - break; - case JCS_EXT_BGR: - if (simd_features & JSIMD_FASTLD3) - neonfct=jsimd_extbgr_ycc_convert_neon; - else - neonfct=jsimd_extbgr_ycc_convert_neon_slowld3; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - 
neonfct=jsimd_extbgrx_ycc_convert_neon; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - neonfct=jsimd_extxbgr_ycc_convert_neon; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - neonfct=jsimd_extxrgb_ycc_convert_neon; - break; - default: - if (simd_features & JSIMD_FASTLD3) - neonfct=jsimd_extrgb_ycc_convert_neon; - else - neonfct=jsimd_extrgb_ycc_convert_neon_slowld3; - break; +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + if (simd_features & JSIMD_FASTLD3) + neonfct = jsimd_extrgb_ycc_convert_neon; + else + neonfct = jsimd_extrgb_ycc_convert_neon_slowld3; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + neonfct = jsimd_extrgbx_ycc_convert_neon; + break; + case JCS_EXT_BGR: + if (simd_features & JSIMD_FASTLD3) + neonfct = jsimd_extbgr_ycc_convert_neon; + else + neonfct = jsimd_extbgr_ycc_convert_neon_slowld3; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + neonfct = jsimd_extbgrx_ycc_convert_neon; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + neonfct = jsimd_extxbgr_ycc_convert_neon; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + neonfct = jsimd_extxrgb_ycc_convert_neon; + break; + default: + if (simd_features & JSIMD_FASTLD3) + neonfct = jsimd_extrgb_ycc_convert_neon; + else + neonfct = jsimd_extrgb_ycc_convert_neon_slowld3; + break; } neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); } GLOBAL(void) -jsimd_rgb_gray_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) { } GLOBAL(void) -jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, 
int num_rows) -{ - void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - if (simd_features & JSIMD_FASTST3) - neonfct=jsimd_ycc_extrgb_convert_neon; - else - neonfct=jsimd_ycc_extrgb_convert_neon_slowst3; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - neonfct=jsimd_ycc_extrgbx_convert_neon; - break; - case JCS_EXT_BGR: - if (simd_features & JSIMD_FASTST3) - neonfct=jsimd_ycc_extbgr_convert_neon; - else - neonfct=jsimd_ycc_extbgr_convert_neon_slowst3; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - neonfct=jsimd_ycc_extbgrx_convert_neon; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - neonfct=jsimd_ycc_extxbgr_convert_neon; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - neonfct=jsimd_ycc_extxrgb_convert_neon; - break; - default: - if (simd_features & JSIMD_FASTST3) - neonfct=jsimd_ycc_extrgb_convert_neon; - else - neonfct=jsimd_ycc_extrgb_convert_neon_slowst3; - break; +jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ + void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + if (simd_features & JSIMD_FASTST3) + neonfct = jsimd_ycc_extrgb_convert_neon; + else + neonfct = jsimd_ycc_extrgb_convert_neon_slowst3; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + neonfct = jsimd_ycc_extrgbx_convert_neon; + break; + case JCS_EXT_BGR: + if (simd_features & JSIMD_FASTST3) + neonfct = jsimd_ycc_extbgr_convert_neon; + else + neonfct = jsimd_ycc_extbgr_convert_neon_slowst3; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + neonfct = jsimd_ycc_extbgrx_convert_neon; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + neonfct = jsimd_ycc_extxbgr_convert_neon; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + neonfct = jsimd_ycc_extxrgb_convert_neon; + break; + default: + if (simd_features & JSIMD_FASTST3) + neonfct = 
jsimd_ycc_extrgb_convert_neon; + else + neonfct = jsimd_ycc_extrgb_convert_neon_slowst3; + break; } neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); } GLOBAL(void) -jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, output_buf, num_rows); } GLOBAL(int) -jsimd_can_h2v2_downsample (void) +jsimd_can_h2v2_downsample(void) { init_simd(); @@ -348,14 +351,14 @@ jsimd_can_h2v2_downsample (void) if (sizeof(JDIMENSION) != 4) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_h2v1_downsample (void) +jsimd_can_h2v1_downsample(void) { init_simd(); @@ -367,15 +370,15 @@ jsimd_can_h2v1_downsample (void) if (sizeof(JDIMENSION) != 4) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(void) -jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor, compptr->width_in_blocks, @@ -383,8 +386,8 @@ jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, } GLOBAL(void) -jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor, compptr->width_in_blocks, @@ 
-392,103 +395,79 @@ jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, } GLOBAL(int) -jsimd_can_h2v2_upsample (void) +jsimd_can_h2v2_upsample(void) { - init_simd(); - return 0; } GLOBAL(int) -jsimd_can_h2v1_upsample (void) +jsimd_can_h2v1_upsample(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_h2v2_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(void) -jsimd_h2v1_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(int) -jsimd_can_h2v2_fancy_upsample (void) +jsimd_can_h2v2_fancy_upsample(void) { - init_simd(); - return 0; } GLOBAL(int) -jsimd_can_h2v1_fancy_upsample (void) +jsimd_can_h2v1_fancy_upsample(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(void) -jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { } GLOBAL(int) -jsimd_can_h2v2_merged_upsample (void) +jsimd_can_h2v2_merged_upsample(void) { - init_simd(); - return 0; } GLOBAL(int) -jsimd_can_h2v1_merged_upsample (void) +jsimd_can_h2v1_merged_upsample(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, - 
JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { } GLOBAL(void) -jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) { } GLOBAL(int) -jsimd_can_convsamp (void) +jsimd_can_convsamp(void) { init_simd(); @@ -502,35 +481,33 @@ jsimd_can_convsamp (void) if (sizeof(DCTELEM) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_convsamp_float (void) +jsimd_can_convsamp_float(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM *workspace) +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) { jsimd_convsamp_neon(sample_data, start_col, workspace); } GLOBAL(void) -jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT *workspace) +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) { } GLOBAL(int) -jsimd_can_fdct_islow (void) +jsimd_can_fdct_islow(void) { init_simd(); @@ -540,14 +517,14 @@ jsimd_can_fdct_islow (void) if (sizeof(DCTELEM) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_fdct_ifast (void) +jsimd_can_fdct_ifast(void) { init_simd(); @@ -557,39 +534,37 @@ jsimd_can_fdct_ifast (void) if (sizeof(DCTELEM) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_fdct_float (void) +jsimd_can_fdct_float(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_fdct_islow (DCTELEM *data) +jsimd_fdct_islow(DCTELEM 
*data) { jsimd_fdct_islow_neon(data); } GLOBAL(void) -jsimd_fdct_ifast (DCTELEM *data) +jsimd_fdct_ifast(DCTELEM *data) { jsimd_fdct_ifast_neon(data); } GLOBAL(void) -jsimd_fdct_float (FAST_FLOAT *data) +jsimd_fdct_float(FAST_FLOAT *data) { } GLOBAL(int) -jsimd_can_quantize (void) +jsimd_can_quantize(void) { init_simd(); @@ -601,35 +576,32 @@ jsimd_can_quantize (void) if (sizeof(DCTELEM) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_quantize_float (void) +jsimd_can_quantize_float(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, - DCTELEM *workspace) +jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) { jsimd_quantize_neon(coef_block, divisors, workspace); } GLOBAL(void) -jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, - FAST_FLOAT *workspace) +jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) { } GLOBAL(int) -jsimd_can_idct_2x2 (void) +jsimd_can_idct_2x2(void) { init_simd(); @@ -645,14 +617,14 @@ jsimd_can_idct_2x2 (void) if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_idct_4x4 (void) +jsimd_can_idct_4x4(void) { init_simd(); @@ -668,32 +640,30 @@ jsimd_can_idct_4x4 (void) if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(void) -jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { - jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, - output_col); + jsimd_idct_2x2_neon(compptr->dct_table, coef_block, 
output_buf, output_col); } GLOBAL(void) -jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { - jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, - output_col); + jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col); } GLOBAL(int) -jsimd_can_idct_islow (void) +jsimd_can_idct_islow(void) { init_simd(); @@ -709,14 +679,14 @@ jsimd_can_idct_islow (void) if (sizeof(ISLOW_MULT_TYPE) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_idct_ifast (void) +jsimd_can_idct_ifast(void) { init_simd(); @@ -734,47 +704,45 @@ jsimd_can_idct_ifast (void) if (IFAST_SCALE_BITS != 2) return 0; - if (simd_support & JSIMD_ARM_NEON) + if (simd_support & JSIMD_NEON) return 1; return 0; } GLOBAL(int) -jsimd_can_idct_float (void) +jsimd_can_idct_float(void) { - init_simd(); - return 0; } GLOBAL(void) -jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col); } GLOBAL(void) -jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col); } GLOBAL(void) -jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, 
JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(int) -jsimd_can_huff_encode_one_block (void) +jsimd_can_huff_encode_one_block(void) { init_simd(); @@ -783,16 +751,16 @@ jsimd_can_huff_encode_one_block (void) if (sizeof(JCOEF) != 2) return 0; - if (simd_support & JSIMD_ARM_NEON && simd_huffman) + if (simd_support & JSIMD_NEON && simd_huffman) return 1; return 0; } -GLOBAL(JOCTET*) -jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, - int last_dc_val, c_derived_tbl *dctbl, - c_derived_tbl *actbl) +GLOBAL(JOCTET *) +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) { if (simd_features & JSIMD_FASTTBL) return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val, @@ -801,3 +769,30 @@ jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block, last_dc_val, dctbl, actbl); } + +GLOBAL(int) +jsimd_can_encode_mcu_AC_first_prepare(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *zerobits) +{ +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_refine_prepare(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits) +{ + return 0; +} diff --git a/simd/jsimd_arm64_neon.S b/simd/arm64/jsimd_neon.S similarity index 92% rename from simd/jsimd_arm64_neon.S rename to simd/arm64/jsimd_neon.S index 3309858..93472ef 100644 --- a/simd/jsimd_arm64_neon.S +++ b/simd/arm64/jsimd_neon.S @@ -2,12 +2,12 @@ * ARMv8 NEON optimizations for libjpeg-turbo * * Copyright (C) 2009-2011, Nokia Corporation 
and/or its subsidiary(-ies). - * All Rights Reserved. - * Author: Siarhei Siamashka + * All Rights Reserved. + * Author: Siarhei Siamashka * Copyright (C) 2013-2014, Linaro Limited. All Rights Reserved. - * Author: Ragesh Radhakrishnan + * Author: Ragesh Radhakrishnan * Copyright (C) 2014-2016, D. R. Commander. All Rights Reserved. - * Copyright (C) 2015-2016, Matthieu Darbois. All Rights Reserved. + * Copyright (C) 2015-2016, 2018, Matthieu Darbois. All Rights Reserved. * Copyright (C) 2016, Siarhei Siamashka. All Rights Reserved. * * This software is provided 'as-is', without any express or implied @@ -34,7 +34,7 @@ .text -#define RESPECT_STRICT_ALIGNMENT 1 +#define RESPECT_STRICT_ALIGNMENT 1 /*****************************************************************************/ @@ -42,6 +42,7 @@ /* Supplementary macro for setting function attributes */ .macro asm_function fname #ifdef __APPLE__ + .private_extern _\fname .globl _\fname _\fname: #else @@ -62,7 +63,7 @@ _\fname: trn2 \x1\literal, \xi\literal, \x1\literal .endm -/* Transpose elements of 2 differnet registers */ +/* Transpose elements of 2 different registers */ .macro transpose x0, x1, xi, xilen, literal mov \xi\xilen, \x0\xilen trn1 \x0\literal, \x0\literal, \x1\literal @@ -123,7 +124,7 @@ _\fname: .endm -#define CENTERJSAMPLE 128 +#define CENTERJSAMPLE 128 /*****************************************************************************/ @@ -131,25 +132,25 @@ _\fname: * Perform dequantization and inverse DCT on one block of coefficients. 
* * GLOBAL(void) - * jsimd_idct_islow_neon (void *dct_table, JCOEFPTR coef_block, - * JSAMPARRAY output_buf, JDIMENSION output_col) + * jsimd_idct_islow_neon(void *dct_table, JCOEFPTR coef_block, + * JSAMPARRAY output_buf, JDIMENSION output_col) */ -#define CONST_BITS 13 -#define PASS1_BITS 2 - -#define F_0_298 2446 /* FIX(0.298631336) */ -#define F_0_390 3196 /* FIX(0.390180644) */ -#define F_0_541 4433 /* FIX(0.541196100) */ -#define F_0_765 6270 /* FIX(0.765366865) */ -#define F_0_899 7373 /* FIX(0.899976223) */ -#define F_1_175 9633 /* FIX(1.175875602) */ -#define F_1_501 12299 /* FIX(1.501321110) */ -#define F_1_847 15137 /* FIX(1.847759065) */ -#define F_1_961 16069 /* FIX(1.961570560) */ -#define F_2_053 16819 /* FIX(2.053119869) */ -#define F_2_562 20995 /* FIX(2.562915447) */ -#define F_3_072 25172 /* FIX(3.072711026) */ +#define CONST_BITS 13 +#define PASS1_BITS 2 + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ .balign 16 Ljsimd_idct_islow_neon_consts: @@ -183,18 +184,18 @@ Ljsimd_idct_islow_neon_consts: #undef F_2_562 #undef F_3_072 -#define XFIX_P_0_298 v0.h[0] -#define XFIX_N_0_390 v0.h[1] -#define XFIX_P_0_541 v0.h[2] -#define XFIX_P_0_765 v0.h[3] -#define XFIX_N_0_899 v0.h[4] -#define XFIX_P_1_175 v0.h[5] -#define XFIX_P_1_501 v0.h[6] -#define XFIX_N_1_847 v0.h[7] -#define XFIX_N_1_961 v1.h[0] -#define XFIX_P_2_053 v1.h[1] -#define XFIX_N_2_562 v1.h[2] -#define XFIX_P_3_072 v1.h[3] +#define XFIX_P_0_298 v0.h[0] +#define XFIX_N_0_390 v0.h[1] +#define 
XFIX_P_0_541 v0.h[2] +#define XFIX_P_0_765 v0.h[3] +#define XFIX_N_0_899 v0.h[4] +#define XFIX_P_1_175 v0.h[5] +#define XFIX_P_1_501 v0.h[6] +#define XFIX_N_1_847 v0.h[7] +#define XFIX_N_1_961 v1.h[0] +#define XFIX_P_2_053 v1.h[1] +#define XFIX_N_2_562 v1.h[2] +#define XFIX_P_3_072 v1.h[3] asm_function jsimd_idct_islow_neon DCT_TABLE .req x0 @@ -292,8 +293,8 @@ asm_function jsimd_idct_islow_neon sshll2 v23.4s, v22.8h, #(CONST_BITS) /* tmp0h tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ mov v21.16b, v19.16b /* tmp3 = z1 */ mov v20.16b, v18.16b /* tmp3 = z1 */ - smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ - smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); */ + smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); */ sshll2 v27.4s, v26.8h, #(CONST_BITS) /* tmp1h tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ smlal2 v21.4s, v4.8h, XFIX_P_0_765 /* tmp3h tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ smlal v20.4s, v4.4h, XFIX_P_0_765 /* tmp3l tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ @@ -323,20 +324,20 @@ asm_function jsimd_idct_islow_neon smull2 v15.4s, v5.8h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ smull2 v17.4s, v3.8h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ smull2 v27.4s, v26.8h, XFIX_P_1_175 /* z5h z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ - smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ - smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ - smull2 v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ - smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, -FIX_1_961570560) */ + smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, -FIX_0_390180644) */ + smull2 
v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, -FIX_0_899976223) */ + smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, -FIX_2_562915447) */ smull v10.4s, v9.4h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ smull v12.4s, v7.4h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ smull v14.4s, v5.4h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ smull v16.4s, v3.4h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ smull v26.4s, v26.4h, XFIX_P_1_175 /* z5l z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ - smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ - smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ - smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ - smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, -FIX_1_961570560) */ + smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, -FIX_0_390180644) */ + smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, -FIX_0_899976223) */ + smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, -FIX_2_562915447) */ add v23.4s, v23.4s, v27.4s /* z3 += z5 */ add v22.4s, v22.4s, v26.4s /* z3 += z5 */ @@ -380,22 +381,22 @@ asm_function jsimd_idct_islow_neon sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */ sub v17.4s, v31.4s, v11.4s /* tmp13 - tmp0 */ - shrn v2.4h, v18.4s, #16 /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) */ - shrn v9.4h, v20.4s, #16 /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) */ - shrn v3.4h, v22.4s, #16 /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) */ - shrn v8.4h, v24.4s, #16 /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) */ - shrn v4.4h, v26.4s, #16 /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) */ - shrn v7.4h, v28.4s, #16 /* wsptr[DCTSIZE*5] = (int) 
DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) */ - shrn v5.4h, v14.4s, #16 /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) */ - shrn v6.4h, v16.4s, #16 /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) */ - shrn2 v2.8h, v19.4s, #16 /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) */ - shrn2 v9.8h, v21.4s, #16 /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) */ - shrn2 v3.8h, v23.4s, #16 /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) */ - shrn2 v8.8h, v25.4s, #16 /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) */ - shrn2 v4.8h, v27.4s, #16 /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) */ - shrn2 v7.8h, v29.4s, #16 /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) */ - shrn2 v5.8h, v15.4s, #16 /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) */ - shrn2 v6.8h, v17.4s, #16 /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) */ + shrn v2.4h, v18.4s, #16 /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn v9.4h, v20.4s, #16 /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn v3.4h, v22.4s, #16 /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn v8.4h, v24.4s, #16 /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn v4.4h, v26.4s, #16 /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn v7.4h, v28.4s, #16 /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn v5.4h, v14.4s, #16 /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) */ + shrn v6.4h, v16.4s, #16 /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) */ + shrn2 v2.8h, v19.4s, #16 /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, 
CONST_BITS+PASS1_BITS+3) */ + shrn2 v9.8h, v21.4s, #16 /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3) */ + shrn2 v3.8h, v23.4s, #16 /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn2 v8.8h, v25.4s, #16 /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3) */ + shrn2 v4.8h, v27.4s, #16 /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn2 v7.8h, v29.4s, #16 /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3) */ + shrn2 v5.8h, v15.4s, #16 /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3) */ + shrn2 v6.8h, v17.4s, #16 /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3) */ movi v0.16b, #(CENTERJSAMPLE) /* Prepare pointers (dual-issue with NEON instructions) */ ldp TMP1, TMP2, [OUTPUT_BUF], 16 @@ -474,7 +475,7 @@ asm_function jsimd_idct_islow_neon sshll v22.4s, v22.4h, #(CONST_BITS) /* tmp0l tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ mov v20.16b, v18.16b /* tmp3 = z1 */ sshll v26.4s, v26.4h, #(CONST_BITS) /* tmp1l tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ - smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); */ smlal v20.4s, v4.4h, XFIX_P_0_765 /* tmp3l tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ add v2.4s, v22.4s, v20.4s /* tmp10l tmp10 = tmp0 + tmp3; */ sub v6.4s, v22.4s, v20.4s /* tmp13l tmp13 = tmp0 - tmp3; */ @@ -496,10 +497,10 @@ asm_function jsimd_idct_islow_neon smull v14.4s, v5.4h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ smull v16.4s, v3.4h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ smull v26.4s, v26.4h, XFIX_P_1_175 /* z5l z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ - smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ - smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ 
- smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ - smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, -FIX_1_961570560) */ + smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, -FIX_0_390180644) */ + smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, -FIX_0_899976223) */ + smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, -FIX_2_562915447) */ add v22.4s, v22.4s, v26.4s /* z3 += z5 */ add v24.4s, v24.4s, v26.4s /* z4 += z5 */ @@ -525,14 +526,14 @@ asm_function jsimd_idct_islow_neon add v14.4s, v6.4s, v10.4s /* tmp13 + tmp0 */ sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */ - rshrn v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ - rshrn v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ - rshrn v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ - rshrn v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ - rshrn2 v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ - rshrn2 v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ - rshrn2 v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ - rshrn2 v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS) /* 
wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ mov v6.16b, v15.16b mov v7.16b, v15.16b mov v8.16b, v15.16b @@ -551,7 +552,7 @@ asm_function jsimd_idct_islow_neon sub v26.8h, v2.8h, v6.8h /* z2 - z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) - DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) */ sshll2 v23.4s, v22.8h, #(CONST_BITS) /* tmp0h tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ mov v21.16b, v19.16b /* tmp3 = z1 */ - smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); */ sshll2 v27.4s, v26.8h, #(CONST_BITS) /* tmp1h tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ smlal2 v21.4s, v4.8h, XFIX_P_0_765 /* tmp3h tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ add v28.4s, v23.4s, v21.4s /* tmp10h tmp10 = tmp0 + tmp3; */ @@ -574,10 +575,10 @@ asm_function jsimd_idct_islow_neon smull2 v15.4s, v5.8h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ smull2 v17.4s, v3.8h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ smull2 v27.4s, v26.8h, XFIX_P_1_175 /* z5h z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ - smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ - smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ - smull2 v19.4s, v18.8h, 
XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ - smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, -FIX_1_961570560) */ + smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, -FIX_0_390180644) */ + smull2 v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, -FIX_0_899976223) */ + smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, -FIX_2_562915447) */ add v23.4s, v23.4s, v27.4s /* z3 += z5 */ add v22.4s, v22.4s, v26.4s /* z3 += z5 */ @@ -609,14 +610,14 @@ asm_function jsimd_idct_islow_neon mov v3.16b, v14.16b mov v4.16b, v14.16b mov v5.16b, v14.16b - rshrn v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ - rshrn v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ - rshrn v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ - rshrn v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ - rshrn2 v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ - rshrn2 v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ - rshrn2 v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ - rshrn2 v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, 
CONST_BITS-PASS1_BITS) */ + rshrn v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ b 1b .balign 16 @@ -631,8 +632,8 @@ asm_function jsimd_idct_islow_neon sshll2 v23.4s, v22.8h, #(CONST_BITS) /* tmp0h tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS); */ mov v21.16b, v19.16b /* tmp3 = z1 */ mov v20.16b, v18.16b /* tmp3 = z1 */ - smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ - smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); */ + smlal2 v19.4s, v8.8h, XFIX_N_1_847 /* tmp2h tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); */ + smlal v18.4s, v8.4h, XFIX_N_1_847 /* tmp2l tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); */ sshll2 v27.4s, v26.8h, #(CONST_BITS) /* tmp1h tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS); */ smlal2 v21.4s, v4.8h, XFIX_P_0_765 /* tmp3h tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ smlal v20.4s, v4.4h, XFIX_P_0_765 /* tmp3l tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); */ @@ -662,20 +663,20 @@ asm_function jsimd_idct_islow_neon smull2 v15.4s, v5.8h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ smull2 v17.4s, v3.8h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ smull2 v27.4s, v26.8h, XFIX_P_1_175 /* z5h z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ - smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ - smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ - smull2 
v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ - smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + smull2 v23.4s, v22.8h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, -FIX_1_961570560) */ + smull2 v25.4s, v24.8h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, -FIX_0_390180644) */ + smull2 v19.4s, v18.8h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, -FIX_0_899976223) */ + smull2 v21.4s, v20.8h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, -FIX_2_562915447) */ smull v10.4s, v9.4h, XFIX_P_0_298 /* tmp0 = MULTIPLY(tmp0, FIX_0_298631336) */ smull v12.4s, v7.4h, XFIX_P_2_053 /* tmp1 = MULTIPLY(tmp1, FIX_2_053119869) */ smull v14.4s, v5.4h, XFIX_P_3_072 /* tmp2 = MULTIPLY(tmp2, FIX_3_072711026) */ smull v16.4s, v3.4h, XFIX_P_1_501 /* tmp3 = MULTIPLY(tmp3, FIX_1_501321110) */ smull v26.4s, v26.4h, XFIX_P_1_175 /* z5l z5 = MULTIPLY(z3 + z4, FIX_1_175875602) */ - smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560) */ - smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644) */ - smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223) */ - smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447) */ + smull v22.4s, v22.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, -FIX_1_961570560) */ + smull v24.4s, v24.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, -FIX_0_390180644) */ + smull v18.4s, v18.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, -FIX_0_899976223) */ + smull v20.4s, v20.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, -FIX_2_562915447) */ add v23.4s, v23.4s, v27.4s /* z3 += z5 */ add v22.4s, v22.4s, v26.4s /* z3 += z5 */ @@ -719,22 +720,22 @@ asm_function jsimd_idct_islow_neon sub v16.4s, v6.4s, v10.4s /* tmp13 - tmp0 */ sub v17.4s, v31.4s, v11.4s /* tmp13 - tmp0 */ - rshrn v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ - rshrn v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ - rshrn 
v4.4h, v26.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ - rshrn v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ - rshrn v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ - rshrn v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ - rshrn v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ - rshrn v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ - rshrn2 v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ - rshrn2 v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ - rshrn2 v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ - rshrn2 v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ - rshrn2 v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ - rshrn2 v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ - rshrn2 v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ - rshrn2 v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v2.4h, v18.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v3.4h, v22.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v4.4h, 
v26.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v5.4h, v14.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn v6.4h, v19.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS) */ + rshrn v7.4h, v23.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS) */ + rshrn v8.4h, v27.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS) */ + rshrn v9.4h, v15.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v2.8h, v16.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v3.8h, v28.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v4.8h, v24.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v5.8h, v20.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ + rshrn2 v6.8h, v17.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS) */ + rshrn2 v7.8h, v29.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS) */ + rshrn2 v8.8h, v25.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS) */ + rshrn2 v9.8h, v21.4s, #(CONST_BITS-PASS1_BITS) /* wsptr[DCTSIZE*7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS) */ b 1b .unreq DCT_TABLE @@ -785,10 +786,10 @@ asm_function jsimd_idct_islow_neon * per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions. 
*/ -#define XFIX_1_082392200 v0.h[0] -#define XFIX_1_414213562 v0.h[1] -#define XFIX_1_847759065 v0.h[2] -#define XFIX_2_613125930 v0.h[3] +#define XFIX_1_082392200 v0.h[0] +#define XFIX_1_414213562 v0.h[1] +#define XFIX_1_847759065 v0.h[2] +#define XFIX_2_613125930 v0.h[3] .balign 16 Ljsimd_idct_ifast_neon_consts: @@ -1024,35 +1025,35 @@ asm_function jsimd_idct_ifast_neon #define CONST_BITS 13 -#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ -#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ -#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ -#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ -#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ -#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ -#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ -#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ -#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ -#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ -#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ -#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ -#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ -#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ +#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ +#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ +#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ +#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ +#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ +#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ +#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ +#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ +#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ +#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ +#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ +#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ +#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ +#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ .balign 16 Ljsimd_idct_4x4_neon_consts: - .short 
FIX_1_847759065 /* v0.h[0] */ - .short -FIX_0_765366865 /* v0.h[1] */ - .short -FIX_0_211164243 /* v0.h[2] */ - .short FIX_1_451774981 /* v0.h[3] */ - .short -FIX_2_172734803 /* d1[0] */ - .short FIX_1_061594337 /* d1[1] */ - .short -FIX_0_509795579 /* d1[2] */ - .short -FIX_0_601344887 /* d1[3] */ - .short FIX_0_899976223 /* v2.h[0] */ - .short FIX_2_562915447 /* v2.h[1] */ - .short 1 << (CONST_BITS+1) /* v2.h[2] */ - .short 0 /* v2.h[3] */ + .short FIX_1_847759065 /* v0.h[0] */ + .short -FIX_0_765366865 /* v0.h[1] */ + .short -FIX_0_211164243 /* v0.h[2] */ + .short FIX_1_451774981 /* v0.h[3] */ + .short -FIX_2_172734803 /* d1[0] */ + .short FIX_1_061594337 /* d1[1] */ + .short -FIX_0_509795579 /* d1[2] */ + .short -FIX_0_601344887 /* d1[3] */ + .short FIX_0_899976223 /* v2.h[0] */ + .short FIX_2_562915447 /* v2.h[1] */ + .short 1 << (CONST_BITS + 1) /* v2.h[2] */ + .short 0 /* v2.h[3] */ .macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29 smull v28.4s, \x4, v2.h[2] @@ -1543,7 +1544,7 @@ asm_function jsimd_idct_2x2_neon .else .error unsupported macroblock size .endif - .elseif \bpp==16 + .elseif \bpp == 16 .if \size == 8 st1 {v25.8h}, [RGB], 16 .elseif \size == 4 @@ -2234,24 +2235,24 @@ asm_function jsimd_convsamp_neon * rid of a bunch of VLD1.16 instructions */ -#define CONST_BITS 13 -#define PASS1_BITS 2 - -#define DESCALE_P1 (CONST_BITS-PASS1_BITS) -#define DESCALE_P2 (CONST_BITS+PASS1_BITS) - -#define F_0_298 2446 /* FIX(0.298631336) */ -#define F_0_390 3196 /* FIX(0.390180644) */ -#define F_0_541 4433 /* FIX(0.541196100) */ -#define F_0_765 6270 /* FIX(0.765366865) */ -#define F_0_899 7373 /* FIX(0.899976223) */ -#define F_1_175 9633 /* FIX(1.175875602) */ -#define F_1_501 12299 /* FIX(1.501321110) */ -#define F_1_847 15137 /* FIX(1.847759065) */ -#define F_1_961 16069 /* FIX(1.961570560) */ -#define F_2_053 16819 /* FIX(2.053119869) */ -#define F_2_562 20995 /* FIX(2.562915447) */ -#define F_3_072 25172 /* FIX(3.072711026) */ 
+#define CONST_BITS 13 +#define PASS1_BITS 2 + +#define DESCALE_P1 (CONST_BITS - PASS1_BITS) +#define DESCALE_P2 (CONST_BITS + PASS1_BITS) + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ .balign 16 Ljsimd_fdct_islow_neon_consts: @@ -2284,18 +2285,18 @@ Ljsimd_fdct_islow_neon_consts: #undef F_2_053 #undef F_2_562 #undef F_3_072 -#define XFIX_P_0_298 v0.h[0] -#define XFIX_N_0_390 v0.h[1] -#define XFIX_P_0_541 v0.h[2] -#define XFIX_P_0_765 v0.h[3] -#define XFIX_N_0_899 v0.h[4] -#define XFIX_P_1_175 v0.h[5] -#define XFIX_P_1_501 v0.h[6] -#define XFIX_N_1_847 v0.h[7] -#define XFIX_N_1_961 v1.h[0] -#define XFIX_P_2_053 v1.h[1] -#define XFIX_N_2_562 v1.h[2] -#define XFIX_P_3_072 v1.h[3] +#define XFIX_P_0_298 v0.h[0] +#define XFIX_N_0_390 v0.h[1] +#define XFIX_P_0_541 v0.h[2] +#define XFIX_P_0_765 v0.h[3] +#define XFIX_N_0_899 v0.h[4] +#define XFIX_P_1_175 v0.h[5] +#define XFIX_P_1_501 v0.h[6] +#define XFIX_N_1_847 v0.h[7] +#define XFIX_N_1_961 v1.h[0] +#define XFIX_P_2_053 v1.h[1] +#define XFIX_N_2_562 v1.h[2] +#define XFIX_P_3_072 v1.h[3] asm_function jsimd_fdct_islow_neon @@ -2353,8 +2354,8 @@ asm_function jsimd_fdct_islow_neon add v18.8h, v11.8h, v9.8h /* tmp12 + tmp13 */ - shl v16.8h, v16.8h, #PASS1_BITS /* dataptr[0] = (DCTELEM) LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); */ - shl v20.8h, v20.8h, #PASS1_BITS /* dataptr[4] = (DCTELEM) LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); */ + shl v16.8h, v16.8h, #PASS1_BITS /* dataptr[0] = (DCTELEM)LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); */ + shl 
v20.8h, v20.8h, #PASS1_BITS /* dataptr[4] = (DCTELEM)LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); */ smull2 v24.4s, v18.8h, XFIX_P_0_541 /* z1 hi = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ smull v18.4s, v18.4h, XFIX_P_0_541 /* z1 lo = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ @@ -2368,8 +2369,8 @@ asm_function jsimd_fdct_islow_neon rshrn v18.4h, v18.4s, #DESCALE_P1 rshrn v22.4h, v22.4s, #DESCALE_P1 - rshrn2 v18.8h, v24.4s, #DESCALE_P1 /* dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, XFIX_P_0_765), CONST_BITS-PASS1_BITS); */ - rshrn2 v22.8h, v25.4s, #DESCALE_P1 /* dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, XFIX_N_1_847), CONST_BITS-PASS1_BITS); */ + rshrn2 v18.8h, v24.4s, #DESCALE_P1 /* dataptr[2] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, XFIX_P_0_765), CONST_BITS-PASS1_BITS); */ + rshrn2 v22.8h, v25.4s, #DESCALE_P1 /* dataptr[6] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, XFIX_N_1_847), CONST_BITS-PASS1_BITS); */ /* Odd part */ @@ -2395,10 +2396,10 @@ asm_function jsimd_fdct_islow_neon smull2 v13.4s, v9.8h, XFIX_N_2_562 smull2 v14.4s, v10.8h, XFIX_N_1_961 smull2 v15.4s, v11.8h, XFIX_N_0_390 - smull v8.4s, v8.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223); */ - smull v9.4s, v9.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447); */ - smull v10.4s, v10.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560); */ - smull v11.4s, v11.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644); */ + smull v8.4s, v8.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, -FIX_0_899976223); */ + smull v9.4s, v9.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, -FIX_2_562915447); */ + smull v10.4s, v10.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, -FIX_1_961570560); */ + smull v11.4s, v11.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, -FIX_0_390180644); */ add v10.4s, v10.4s, v4.4s /* z3 += z5 */ add v14.4s, v14.4s, v5.4s @@ -2427,10 +2428,10 @@ asm_function jsimd_fdct_islow_neon rshrn v21.4h, v29.4s, #DESCALE_P1 rshrn v19.4h, v30.4s, #DESCALE_P1 rshrn v17.4h, v31.4s, #DESCALE_P1 - rshrn2 v23.8h, 
v24.4s, #DESCALE_P1 /* dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); */ - rshrn2 v21.8h, v25.4s, #DESCALE_P1 /* dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); */ - rshrn2 v19.8h, v26.4s, #DESCALE_P1 /* dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); */ - rshrn2 v17.8h, v27.4s, #DESCALE_P1 /* dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); */ + rshrn2 v23.8h, v24.4s, #DESCALE_P1 /* dataptr[7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v21.8h, v25.4s, #DESCALE_P1 /* dataptr[5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); */ + rshrn2 v19.8h, v26.4s, #DESCALE_P1 /* dataptr[3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v17.8h, v27.4s, #DESCALE_P1 /* dataptr[1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); */ /* Transpose */ transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23, v31, v2, v3, v4 @@ -2456,8 +2457,8 @@ asm_function jsimd_fdct_islow_neon add v18.8h, v11.8h, v9.8h /* tmp12 + tmp13 */ - srshr v16.8h, v16.8h, #PASS1_BITS /* dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); */ - srshr v20.8h, v20.8h, #PASS1_BITS /* dataptr[4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); */ + srshr v16.8h, v16.8h, #PASS1_BITS /* dataptr[0] = (DCTELEM)DESCALE(tmp10 + tmp11, PASS1_BITS); */ + srshr v20.8h, v20.8h, #PASS1_BITS /* dataptr[4] = (DCTELEM)DESCALE(tmp10 - tmp11, PASS1_BITS); */ smull2 v24.4s, v18.8h, XFIX_P_0_541 /* z1 hi = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ smull v18.4s, v18.4h, XFIX_P_0_541 /* z1 lo = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */ @@ -2471,8 +2472,8 @@ asm_function jsimd_fdct_islow_neon rshrn v18.4h, v18.4s, #DESCALE_P2 rshrn v22.4h, v22.4s, #DESCALE_P2 - rshrn2 v18.8h, v24.4s, #DESCALE_P2 /* dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, XFIX_P_0_765), CONST_BITS-PASS1_BITS); */ - rshrn2 v22.8h, v25.4s, #DESCALE_P2 /* dataptr[6] = (DCTELEM) DESCALE(z1 + 
MULTIPLY(tmp12, XFIX_N_1_847), CONST_BITS-PASS1_BITS); */ + rshrn2 v18.8h, v24.4s, #DESCALE_P2 /* dataptr[2] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, XFIX_P_0_765), CONST_BITS-PASS1_BITS); */ + rshrn2 v22.8h, v25.4s, #DESCALE_P2 /* dataptr[6] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, XFIX_N_1_847), CONST_BITS-PASS1_BITS); */ /* Odd part */ add v8.8h, v28.8h, v31.8h /* z1 = tmp4 + tmp7; */ @@ -2498,10 +2499,10 @@ asm_function jsimd_fdct_islow_neon smull2 v13.4s, v9.8h, XFIX_N_2_562 smull2 v14.4s, v10.8h, XFIX_N_1_961 smull2 v15.4s, v11.8h, XFIX_N_0_390 - smull v8.4s, v8.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, - FIX_0_899976223); */ - smull v9.4s, v9.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, - FIX_2_562915447); */ - smull v10.4s, v10.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, - FIX_1_961570560); */ - smull v11.4s, v11.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, - FIX_0_390180644); */ + smull v8.4s, v8.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, -FIX_0_899976223); */ + smull v9.4s, v9.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, -FIX_2_562915447); */ + smull v10.4s, v10.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, -FIX_1_961570560); */ + smull v11.4s, v11.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, -FIX_0_390180644); */ add v10.4s, v10.4s, v4.4s add v14.4s, v14.4s, v5.4s @@ -2530,10 +2531,10 @@ asm_function jsimd_fdct_islow_neon rshrn v21.4h, v29.4s, #DESCALE_P2 rshrn v19.4h, v30.4s, #DESCALE_P2 rshrn v17.4h, v31.4s, #DESCALE_P2 - rshrn2 v23.8h, v24.4s, #DESCALE_P2 /* dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); */ - rshrn2 v21.8h, v25.4s, #DESCALE_P2 /* dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); */ - rshrn2 v19.8h, v26.4s, #DESCALE_P2 /* dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); */ - rshrn2 v17.8h, v27.4s, #DESCALE_P2 /* dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); */ + rshrn2 v23.8h, v24.4s, #DESCALE_P2 /* dataptr[7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v21.8h, v25.4s, 
#DESCALE_P2 /* dataptr[5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); */ + rshrn2 v19.8h, v26.4s, #DESCALE_P2 /* dataptr[3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); */ + rshrn2 v17.8h, v27.4s, #DESCALE_P2 /* dataptr[1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); */ /* store results */ st1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64 @@ -2577,10 +2578,10 @@ asm_function jsimd_fdct_islow_neon */ #undef XFIX_0_541196100 -#define XFIX_0_382683433 v0.h[0] -#define XFIX_0_541196100 v0.h[1] -#define XFIX_0_707106781 v0.h[2] -#define XFIX_1_306562965 v0.h[3] +#define XFIX_0_382683433 v0.h[0] +#define XFIX_0_541196100 v0.h[1] +#define XFIX_0_707106781 v0.h[2] +#define XFIX_1_306562965 v0.h[3] .balign 16 Ljsimd_fdct_ifast_neon_consts: @@ -2676,8 +2677,8 @@ asm_function jsimd_fdct_ifast_neon /* * GLOBAL(void) - * jsimd_quantize_neon (JCOEFPTR coef_block, DCTELEM *divisors, - * DCTELEM *workspace); + * jsimd_quantize_neon(JCOEFPTR coef_block, DCTELEM *divisors, + * DCTELEM *workspace); * */ asm_function jsimd_quantize_neon @@ -2768,10 +2769,10 @@ asm_function jsimd_quantize_neon * without smoothing. * * GLOBAL(void) - * jsimd_h2v1_downsample_neon (JDIMENSION image_width, int max_v_samp_factor, - * JDIMENSION v_samp_factor, - * JDIMENSION width_blocks, JSAMPARRAY input_data, - * JSAMPARRAY output_data); + * jsimd_h2v1_downsample_neon(JDIMENSION image_width, int max_v_samp_factor, + * JDIMENSION v_samp_factor, + * JDIMENSION width_in_blocks, + * JSAMPARRAY input_data, JSAMPARRAY output_data); */ .balign 16 @@ -2879,9 +2880,10 @@ asm_function jsimd_h2v1_downsample_neon * without smoothing. 
* * GLOBAL(void) - * jsimd_h2v2_downsample_neon (JDIMENSION image_width, int max_v_samp_factor, - * JDIMENSION v_samp_factor, JDIMENSION width_blocks, - * JSAMPARRAY input_data, JSAMPARRAY output_data); + * jsimd_h2v2_downsample_neon(JDIMENSION image_width, int max_v_samp_factor, + * JDIMENSION v_samp_factor, + * JDIMENSION width_in_blocks, + * JSAMPARRAY input_data, JSAMPARRAY output_data); */ .balign 16 @@ -2960,10 +2962,10 @@ asm_function jsimd_h2v2_downsample_neon /*****************************************************************************/ /* - * GLOBAL(JOCTET*) - * jsimd_huff_encode_one_block (working_state *state, JOCTET *buffer, - * JCOEFPTR block, int last_dc_val, - * c_derived_tbl *dctbl, c_derived_tbl *actbl) + * GLOBAL(JOCTET *) + * jsimd_huff_encode_one_block(working_state *state, JOCTET *buffer, + * JCOEFPTR block, int last_dc_val, + * c_derived_tbl *dctbl, c_derived_tbl *actbl) * */ @@ -3278,7 +3280,7 @@ asm_function jsimd_huff_encode_one_block_neon_slowtbl put_bits x10, x11 addp v16.16b, v16.16b, v18.16b checkbuf47 - umov x9,v16.D[0] + umov x9, v16.D[0] put_bits x13, x12 cnt v17.8b, v16.8b mvn x9, x9 diff --git a/simd/gas-preprocessor.in b/simd/gas-preprocessor.in new file mode 100755 index 0000000..560f788 --- /dev/null +++ b/simd/gas-preprocessor.in @@ -0,0 +1 @@ +gas-preprocessor.pl @CMAKE_ASM_COMPILER@ ${1+"$@"} diff --git a/simd/i386/jccolext-avx2.asm b/simd/i386/jccolext-avx2.asm new file mode 100644 index 0000000..7a8d784 --- /dev/null +++ b/simd/i386/jccolext-avx2.asm @@ -0,0 +1,580 @@ +; +; jccolext.asm - colorspace conversion (AVX2) +; +; Copyright (C) 2015, Intel Corporation. +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_avx2(JDIMENSION img_width, JSAMPARRAY input_buf, +; JSAMPIMAGE output_buf, JDIMENSION output_row, +; int num_rows); +; + +%define img_width(b) (b) + 8 ; JDIMENSION img_width +%define input_buf(b) (b) + 12 ; JSAMPARRAY input_buf +%define output_buf(b) (b) + 16 ; JSAMPIMAGE output_buf +%define output_row(b) (b) + 20 ; JDIMENSION output_row +%define num_rows(b) (b) + 24 ; int num_rows + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_YMMWORD + ; ymmword wk[WK_NUM] +%define WK_NUM 8 +%define gotptr wk(0) - SIZEOF_POINTER ; void * gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2) + +EXTN(jsimd_rgb_ycc_convert_avx2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] + test ecx, ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY 
[esi+2*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax, eax + jle near .return + alignx 16, 7 +.rowloop: + pushpic eax + push edx + push ebx + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + mov ebx, JSAMPROW [ebx] ; outptr1 + mov edx, JSAMPROW [edx] ; outptr2 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_YMMWORD + jae near .columnloop + alignx 16, 7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + movzx eax, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + movzx edx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax, edx +.column_ld4: + vmovd xmmA, eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + vmovd xmmF, XMM_DWORD [esi+ecx] + vpslldq xmmA, xmmA, SIZEOF_DWORD + vpor xmmA, xmmA, xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub ecx, byte SIZEOF_MMWORD + vmovq xmmB, XMM_MMWORD [esi+ecx] + vpslldq xmmA, xmmA, SIZEOF_MMWORD + vpor xmmA, xmmA, xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + sub ecx, byte SIZEOF_XMMWORD + vmovdqu xmmB, XMM_MMWORD [esi+ecx] + vperm2i128 ymmA, ymmA, ymmA, 1 + vpor ymmA, ymmB +.column_ld32: + test cl, SIZEOF_YMMWORD + jz short .column_ld64 + sub ecx, byte SIZEOF_YMMWORD + vmovdqa ymmF, ymmA + vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD] +.column_ld64: + test cl, 2*SIZEOF_YMMWORD + mov ecx, SIZEOF_YMMWORD + jz short .rgb_ycc_cnv + vmovdqa ymmB, ymmA + vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD] + jmp short .rgb_ycc_cnv 
+ alignx 16, 7 + +.columnloop: + vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD] + vmovdqu ymmB, YMMWORD [esi+2*SIZEOF_YMMWORD] + +.rgb_ycc_cnv: + ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + ; ymmB=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + + vmovdqu ymmC, ymmA + vinserti128 ymmA, ymmF, xmmA, 0 ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + vinserti128 ymmC, ymmC, xmmB, 0 ; ymmC=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + vinserti128 ymmB, ymmB, xmmF, 0 ; ymmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + vperm2i128 ymmF, ymmC, ymmC, 1 ; ymmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A + ; 1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q) + + vmovdqa ymmG, ymmA + vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12 + ; 22 03 13 23 04 14 24 05 0G 1G 2G 0H 1H 2H 0I 1I) + vpsrldq ymmG, ymmG, 8 ; ymmG=(22 03 13 23 04 14 24 05 0G 1G 2G 0H 1H 2H 0I 1I + ; 2I 0J 1J 2J 0K 1K 2K 0L -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmF ; ymmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A + ; 0G 0O 1G 1O 2G 2O 0H 0P 1H 1P 2H 2P 0I 0Q 1I 1Q) + vpslldq ymmF, ymmF, 8 ; ymmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27 + ; 08 18 28 09 19 29 0A 1A 1L 2L 0M 1M 2M 0N 1N 2N) + + vpunpcklbw ymmG, ymmG, ymmB ; ymmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D + ; 2I 2Q 0J 0R 1J 1R 2J 2R 0K 0S 1K 1S 2K 2S 0L 0T) + vpunpckhbw ymmF, ymmF, ymmB ; ymmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F + ; 1L 1T 2L 2T 0M 0U 1M 1U 2M 2U 0N 0V 1N 1V 2N 2V) + + vmovdqa ymmD, ymmA + vpslldq ymmA, 
ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09 + ; 11 19 21 29 02 0A 12 1A 0G 0O 1G 1O 2G 2O 0H 0P) + vpsrldq ymmD, ymmD, 8 ; ymmD=(11 19 21 29 02 0A 12 1A 0G 0O 1G 1O 2G 2O 0H 0P + ; 1H 1P 2H 2P 0I 0Q 1I 1Q -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmG ; ymmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D + ; 0G 0K 0O 0S 1G 1K 1O 1S 2G 2K 2O 2S 0H 0L 0P 0T) + vpslldq ymmG, ymmG, 8 ; ymmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B + ; 04 0C 14 1C 24 2C 05 0D 2I 2Q 0J 0R 1J 1R 2J 2R) + + vpunpcklbw ymmD, ymmD, ymmF ; ymmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E + ; 1H 1L 1P 1T 2H 2L 2P 2T 0I 0M 0Q 0U 1I 1M 1Q 1U) + vpunpckhbw ymmG, ymmG, ymmF ; ymmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F + ; 2I 2M 2Q 2U 0J 0N 0R 0V 1J 1N 1R 1V 2J 2N 2R 2V) + + vmovdqa ymmE, ymmA + vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C + ; 20 24 28 2C 01 05 09 0D 0G 0K 0O 0S 1G 1K 1O 1S) + vpsrldq ymmE, ymmE, 8 ; ymmE=(20 24 28 2C 01 05 09 0D 0G 0K 0O 0S 1G 1K 1O 1S + ; 2G 2K 2O 2S 0H 0L 0P 0T -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmD ; ymmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E + ; 0G 0I 0K 0M 0O 0Q 0S 0U 1G 1I 1K 1M 1O 1Q 1S 1U) + vpslldq ymmD, ymmD, 8 ; ymmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D + ; 02 06 0A 0E 12 16 1A 1E 1H 1L 1P 1T 2H 2L 2P 2T) + + vpunpcklbw ymmE, ymmE, ymmG ; ymmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F + ; 2G 2I 2K 2M 2O 2Q 2S 2U 0H 0J 0L 0N 0P 0R 0T 0V) + vpunpckhbw ymmD, ymmD, ymmG ; ymmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F + ; 1H 1J 1L 1N 1P 1R 1T 1V 2H 2J 2L 2N 2P 2R 2T 2V) + + vpxor ymmH, ymmH, ymmH + + vmovdqa ymmC, ymmA + vpunpcklbw ymmA, ymmA, ymmH ; ymmA=(00 02 04 06 08 0A 0C 0E 0G 0I 0K 0M 0O 0Q 0S 0U) + vpunpckhbw ymmC, ymmC, ymmH ; ymmC=(10 12 14 16 18 1A 1C 1E 1G 1I 1K 1M 1O 1Q 1S 1U) + + vmovdqa ymmB, ymmE + vpunpcklbw ymmE, ymmE, ymmH ; ymmE=(20 22 24 26 28 2A 2C 2E 2G 2I 2K 2M 2O 2Q 2S 2U) + vpunpckhbw ymmB, 
ymmB, ymmH ; ymmB=(01 03 05 07 09 0B 0D 0F 0H 0J 0L 0N 0P 0R 0T 0V) + + vmovdqa ymmF, ymmD + vpunpcklbw ymmD, ymmD, ymmH ; ymmD=(11 13 15 17 19 1B 1D 1F 1H 1J 1L 1N 1P 1R 1T 1V) + vpunpckhbw ymmF, ymmF, ymmH ; ymmF=(21 23 25 27 29 2B 2D 2F 2H 2J 2L 2N 2P 2R 2T 2V) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub ecx, byte SIZEOF_XMMWORD/16 + vmovd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub ecx, byte SIZEOF_XMMWORD/8 + vmovq xmmF, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE] + vpslldq xmmA, xmmA, SIZEOF_MMWORD + vpor xmmA, xmmA, xmmF +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub ecx, byte SIZEOF_XMMWORD/4 + vmovdqa xmmF, xmmA + vperm2i128 ymmF, ymmF, ymmF, 1 + vmovdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE] + vpor ymmA, ymmA, ymmF +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + jz short .column_ld16 + sub ecx, byte SIZEOF_XMMWORD/2 + vmovdqa ymmF, ymmA + vmovdqu ymmA, YMMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld16: + test cl, SIZEOF_XMMWORD + mov ecx, SIZEOF_YMMWORD + jz short .rgb_ycc_cnv + vmovdqa ymmE, ymmA + vmovdqa ymmH, ymmF + vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD] + jmp short .rgb_ycc_cnv + alignx 16, 7 + +.columnloop: + vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD] + vmovdqu ymmE, YMMWORD [esi+2*SIZEOF_YMMWORD] + vmovdqu ymmH, YMMWORD [esi+3*SIZEOF_YMMWORD] + +.rgb_ycc_cnv: + ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; ymmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + ; ymmE=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + 
vmovdqa ymmB, ymmA + vinserti128 ymmA, ymmA, xmmE, 1 ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J) + vperm2i128 ymmE, ymmB, ymmE, 0x31 ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + + vmovdqa ymmB, ymmF + vinserti128 ymmF, ymmF, xmmH, 1 ; ymmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R) + vperm2i128 ymmH, ymmB, ymmH, 0x31 ; ymmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + vmovdqa ymmD, ymmA + vpunpcklbw ymmA, ymmA, ymmE ; ymmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35 + ; 0G 0K 1G 1K 2G 2K 3G 3K 0H 0L 1H 1L 2H 2L 3H 3L) + vpunpckhbw ymmD, ymmD, ymmE ; ymmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37 + ; 0I 0M 1I 1M 2I 2M 3I 3M 0J 0N 1J 1N 2J 2N 3J 3N) + + vmovdqa ymmC, ymmF + vpunpcklbw ymmF, ymmF, ymmH ; ymmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D + ; 0O 0S 1O 1S 2O 2S 3O 3S 0P 0T 1P 1T 2P 2T 3P 3T) + vpunpckhbw ymmC, ymmC, ymmH ; ymmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F + ; 0Q 0U 1Q 1U 2Q 2U 3Q 3U 0R 0V 1R 1V 2R 2V 3R 3V) + + vmovdqa ymmB, ymmA + vpunpcklwd ymmA, ymmA, ymmF ; ymmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C + ; 0G 0K 0O 0S 1G 1K 1O 1S 2G 2K 2O 2S 3G 3K 3O 3S) + vpunpckhwd ymmB, ymmB, ymmF ; ymmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D + ; 0H 0L 0P 0T 1H 1L 1P 1T 2H 2L 2P 2T 3H 3L 3P 3T) + + vmovdqa ymmG, ymmD + vpunpcklwd ymmD, ymmD, ymmC ; ymmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E + ; 0I 0M 0Q 0U 1I 1M 1Q 1U 2I 2M 2Q 2U 3I 3M 3Q 3U) + vpunpckhwd ymmG, ymmG, ymmC ; ymmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F + ; 0J 0N 0R 0V 1J 1N 1R 1V 2J 2N 2R 2V 3J 3N 3R 3V) + + vmovdqa ymmE, ymmA + vpunpcklbw ymmA, ymmA, ymmD ; ymmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E + ; 0G 0I 0K 0M 0O 0Q 0S 0U 1G 1I 1K 1M 1O 1Q 1S 1U) + vpunpckhbw 
ymmE, ymmE, ymmD ; ymmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E + ; 2G 2I 2K 2M 2O 2Q 2S 2U 3G 3I 3K 3M 3O 3Q 3S 3U) + + vmovdqa ymmH, ymmB + vpunpcklbw ymmB, ymmB, ymmG ; ymmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F + ; 0H 0J 0L 0N 0P 0R 0T 0V 1H 1J 1L 1N 1P 1R 1T 1V) + vpunpckhbw ymmH, ymmH, ymmG ; ymmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F + ; 2H 2J 2L 2N 2P 2R 2T 2V 3H 3J 3L 3N 3P 3R 3T 3V) + + vpxor ymmF, ymmF, ymmF + + vmovdqa ymmC, ymmA + vpunpcklbw ymmA, ymmA, ymmF ; ymmA=(00 02 04 06 08 0A 0C 0E 0G 0I 0K 0M 0O 0Q 0S 0U) + vpunpckhbw ymmC, ymmC, ymmF ; ymmC=(10 12 14 16 18 1A 1C 1E 1G 1I 1K 1M 1O 1Q 1S 1U) + + vmovdqa ymmD, ymmB + vpunpcklbw ymmB, ymmB, ymmF ; ymmB=(01 03 05 07 09 0B 0D 0F 0H 0J 0L 0N 0P 0R 0T 0V) + vpunpckhbw ymmD, ymmD, ymmF ; ymmD=(11 13 15 17 19 1B 1D 1F 1H 1J 1L 1N 1P 1R 1T 1V) + + vmovdqa ymmG, ymmE + vpunpcklbw ymmE, ymmE, ymmF ; ymmE=(20 22 24 26 28 2A 2C 2E 2G 2I 2K 2M 2O 2Q 2S 2U) + vpunpckhbw ymmG, ymmG, ymmF ; ymmG=(30 32 34 36 38 3A 3C 3E 3G 3I 3K 3M 3O 3Q 3S 3U) + + vpunpcklbw ymmF, ymmF, ymmH + vpunpckhbw ymmH, ymmH, ymmH + vpsrlw ymmF, ymmF, BYTE_BIT ; ymmF=(21 23 25 27 29 2B 2D 2F 2H 2J 2L 2N 2P 2R 2T 2V) + vpsrlw ymmH, ymmH, BYTE_BIT ; ymmH=(31 33 35 37 39 3B 3D 3F 3H 3J 3L 3N 3P 3R 3T 3V) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; ymm0=R(02468ACEGIKMOQSU)=RE, ymm2=G(02468ACEGIKMOQSU)=GE, ymm4=B(02468ACEGIKMOQSU)=BE + ; ymm1=R(13579BDFHJLNPRTV)=RO, ymm3=G(13579BDFHJLNPRTV)=GO, ymm5=B(13579BDFHJLNPRTV)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + vmovdqa YMMWORD [wk(0)], ymm0 ; wk(0)=RE + vmovdqa YMMWORD [wk(1)], 
ymm1 ; wk(1)=RO + vmovdqa YMMWORD [wk(2)], ymm4 ; wk(2)=BE + vmovdqa YMMWORD [wk(3)], ymm5 ; wk(3)=BO + + vmovdqa ymm6, ymm1 + vpunpcklwd ymm1, ymm1, ymm3 + vpunpckhwd ymm6, ymm6, ymm3 + vmovdqa ymm7, ymm1 + vmovdqa ymm4, ymm6 + vpmaddwd ymm1, ymm1, [GOTOFF(eax,PW_F0299_F0337)] ; ymm1=ROL*FIX(0.299)+GOL*FIX(0.337) + vpmaddwd ymm6, ymm6, [GOTOFF(eax,PW_F0299_F0337)] ; ymm6=ROH*FIX(0.299)+GOH*FIX(0.337) + vpmaddwd ymm7, ymm7, [GOTOFF(eax,PW_MF016_MF033)] ; ymm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + vpmaddwd ymm4, ymm4, [GOTOFF(eax,PW_MF016_MF033)] ; ymm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + vmovdqa YMMWORD [wk(4)], ymm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + vmovdqa YMMWORD [wk(5)], ymm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + vpxor ymm1, ymm1, ymm1 + vpxor ymm6, ymm6, ymm6 + vpunpcklwd ymm1, ymm1, ymm5 ; ymm1=BOL + vpunpckhwd ymm6, ymm6, ymm5 ; ymm6=BOH + vpsrld ymm1, ymm1, 1 ; ymm1=BOL*FIX(0.500) + vpsrld ymm6, ymm6, 1 ; ymm6=BOH*FIX(0.500) + + vmovdqa ymm5, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; ymm5=[PD_ONEHALFM1_CJ] + + vpaddd ymm7, ymm7, ymm1 + vpaddd ymm4, ymm4, ymm6 + vpaddd ymm7, ymm7, ymm5 + vpaddd ymm4, ymm4, ymm5 + vpsrld ymm7, ymm7, SCALEBITS ; ymm7=CbOL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=CbOH + vpackssdw ymm7, ymm7, ymm4 ; ymm7=CbO + + vmovdqa ymm1, YMMWORD [wk(2)] ; ymm1=BE + + vmovdqa ymm6, ymm0 + vpunpcklwd ymm0, ymm0, ymm2 + vpunpckhwd ymm6, ymm6, ymm2 + vmovdqa ymm5, ymm0 + vmovdqa ymm4, ymm6 + vpmaddwd ymm0, ymm0, [GOTOFF(eax,PW_F0299_F0337)] ; ymm0=REL*FIX(0.299)+GEL*FIX(0.337) + vpmaddwd ymm6, ymm6, [GOTOFF(eax,PW_F0299_F0337)] ; ymm6=REH*FIX(0.299)+GEH*FIX(0.337) + vpmaddwd ymm5, ymm5, [GOTOFF(eax,PW_MF016_MF033)] ; ymm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + vpmaddwd ymm4, ymm4, [GOTOFF(eax,PW_MF016_MF033)] ; ymm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + vmovdqa YMMWORD [wk(6)], ymm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + vmovdqa YMMWORD [wk(7)], ymm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + vpxor ymm0, ymm0, ymm0 + vpxor ymm6, ymm6, ymm6 + vpunpcklwd ymm0, 
ymm0, ymm1 ; ymm0=BEL + vpunpckhwd ymm6, ymm6, ymm1 ; ymm6=BEH + vpsrld ymm0, ymm0, 1 ; ymm0=BEL*FIX(0.500) + vpsrld ymm6, ymm6, 1 ; ymm6=BEH*FIX(0.500) + + vmovdqa ymm1, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; ymm1=[PD_ONEHALFM1_CJ] + + vpaddd ymm5, ymm5, ymm0 + vpaddd ymm4, ymm4, ymm6 + vpaddd ymm5, ymm5, ymm1 + vpaddd ymm4, ymm4, ymm1 + vpsrld ymm5, ymm5, SCALEBITS ; ymm5=CbEL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=CbEH + vpackssdw ymm5, ymm5, ymm4 ; ymm5=CbE + + vpsllw ymm7, ymm7, BYTE_BIT + vpor ymm5, ymm5, ymm7 ; ymm5=Cb + vmovdqu YMMWORD [ebx], ymm5 ; Save Cb + + vmovdqa ymm0, YMMWORD [wk(3)] ; ymm0=BO + vmovdqa ymm6, YMMWORD [wk(2)] ; ymm6=BE + vmovdqa ymm1, YMMWORD [wk(1)] ; ymm1=RO + + vmovdqa ymm4, ymm0 + vpunpcklwd ymm0, ymm0, ymm3 + vpunpckhwd ymm4, ymm4, ymm3 + vmovdqa ymm7, ymm0 + vmovdqa ymm5, ymm4 + vpmaddwd ymm0, ymm0, [GOTOFF(eax,PW_F0114_F0250)] ; ymm0=BOL*FIX(0.114)+GOL*FIX(0.250) + vpmaddwd ymm4, ymm4, [GOTOFF(eax,PW_F0114_F0250)] ; ymm4=BOH*FIX(0.114)+GOH*FIX(0.250) + vpmaddwd ymm7, ymm7, [GOTOFF(eax,PW_MF008_MF041)] ; ymm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + vpmaddwd ymm5, ymm5, [GOTOFF(eax,PW_MF008_MF041)] ; ymm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + vmovdqa ymm3, [GOTOFF(eax,PD_ONEHALF)] ; ymm3=[PD_ONEHALF] + + vpaddd ymm0, ymm0, YMMWORD [wk(4)] + vpaddd ymm4, ymm4, YMMWORD [wk(5)] + vpaddd ymm0, ymm0, ymm3 + vpaddd ymm4, ymm4, ymm3 + vpsrld ymm0, ymm0, SCALEBITS ; ymm0=YOL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=YOH + vpackssdw ymm0, ymm0, ymm4 ; ymm0=YO + + vpxor ymm3, ymm3, ymm3 + vpxor ymm4, ymm4, ymm4 + vpunpcklwd ymm3, ymm3, ymm1 ; ymm3=ROL + vpunpckhwd ymm4, ymm4, ymm1 ; ymm4=ROH + vpsrld ymm3, ymm3, 1 ; ymm3=ROL*FIX(0.500) + vpsrld ymm4, ymm4, 1 ; ymm4=ROH*FIX(0.500) + + vmovdqa ymm1, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; ymm1=[PD_ONEHALFM1_CJ] + + vpaddd ymm7, ymm7, ymm3 + vpaddd ymm5, ymm5, ymm4 + vpaddd ymm7, ymm7, ymm1 + vpaddd ymm5, ymm5, ymm1 + vpsrld ymm7, ymm7, SCALEBITS ; ymm7=CrOL + vpsrld ymm5, ymm5, SCALEBITS ; ymm5=CrOH + vpackssdw ymm7, 
ymm7, ymm5 ; ymm7=CrO + + vmovdqa ymm3, YMMWORD [wk(0)] ; ymm3=RE + + vmovdqa ymm4, ymm6 + vpunpcklwd ymm6, ymm6, ymm2 + vpunpckhwd ymm4, ymm4, ymm2 + vmovdqa ymm1, ymm6 + vmovdqa ymm5, ymm4 + vpmaddwd ymm6, ymm6, [GOTOFF(eax,PW_F0114_F0250)] ; ymm6=BEL*FIX(0.114)+GEL*FIX(0.250) + vpmaddwd ymm4, ymm4, [GOTOFF(eax,PW_F0114_F0250)] ; ymm4=BEH*FIX(0.114)+GEH*FIX(0.250) + vpmaddwd ymm1, ymm1, [GOTOFF(eax,PW_MF008_MF041)] ; ymm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + vpmaddwd ymm5, ymm5, [GOTOFF(eax,PW_MF008_MF041)] ; ymm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + vmovdqa ymm2, [GOTOFF(eax,PD_ONEHALF)] ; ymm2=[PD_ONEHALF] + + vpaddd ymm6, ymm6, YMMWORD [wk(6)] + vpaddd ymm4, ymm4, YMMWORD [wk(7)] + vpaddd ymm6, ymm6, ymm2 + vpaddd ymm4, ymm4, ymm2 + vpsrld ymm6, ymm6, SCALEBITS ; ymm6=YEL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=YEH + vpackssdw ymm6, ymm6, ymm4 ; ymm6=YE + + vpsllw ymm0, ymm0, BYTE_BIT + vpor ymm6, ymm6, ymm0 ; ymm6=Y + vmovdqu YMMWORD [edi], ymm6 ; Save Y + + vpxor ymm2, ymm2, ymm2 + vpxor ymm4, ymm4, ymm4 + vpunpcklwd ymm2, ymm2, ymm3 ; ymm2=REL + vpunpckhwd ymm4, ymm4, ymm3 ; ymm4=REH + vpsrld ymm2, ymm2, 1 ; ymm2=REL*FIX(0.500) + vpsrld ymm4, ymm4, 1 ; ymm4=REH*FIX(0.500) + + vmovdqa ymm0, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; ymm0=[PD_ONEHALFM1_CJ] + + vpaddd ymm1, ymm1, ymm2 + vpaddd ymm5, ymm5, ymm4 + vpaddd ymm1, ymm1, ymm0 + vpaddd ymm5, ymm5, ymm0 + vpsrld ymm1, ymm1, SCALEBITS ; ymm1=CrEL + vpsrld ymm5, ymm5, SCALEBITS ; ymm5=CrEH + vpackssdw ymm1, ymm1, ymm5 ; ymm1=CrE + + vpsllw ymm7, ymm7, BYTE_BIT + vpor ymm1, ymm1, ymm7 ; ymm1=Cr + vmovdqu YMMWORD [edx], ymm1 ; Save Cr + + sub ecx, byte SIZEOF_YMMWORD + add esi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; inptr + add edi, byte SIZEOF_YMMWORD ; outptr0 + add ebx, byte SIZEOF_YMMWORD ; outptr1 + add edx, byte SIZEOF_YMMWORD ; outptr2 + cmp ecx, byte SIZEOF_YMMWORD + jae near .columnloop + test ecx, ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + pop ebx + pop edx + poppic eax + + add esi, byte 
SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + +.return: + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jccolext-mmx.asm b/simd/i386/jccolext-mmx.asm new file mode 100644 index 0000000..9a2c30e --- /dev/null +++ b/simd/i386/jccolext-mmx.asm @@ -0,0 +1,478 @@ +; +; jccolext.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. 
+; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_mmx(JDIMENSION img_width, JSAMPARRAY input_buf, +; JSAMPIMAGE output_buf, JDIMENSION output_row, +; int num_rows); +; + +%define img_width(b) (b) + 8 ; JDIMENSION img_width +%define input_buf(b) (b) + 12 ; JSAMPARRAY input_buf +%define output_buf(b) (b) + 16 ; JSAMPIMAGE output_buf +%define output_row(b) (b) + 20 ; JDIMENSION output_row +%define num_rows(b) (b) + 24 ; int num_rows + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD + ; mmword wk[WK_NUM] +%define WK_NUM 8 +%define gotptr wk(0) - SIZEOF_POINTER ; void * gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_mmx) + +EXTN(jsimd_rgb_ycc_convert_mmx): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] ; num_cols + test ecx, ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax, eax + jle near .return + alignx 16, 7 +.rowloop: + pushpic eax + push edx + push ebx + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + mov ebx, JSAMPROW [ebx] ; outptr1 + mov edx, JSAMPROW [edx] ; outptr2 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte 
SIZEOF_MMWORD + jae short .columnloop + alignx 16, 7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + xor eax, eax + mov al, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + xor edx, edx + mov dx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax, edx +.column_ld4: + movd mmA, eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd mmG, DWORD [esi+ecx] + psllq mmA, DWORD_BIT + por mmA, mmG +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + movq mmG, mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + mov ecx, SIZEOF_MMWORD + jmp short .rgb_ycc_cnv +.column_ld16: + test cl, 2*SIZEOF_MMWORD + mov ecx, SIZEOF_MMWORD + jz short .rgb_ycc_cnv + movq mmF, mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_ycc_cnv + alignx 16, 7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+2*SIZEOF_MMWORD] + +.rgb_ycc_cnv: + ; mmA=(00 10 20 01 11 21 02 12) + ; mmG=(22 03 13 23 04 14 24 05) + ; mmF=(15 25 06 16 26 07 17 27) + + movq mmD, mmA + psllq mmA, 4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01) + psrlq mmD, 4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --) + + punpckhbw mmA, mmG ; mmA=(00 04 10 14 20 24 01 05) + psllq mmG, 4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23) + + punpcklbw mmD, mmF ; mmD=(11 15 21 25 02 06 12 16) + punpckhbw mmG, mmF ; mmG=(22 26 03 07 13 17 23 27) + + movq mmE, mmA + psllq mmA, 4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14) + psrlq mmE, 4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --) + + punpckhbw mmA, mmD ; mmA=(00 02 04 06 10 12 14 16) + psllq mmD, 4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25) + + punpcklbw mmE, mmG ; mmE=(20 22 24 26 01 03 05 07) + punpckhbw mmD, mmG ; mmD=(11 13 15 17 21 23 
25 27) + + pxor mmH, mmH + + movq mmC, mmA + punpcklbw mmA, mmH ; mmA=(00 02 04 06) + punpckhbw mmC, mmH ; mmC=(10 12 14 16) + + movq mmB, mmE + punpcklbw mmE, mmH ; mmE=(20 22 24 26) + punpckhbw mmB, mmH ; mmB=(01 03 05 07) + + movq mmF, mmD + punpcklbw mmD, mmH ; mmD=(11 13 15 17) + punpckhbw mmF, mmH ; mmF=(21 23 25 27) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_MMWORD/8 + jz short .column_ld2 + sub ecx, byte SIZEOF_MMWORD/8 + movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_MMWORD/4 + jz short .column_ld4 + sub ecx, byte SIZEOF_MMWORD/4 + movq mmF, mmA + movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld4: + test cl, SIZEOF_MMWORD/2 + mov ecx, SIZEOF_MMWORD + jz short .rgb_ycc_cnv + movq mmD, mmA + movq mmC, mmF + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_ycc_cnv + alignx 16, 7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmD, MMWORD [esi+2*SIZEOF_MMWORD] + movq mmC, MMWORD [esi+3*SIZEOF_MMWORD] + +.rgb_ycc_cnv: + ; mmA=(00 10 20 30 01 11 21 31) + ; mmF=(02 12 22 32 03 13 23 33) + ; mmD=(04 14 24 34 05 15 25 35) + ; mmC=(06 16 26 36 07 17 27 37) + + movq mmB, mmA + punpcklbw mmA, mmF ; mmA=(00 02 10 12 20 22 30 32) + punpckhbw mmB, mmF ; mmB=(01 03 11 13 21 23 31 33) + + movq mmG, mmD + punpcklbw mmD, mmC ; mmD=(04 06 14 16 24 26 34 36) + punpckhbw mmG, mmC ; mmG=(05 07 15 17 25 27 35 37) + + movq mmE, mmA + punpcklwd mmA, mmD ; mmA=(00 02 04 06 10 12 14 16) + punpckhwd mmE, mmD ; mmE=(20 22 24 26 30 32 34 36) + + movq mmH, mmB + punpcklwd mmB, mmG ; mmB=(01 03 05 07 11 13 15 17) + punpckhwd mmH, mmG ; mmH=(21 23 25 27 31 33 35 37) + + pxor mmF, mmF + + movq mmC, mmA + punpcklbw mmA, mmF ; mmA=(00 02 04 06) + punpckhbw mmC, mmF ; mmC=(10 12 14 16) + + movq mmD, mmB + punpcklbw mmB, mmF ; mmB=(01 03 05 07) + punpckhbw mmD, mmF ; mmD=(11 13 15 17) + + movq mmG, mmE + punpcklbw mmE, mmF ; 
mmE=(20 22 24 26) + punpckhbw mmG, mmF ; mmG=(30 32 34 36) + + punpcklbw mmF, mmH + punpckhbw mmH, mmH + psrlw mmF, BYTE_BIT ; mmF=(21 23 25 27) + psrlw mmH, BYTE_BIT ; mmH=(31 33 35 37) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE + ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movq MMWORD [wk(0)], mm0 ; wk(0)=RE + movq MMWORD [wk(1)], mm1 ; wk(1)=RO + movq MMWORD [wk(2)], mm4 ; wk(2)=BE + movq MMWORD [wk(3)], mm5 ; wk(3)=BO + + movq mm6, mm1 + punpcklwd mm1, mm3 + punpckhwd mm6, mm3 + movq mm7, mm1 + movq mm4, mm6 + pmaddwd mm1, [GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd mm6, [GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd mm7, [GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd mm4, [GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movq MMWORD [wk(4)], mm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movq MMWORD [wk(5)], mm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor mm1, mm1 + pxor mm6, mm6 + punpcklwd mm1, mm5 ; mm1=BOL + punpckhwd mm6, mm5 ; mm6=BOH + psrld mm1, 1 ; mm1=BOL*FIX(0.500) + psrld mm6, 1 ; mm6=BOH*FIX(0.500) + + movq mm5, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ] + + paddd mm7, mm1 + paddd mm4, mm6 + paddd mm7, mm5 + paddd mm4, mm5 + psrld mm7, SCALEBITS ; mm7=CbOL + psrld mm4, SCALEBITS ; mm4=CbOH + packssdw mm7, mm4 ; mm7=CbO + + movq mm1, MMWORD [wk(2)] ; mm1=BE + + movq mm6, mm0 + punpcklwd mm0, mm2 + punpckhwd mm6, mm2 + 
movq mm5, mm0 + movq mm4, mm6 + pmaddwd mm0, [GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd mm6, [GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd mm5, [GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd mm4, [GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movq MMWORD [wk(6)], mm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movq MMWORD [wk(7)], mm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor mm0, mm0 + pxor mm6, mm6 + punpcklwd mm0, mm1 ; mm0=BEL + punpckhwd mm6, mm1 ; mm6=BEH + psrld mm0, 1 ; mm0=BEL*FIX(0.500) + psrld mm6, 1 ; mm6=BEH*FIX(0.500) + + movq mm1, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ] + + paddd mm5, mm0 + paddd mm4, mm6 + paddd mm5, mm1 + paddd mm4, mm1 + psrld mm5, SCALEBITS ; mm5=CbEL + psrld mm4, SCALEBITS ; mm4=CbEH + packssdw mm5, mm4 ; mm5=CbE + + psllw mm7, BYTE_BIT + por mm5, mm7 ; mm5=Cb + movq MMWORD [ebx], mm5 ; Save Cb + + movq mm0, MMWORD [wk(3)] ; mm0=BO + movq mm6, MMWORD [wk(2)] ; mm6=BE + movq mm1, MMWORD [wk(1)] ; mm1=RO + + movq mm4, mm0 + punpcklwd mm0, mm3 + punpckhwd mm4, mm3 + movq mm7, mm0 + movq mm5, mm4 + pmaddwd mm0, [GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd mm4, [GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd mm7, [GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd mm5, [GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movq mm3, [GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF] + + paddd mm0, MMWORD [wk(4)] + paddd mm4, MMWORD [wk(5)] + paddd mm0, mm3 + paddd mm4, mm3 + psrld mm0, SCALEBITS ; mm0=YOL + psrld mm4, SCALEBITS ; mm4=YOH + packssdw mm0, mm4 ; mm0=YO + + pxor mm3, mm3 + pxor mm4, mm4 + punpcklwd mm3, mm1 ; mm3=ROL + punpckhwd mm4, mm1 ; mm4=ROH + psrld mm3, 1 ; mm3=ROL*FIX(0.500) + psrld mm4, 1 ; mm4=ROH*FIX(0.500) + + movq mm1, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ] + + paddd mm7, mm3 
+ paddd mm5, mm4 + paddd mm7, mm1 + paddd mm5, mm1 + psrld mm7, SCALEBITS ; mm7=CrOL + psrld mm5, SCALEBITS ; mm5=CrOH + packssdw mm7, mm5 ; mm7=CrO + + movq mm3, MMWORD [wk(0)] ; mm3=RE + + movq mm4, mm6 + punpcklwd mm6, mm2 + punpckhwd mm4, mm2 + movq mm1, mm6 + movq mm5, mm4 + pmaddwd mm6, [GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd mm4, [GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd mm1, [GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd mm5, [GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movq mm2, [GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF] + + paddd mm6, MMWORD [wk(6)] + paddd mm4, MMWORD [wk(7)] + paddd mm6, mm2 + paddd mm4, mm2 + psrld mm6, SCALEBITS ; mm6=YEL + psrld mm4, SCALEBITS ; mm4=YEH + packssdw mm6, mm4 ; mm6=YE + + psllw mm0, BYTE_BIT + por mm6, mm0 ; mm6=Y + movq MMWORD [edi], mm6 ; Save Y + + pxor mm2, mm2 + pxor mm4, mm4 + punpcklwd mm2, mm3 ; mm2=REL + punpckhwd mm4, mm3 ; mm4=REH + psrld mm2, 1 ; mm2=REL*FIX(0.500) + psrld mm4, 1 ; mm4=REH*FIX(0.500) + + movq mm0, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ] + + paddd mm1, mm2 + paddd mm5, mm4 + paddd mm1, mm0 + paddd mm5, mm0 + psrld mm1, SCALEBITS ; mm1=CrEL + psrld mm5, SCALEBITS ; mm5=CrEH + packssdw mm1, mm5 ; mm1=CrE + + psllw mm7, BYTE_BIT + por mm1, mm7 ; mm1=Cr + movq MMWORD [edx], mm1 ; Save Cr + + sub ecx, byte SIZEOF_MMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr + add edi, byte SIZEOF_MMWORD ; outptr0 + add ebx, byte SIZEOF_MMWORD ; outptr1 + add edx, byte SIZEOF_MMWORD ; outptr2 + cmp ecx, byte SIZEOF_MMWORD + jae near .columnloop + test ecx, ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + pop ebx + pop edx + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + + emms ; empty MMX state + 
+.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jccolext-sse2.asm b/simd/i386/jccolext-sse2.asm new file mode 100644 index 0000000..e830562 --- /dev/null +++ b/simd/i386/jccolext-sse2.asm @@ -0,0 +1,505 @@ +; +; jccolext.asm - colorspace conversion (SSE2) +; +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. 
+; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_sse2(JDIMENSION img_width, JSAMPARRAY input_buf, +; JSAMPIMAGE output_buf, JDIMENSION output_row, +; int num_rows); +; + +%define img_width(b) (b) + 8 ; JDIMENSION img_width +%define input_buf(b) (b) + 12 ; JSAMPARRAY input_buf +%define output_buf(b) (b) + 16 ; JSAMPIMAGE output_buf +%define output_row(b) (b) + 20 ; JDIMENSION output_row +%define num_rows(b) (b) + 24 ; int num_rows + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 8 +%define gotptr wk(0) - SIZEOF_POINTER ; void * gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2) + +EXTN(jsimd_rgb_ycc_convert_sse2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] + test ecx, ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax, eax + jle near .return + alignx 16, 7 +.rowloop: + pushpic eax + push edx + push ebx + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + mov ebx, JSAMPROW [ebx] ; outptr1 + mov edx, JSAMPROW [edx] ; outptr2 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte 
SIZEOF_XMMWORD + jae near .columnloop + alignx 16, 7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + movzx eax, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + movzx edx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax, edx +.column_ld4: + movd xmmA, eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [esi+ecx] + pslldq xmmA, SIZEOF_DWORD + por xmmA, xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub ecx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [esi+ecx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA, xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF, xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + mov ecx, SIZEOF_XMMWORD + jmp short .rgb_ycc_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov ecx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmB, xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + alignx 16, 7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG, 8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF, 8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG, xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + 
punpckhbw xmmF, xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD, 8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG, 8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD, xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG, xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE, 8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD, 8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE, xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD, xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH, xmmH + + movdqa xmmC, xmmA + punpcklbw xmmA, xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC, xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB, xmmE + punpcklbw xmmE, xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB, xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF, xmmD + punpcklbw xmmD, xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF, xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub ecx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub ecx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA, xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub ecx, byte SIZEOF_XMMWORD/4 + movdqa xmmE, xmmA + movdqu xmmA, 
XMMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov ecx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmF, xmmA + movdqa xmmH, xmmE + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + alignx 16, 7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD, xmmA + punpcklbw xmmA, xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD, xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC, xmmF + punpcklbw xmmF, xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC, xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB, xmmA + punpcklwd xmmA, xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB, xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG, xmmD + punpcklwd xmmD, xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG, xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE, xmmA + punpcklbw xmmA, xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE, xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH, xmmB + punpcklbw xmmB, xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH, xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF, xmmF + + movdqa xmmC, xmmA + punpcklbw xmmA, xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC, xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + 
movdqa xmmD, xmmB + punpcklbw xmmB, xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD, xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG, xmmE + punpcklbw xmmE, xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG, xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF, xmmH + punpckhbw xmmH, xmmH + psrlw xmmF, BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH, BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE + movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO + + movdqa xmm6, xmm1 + punpcklwd xmm1, xmm3 + punpckhwd xmm6, xmm3 + movdqa xmm7, xmm1 + movdqa xmm4, xmm6 + pmaddwd xmm1, [GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6, [GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd xmm7, [GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd xmm4, [GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor xmm1, xmm1 + pxor xmm6, xmm6 + punpcklwd xmm1, xmm5 ; xmm1=BOL + punpckhwd xmm6, xmm5 ; xmm6=BOH + psrld xmm1, 1 ; xmm1=BOL*FIX(0.500) + psrld xmm6, 1 ; xmm6=BOH*FIX(0.500) + + movdqa xmm5, [GOTOFF(eax,PD_ONEHALFM1_CJ)] 
; xmm5=[PD_ONEHALFM1_CJ] + + paddd xmm7, xmm1 + paddd xmm4, xmm6 + paddd xmm7, xmm5 + paddd xmm4, xmm5 + psrld xmm7, SCALEBITS ; xmm7=CbOL + psrld xmm4, SCALEBITS ; xmm4=CbOH + packssdw xmm7, xmm4 ; xmm7=CbO + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE + + movdqa xmm6, xmm0 + punpcklwd xmm0, xmm2 + punpckhwd xmm6, xmm2 + movdqa xmm5, xmm0 + movdqa xmm4, xmm6 + pmaddwd xmm0, [GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6, [GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd xmm5, [GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd xmm4, [GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor xmm0, xmm0 + pxor xmm6, xmm6 + punpcklwd xmm0, xmm1 ; xmm0=BEL + punpckhwd xmm6, xmm1 ; xmm6=BEH + psrld xmm0, 1 ; xmm0=BEL*FIX(0.500) + psrld xmm6, 1 ; xmm6=BEH*FIX(0.500) + + movdqa xmm1, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm5, xmm0 + paddd xmm4, xmm6 + paddd xmm5, xmm1 + paddd xmm4, xmm1 + psrld xmm5, SCALEBITS ; xmm5=CbEL + psrld xmm4, SCALEBITS ; xmm4=CbEH + packssdw xmm5, xmm4 ; xmm5=CbE + + psllw xmm7, BYTE_BIT + por xmm5, xmm7 ; xmm5=Cb + movdqa XMMWORD [ebx], xmm5 ; Save Cb + + movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO + + movdqa xmm4, xmm0 + punpcklwd xmm0, xmm3 + punpckhwd xmm4, xmm3 + movdqa xmm7, xmm0 + movdqa xmm5, xmm4 + pmaddwd xmm0, [GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4, [GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd xmm7, [GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd xmm5, [GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movdqa xmm3, [GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF] + + paddd 
xmm0, XMMWORD [wk(4)] + paddd xmm4, XMMWORD [wk(5)] + paddd xmm0, xmm3 + paddd xmm4, xmm3 + psrld xmm0, SCALEBITS ; xmm0=YOL + psrld xmm4, SCALEBITS ; xmm4=YOH + packssdw xmm0, xmm4 ; xmm0=YO + + pxor xmm3, xmm3 + pxor xmm4, xmm4 + punpcklwd xmm3, xmm1 ; xmm3=ROL + punpckhwd xmm4, xmm1 ; xmm4=ROH + psrld xmm3, 1 ; xmm3=ROL*FIX(0.500) + psrld xmm4, 1 ; xmm4=ROH*FIX(0.500) + + movdqa xmm1, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm7, xmm3 + paddd xmm5, xmm4 + paddd xmm7, xmm1 + paddd xmm5, xmm1 + psrld xmm7, SCALEBITS ; xmm7=CrOL + psrld xmm5, SCALEBITS ; xmm5=CrOH + packssdw xmm7, xmm5 ; xmm7=CrO + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE + + movdqa xmm4, xmm6 + punpcklwd xmm6, xmm2 + punpckhwd xmm4, xmm2 + movdqa xmm1, xmm6 + movdqa xmm5, xmm4 + pmaddwd xmm6, [GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4, [GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd xmm1, [GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd xmm5, [GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movdqa xmm2, [GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(6)] + paddd xmm4, XMMWORD [wk(7)] + paddd xmm6, xmm2 + paddd xmm4, xmm2 + psrld xmm6, SCALEBITS ; xmm6=YEL + psrld xmm4, SCALEBITS ; xmm4=YEH + packssdw xmm6, xmm4 ; xmm6=YE + + psllw xmm0, BYTE_BIT + por xmm6, xmm0 ; xmm6=Y + movdqa XMMWORD [edi], xmm6 ; Save Y + + pxor xmm2, xmm2 + pxor xmm4, xmm4 + punpcklwd xmm2, xmm3 ; xmm2=REL + punpckhwd xmm4, xmm3 ; xmm4=REH + psrld xmm2, 1 ; xmm2=REL*FIX(0.500) + psrld xmm4, 1 ; xmm4=REH*FIX(0.500) + + movdqa xmm0, [GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ] + + paddd xmm1, xmm2 + paddd xmm5, xmm4 + paddd xmm1, xmm0 + paddd xmm5, xmm0 + psrld xmm1, SCALEBITS ; xmm1=CrEL + psrld xmm5, SCALEBITS ; xmm5=CrEH + packssdw xmm1, xmm5 ; xmm1=CrE + + psllw xmm7, BYTE_BIT + por xmm1, xmm7 ; xmm1=Cr + movdqa XMMWORD [edx], xmm1 ; Save Cr + + 
sub ecx, byte SIZEOF_XMMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add edi, byte SIZEOF_XMMWORD ; outptr0 + add ebx, byte SIZEOF_XMMWORD ; outptr1 + add edx, byte SIZEOF_XMMWORD ; outptr2 + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + test ecx, ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + pop ebx + pop edx + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jccolor-avx2.asm b/simd/i386/jccolor-avx2.asm new file mode 100644 index 0000000..958517f --- /dev/null +++ b/simd/i386/jccolor-avx2.asm @@ -0,0 +1,123 @@ +; +; jccolor.asm - colorspace conversion (AVX2) +; +; Copyright (C) 2009, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_rgb_ycc_convert_avx2) + +EXTN(jconst_rgb_ycc_convert_avx2): + +PW_F0299_F0337 times 8 dw F_0_299, F_0_337 +PW_F0114_F0250 times 8 dw F_0_114, F_0_250 +PW_MF016_MF033 times 8 dw -F_0_168, -F_0_331 +PW_MF008_MF041 times 8 dw -F_0_081, -F_0_418 +PD_ONEHALFM1_CJ times 8 dd (1 << (SCALEBITS - 1)) - 1 + \ + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1)) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extrgb_ycc_convert_avx2 +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extrgbx_ycc_convert_avx2 +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define 
RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extbgr_ycc_convert_avx2 +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extbgrx_ycc_convert_avx2 +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extxbgr_ycc_convert_avx2 +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extxrgb_ycc_convert_avx2 +%include "jccolext-avx2.asm" diff --git a/simd/i386/jccolor-mmx.asm b/simd/i386/jccolor-mmx.asm new file mode 100644 index 0000000..47be9e1 --- /dev/null +++ b/simd/i386/jccolor-mmx.asm @@ -0,0 +1,123 @@ +; +; jccolor.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_rgb_ycc_convert_mmx) + +EXTN(jconst_rgb_ycc_convert_mmx): + +PW_F0299_F0337 times 2 dw F_0_299, F_0_337 +PW_F0114_F0250 times 2 dw F_0_114, F_0_250 +PW_MF016_MF033 times 2 dw -F_0_168, -F_0_331 +PW_MF008_MF041 times 2 dw -F_0_081, -F_0_418 +PD_ONEHALFM1_CJ times 2 dd (1 << (SCALEBITS - 1)) - 1 + \ + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 2 dd (1 << (SCALEBITS - 1)) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extrgb_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extrgbx_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN 
EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extbgr_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extbgrx_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extxbgr_ycc_convert_mmx +%include "jccolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_mmx jsimd_extxrgb_ycc_convert_mmx +%include "jccolext-mmx.asm" diff --git a/simd/i386/jccolor-sse2.asm b/simd/i386/jccolor-sse2.asm new file mode 100644 index 0000000..c0d5d45 --- /dev/null +++ b/simd/i386/jccolor-sse2.asm @@ -0,0 +1,122 @@ +; +; jccolor.asm - colorspace conversion (SSE2) +; +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_rgb_ycc_convert_sse2) + +EXTN(jconst_rgb_ycc_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PW_MF016_MF033 times 4 dw -F_0_168, -F_0_331 +PW_MF008_MF041 times 4 dw -F_0_081, -F_0_418 +PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS - 1)) - 1 + \ + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1)) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define 
RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2 +%include "jccolext-sse2.asm" diff --git a/simd/i386/jcgray-avx2.asm b/simd/i386/jcgray-avx2.asm new file mode 100644 index 0000000..4d66242 --- /dev/null +++ b/simd/i386/jcgray-avx2.asm @@ -0,0 +1,115 @@ +; +; jcgray.asm - grayscale colorspace conversion (AVX2) +; +; Copyright (C) 2011, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_rgb_gray_convert_avx2) + +EXTN(jconst_rgb_gray_convert_avx2): + +PW_F0299_F0337 times 8 dw F_0_299, F_0_337 +PW_F0114_F0250 times 8 dw F_0_114, F_0_250 +PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1)) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extrgb_gray_convert_avx2 +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extrgbx_gray_convert_avx2 +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extbgr_gray_convert_avx2 +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define 
RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extbgrx_gray_convert_avx2 +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extxbgr_gray_convert_avx2 +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extxrgb_gray_convert_avx2 +%include "jcgryext-avx2.asm" diff --git a/simd/i386/jcgray-mmx.asm b/simd/i386/jcgray-mmx.asm new file mode 100644 index 0000000..07c7ea6 --- /dev/null +++ b/simd/i386/jcgray-mmx.asm @@ -0,0 +1,115 @@ +; +; jcgray.asm - grayscale colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2011, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_rgb_gray_convert_mmx) + +EXTN(jconst_rgb_gray_convert_mmx): + +PW_F0299_F0337 times 2 dw F_0_299, F_0_337 +PW_F0114_F0250 times 2 dw F_0_114, F_0_250 +PD_ONEHALF times 2 dd (1 << (SCALEBITS - 1)) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extrgb_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extrgbx_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extbgr_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN 
EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extbgrx_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extxbgr_gray_convert_mmx +%include "jcgryext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_mmx jsimd_extxrgb_gray_convert_mmx +%include "jcgryext-mmx.asm" diff --git a/simd/i386/jcgray-sse2.asm b/simd/i386/jcgray-sse2.asm new file mode 100644 index 0000000..4b8c797 --- /dev/null +++ b/simd/i386/jcgray-sse2.asm @@ -0,0 +1,114 @@ +; +; jcgray.asm - grayscale colorspace conversion (SSE2) +; +; Copyright (C) 2011, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_rgb_gray_convert_sse2) + +EXTN(jconst_rgb_gray_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1)) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define 
RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2 +%include "jcgryext-sse2.asm" diff --git a/simd/i386/jcgryext-avx2.asm b/simd/i386/jcgryext-avx2.asm new file mode 100644 index 0000000..52e99a8 --- /dev/null +++ b/simd/i386/jcgryext-avx2.asm @@ -0,0 +1,459 @@ +; +; jcgryext.asm - grayscale colorspace conversion (AVX2) +; +; Copyright (C) 2011, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. 
+; +; GLOBAL(void) +; jsimd_rgb_gray_convert_avx2(JDIMENSION img_width, JSAMPARRAY input_buf, +; JSAMPIMAGE output_buf, JDIMENSION output_row, +; int num_rows); +; + +%define img_width(b) (b) + 8 ; JDIMENSION img_width +%define input_buf(b) (b) + 12 ; JSAMPARRAY input_buf +%define output_buf(b) (b) + 16 ; JSAMPIMAGE output_buf +%define output_row(b) (b) + 20 ; JDIMENSION output_row +%define num_rows(b) (b) + 24 ; int num_rows + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_YMMWORD + ; ymmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0) - SIZEOF_POINTER ; void * gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2) + +EXTN(jsimd_rgb_gray_convert_avx2): + push ebp + mov eax, esp ; eax = esp after the push (points at the saved ebp; arguments start at eax+8) + sub esp, byte 4 ; reserve a dword for the pointer stored by the mov [esp], eax below + and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits + mov [esp], eax ; keep the unaligned frame address at the aligned location (original_ebp) + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] ; esp = lowest address of the wk[] work area + pushpic eax ; make room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] ; ecx = img_width (column count) + test ecx, ecx + jz near .return ; zero columns: nothing to convert + + push ecx ; keep the column count while ecx holds output_row + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] ; edi = &output_buf[0][output_row] + + pop ecx ; ecx = column count again + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] ; eax = num_rows (the argument base in eax is no longer needed) + test eax, eax + jle near .return ; num_rows <= 0: nothing to convert + alignx 16, 7 +.rowloop: + pushpic eax + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_YMMWORD + jae near .columnloop ; at least SIZEOF_YMMWORD columns: take the full-width load path + alignx 16, 7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + movzx
eax, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + movzx edx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax, edx +.column_ld4: + vmovd xmmA, eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + vmovd xmmF, XMM_DWORD [esi+ecx] + vpslldq xmmA, xmmA, SIZEOF_DWORD + vpor xmmA, xmmA, xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub ecx, byte SIZEOF_MMWORD + vmovq xmmB, XMM_MMWORD [esi+ecx] + vpslldq xmmA, xmmA, SIZEOF_MMWORD + vpor xmmA, xmmA, xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + sub ecx, byte SIZEOF_XMMWORD + vmovdqu xmmB, XMM_MMWORD [esi+ecx] + vperm2i128 ymmA, ymmA, ymmA, 1 + vpor ymmA, ymmB +.column_ld32: + test cl, SIZEOF_YMMWORD + jz short .column_ld64 + sub ecx, byte SIZEOF_YMMWORD + vmovdqa ymmF, ymmA + vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD] +.column_ld64: + test cl, 2*SIZEOF_YMMWORD + mov ecx, SIZEOF_YMMWORD + jz short .rgb_gray_cnv + vmovdqa ymmB, ymmA + vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD] + jmp short .rgb_gray_cnv + alignx 16, 7 + +.columnloop: + vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD] + vmovdqu ymmB, YMMWORD [esi+2*SIZEOF_YMMWORD] + +.rgb_gray_cnv: + ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + ; ymmB=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + + vmovdqu ymmC, ymmA + vinserti128 ymmA, ymmF, xmmA, 0 ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + vinserti128 ymmC, ymmC, xmmB, 0 ; ymmC=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + 
vinserti128 ymmB, ymmB, xmmF, 0 ; ymmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + vperm2i128 ymmF, ymmC, ymmC, 1 ; ymmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A + ; 1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q) + + vmovdqa ymmG, ymmA + vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12 + ; 22 03 13 23 04 14 24 05 0G 1G 2G 0H 1H 2H 0I 1I) + vpsrldq ymmG, ymmG, 8 ; ymmG=(22 03 13 23 04 14 24 05 0G 1G 2G 0H 1H 2H 0I 1I + ; 2I 0J 1J 2J 0K 1K 2K 0L -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmF ; ymmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A + ; 0G 0O 1G 1O 2G 2O 0H 0P 1H 1P 2H 2P 0I 0Q 1I 1Q) + vpslldq ymmF, ymmF, 8 ; ymmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27 + ; 08 18 28 09 19 29 0A 1A 1L 2L 0M 1M 2M 0N 1N 2N) + + vpunpcklbw ymmG, ymmG, ymmB ; ymmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D + ; 2I 2Q 0J 0R 1J 1R 2J 2R 0K 0S 1K 1S 2K 2S 0L 0T) + vpunpckhbw ymmF, ymmF, ymmB ; ymmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F + ; 1L 1T 2L 2T 0M 0U 1M 1U 2M 2U 0N 0V 1N 1V 2N 2V) + + vmovdqa ymmD, ymmA + vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09 + ; 11 19 21 29 02 0A 12 1A 0G 0O 1G 1O 2G 2O 0H 0P) + vpsrldq ymmD, ymmD, 8 ; ymmD=(11 19 21 29 02 0A 12 1A 0G 0O 1G 1O 2G 2O 0H 0P + ; 1H 1P 2H 2P 0I 0Q 1I 1Q -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmG ; ymmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D + ; 0G 0K 0O 0S 1G 1K 1O 1S 2G 2K 2O 2S 0H 0L 0P 0T) + vpslldq ymmG, ymmG, 8 ; ymmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B + ; 04 0C 14 1C 24 2C 05 0D 2I 2Q 0J 0R 1J 1R 2J 2R) + + vpunpcklbw ymmD, ymmD, ymmF ; ymmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E + ; 1H 1L 1P 1T 2H 2L 2P 2T 0I 0M 0Q 0U 1I 1M 1Q 1U) + vpunpckhbw ymmG, ymmG, ymmF ; ymmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F + ; 2I 2M 2Q 2U 0J 0N 0R 0V 1J 1N 1R 1V 2J 2N 2R 2V) + + vmovdqa ymmE, ymmA + 
vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C + ; 20 24 28 2C 01 05 09 0D 0G 0K 0O 0S 1G 1K 1O 1S) + vpsrldq ymmE, ymmE, 8 ; ymmE=(20 24 28 2C 01 05 09 0D 0G 0K 0O 0S 1G 1K 1O 1S + ; 2G 2K 2O 2S 0H 0L 0P 0T -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmD ; ymmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E + ; 0G 0I 0K 0M 0O 0Q 0S 0U 1G 1I 1K 1M 1O 1Q 1S 1U) + vpslldq ymmD, ymmD, 8 ; ymmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D + ; 02 06 0A 0E 12 16 1A 1E 1H 1L 1P 1T 2H 2L 2P 2T) + + vpunpcklbw ymmE, ymmE, ymmG ; ymmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F + ; 2G 2I 2K 2M 2O 2Q 2S 2U 0H 0J 0L 0N 0P 0R 0T 0V) + vpunpckhbw ymmD, ymmD, ymmG ; ymmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F + ; 1H 1J 1L 1N 1P 1R 1T 1V 2H 2J 2L 2N 2P 2R 2T 2V) + + vpxor ymmH, ymmH, ymmH + + vmovdqa ymmC, ymmA + vpunpcklbw ymmA, ymmA, ymmH ; ymmA=(00 02 04 06 08 0A 0C 0E 0G 0I 0K 0M 0O 0Q 0S 0U) + vpunpckhbw ymmC, ymmC, ymmH ; ymmC=(10 12 14 16 18 1A 1C 1E 1G 1I 1K 1M 1O 1Q 1S 1U) + + vmovdqa ymmB, ymmE + vpunpcklbw ymmE, ymmE, ymmH ; ymmE=(20 22 24 26 28 2A 2C 2E 2G 2I 2K 2M 2O 2Q 2S 2U) + vpunpckhbw ymmB, ymmB, ymmH ; ymmB=(01 03 05 07 09 0B 0D 0F 0H 0J 0L 0N 0P 0R 0T 0V) + + vmovdqa ymmF, ymmD + vpunpcklbw ymmD, ymmD, ymmH ; ymmD=(11 13 15 17 19 1B 1D 1F 1H 1J 1L 1N 1P 1R 1T 1V) + vpunpckhbw ymmF, ymmF, ymmH ; ymmF=(21 23 25 27 29 2B 2D 2F 2H 2J 2L 2N 2P 2R 2T 2V) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub ecx, byte SIZEOF_XMMWORD/16 + vmovd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub ecx, byte SIZEOF_XMMWORD/8 + vmovq xmmF, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE] + vpslldq xmmA, xmmA, SIZEOF_MMWORD + vpor xmmA, xmmA, xmmF +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub ecx, byte SIZEOF_XMMWORD/4 + vmovdqa xmmF, xmmA + vperm2i128 ymmF, ymmF, ymmF, 1 + vmovdqu 
xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE] + vpor ymmA, ymmA, ymmF +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + jz short .column_ld16 + sub ecx, byte SIZEOF_XMMWORD/2 + vmovdqa ymmF, ymmA + vmovdqu ymmA, YMMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld16: + test cl, SIZEOF_XMMWORD + mov ecx, SIZEOF_YMMWORD + jz short .rgb_gray_cnv + vmovdqa ymmE, ymmA + vmovdqa ymmH, ymmF + vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD] + jmp short .rgb_gray_cnv + alignx 16, 7 + +.columnloop: + vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD] + vmovdqu ymmE, YMMWORD [esi+2*SIZEOF_YMMWORD] + vmovdqu ymmH, YMMWORD [esi+3*SIZEOF_YMMWORD] + +.rgb_gray_cnv: + ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; ymmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + ; ymmE=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + vmovdqa ymmB, ymmA + vinserti128 ymmA, ymmA, xmmE, 1 ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J) + vperm2i128 ymmE, ymmB, ymmE, 0x31 ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + + vmovdqa ymmB, ymmF + vinserti128 ymmF, ymmF, xmmH, 1 ; ymmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R) + vperm2i128 ymmH, ymmB, ymmH, 0x31 ; ymmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + vmovdqa ymmD, ymmA + vpunpcklbw ymmA, ymmA, ymmE ; ymmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35 + ; 0G 0K 1G 1K 2G 2K 3G 3K 0H 0L 1H 1L 2H 2L 3H 3L) + vpunpckhbw ymmD, ymmD, ymmE ; ymmD=(02 06 12 16 
22 26 32 36 03 07 13 17 23 27 33 37 + ; 0I 0M 1I 1M 2I 2M 3I 3M 0J 0N 1J 1N 2J 2N 3J 3N) + + vmovdqa ymmC, ymmF + vpunpcklbw ymmF, ymmF, ymmH ; ymmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D + ; 0O 0S 1O 1S 2O 2S 3O 3S 0P 0T 1P 1T 2P 2T 3P 3T) + vpunpckhbw ymmC, ymmC, ymmH ; ymmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F + ; 0Q 0U 1Q 1U 2Q 2U 3Q 3U 0R 0V 1R 1V 2R 2V 3R 3V) + + vmovdqa ymmB, ymmA + vpunpcklwd ymmA, ymmA, ymmF ; ymmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C + ; 0G 0K 0O 0S 1G 1K 1O 1S 2G 2K 2O 2S 3G 3K 3O 3S) + vpunpckhwd ymmB, ymmB, ymmF ; ymmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D + ; 0H 0L 0P 0T 1H 1L 1P 1T 2H 2L 2P 2T 3H 3L 3P 3T) + + vmovdqa ymmG, ymmD + vpunpcklwd ymmD, ymmD, ymmC ; ymmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E + ; 0I 0M 0Q 0U 1I 1M 1Q 1U 2I 2M 2Q 2U 3I 3M 3Q 3U) + vpunpckhwd ymmG, ymmG, ymmC ; ymmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F + ; 0J 0N 0R 0V 1J 1N 1R 1V 2J 2N 2R 2V 3J 3N 3R 3V) + + vmovdqa ymmE, ymmA + vpunpcklbw ymmA, ymmA, ymmD ; ymmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E + ; 0G 0I 0K 0M 0O 0Q 0S 0U 1G 1I 1K 1M 1O 1Q 1S 1U) + vpunpckhbw ymmE, ymmE, ymmD ; ymmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E + ; 2G 2I 2K 2M 2O 2Q 2S 2U 3G 3I 3K 3M 3O 3Q 3S 3U) + + vmovdqa ymmH, ymmB + vpunpcklbw ymmB, ymmB, ymmG ; ymmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F + ; 0H 0J 0L 0N 0P 0R 0T 0V 1H 1J 1L 1N 1P 1R 1T 1V) + vpunpckhbw ymmH, ymmH, ymmG ; ymmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F + ; 2H 2J 2L 2N 2P 2R 2T 2V 3H 3J 3L 3N 3P 3R 3T 3V) + + vpxor ymmF, ymmF, ymmF + + vmovdqa ymmC, ymmA + vpunpcklbw ymmA, ymmA, ymmF ; ymmA=(00 02 04 06 08 0A 0C 0E 0G 0I 0K 0M 0O 0Q 0S 0U) + vpunpckhbw ymmC, ymmC, ymmF ; ymmC=(10 12 14 16 18 1A 1C 1E 1G 1I 1K 1M 1O 1Q 1S 1U) + + vmovdqa ymmD, ymmB + vpunpcklbw ymmB, ymmB, ymmF ; ymmB=(01 03 05 07 09 0B 0D 0F 0H 0J 0L 0N 0P 0R 0T 0V) + vpunpckhbw ymmD, ymmD, ymmF ; ymmD=(11 13 15 17 19 1B 1D 1F 1H 1J 1L 
1N 1P 1R 1T 1V) + + vmovdqa ymmG, ymmE + vpunpcklbw ymmE, ymmE, ymmF ; ymmE=(20 22 24 26 28 2A 2C 2E 2G 2I 2K 2M 2O 2Q 2S 2U) + vpunpckhbw ymmG, ymmG, ymmF ; ymmG=(30 32 34 36 38 3A 3C 3E 3G 3I 3K 3M 3O 3Q 3S 3U) + + vpunpcklbw ymmF, ymmF, ymmH + vpunpckhbw ymmH, ymmH, ymmH + vpsrlw ymmF, ymmF, BYTE_BIT ; ymmF=(21 23 25 27 29 2B 2D 2F 2H 2J 2L 2N 2P 2R 2T 2V) + vpsrlw ymmH, ymmH, BYTE_BIT ; ymmH=(31 33 35 37 39 3B 3D 3F 3H 3J 3L 3N 3P 3R 3T 3V) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; ymm0=R(02468ACEGIKMOQSU)=RE, ymm2=G(02468ACEGIKMOQSU)=GE, ymm4=B(02468ACEGIKMOQSU)=BE + ; ymm1=R(13579BDFHJLNPRTV)=RO, ymm3=G(13579BDFHJLNPRTV)=GO, ymm5=B(13579BDFHJLNPRTV)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + vmovdqa ymm6, ymm1 + vpunpcklwd ymm1, ymm1, ymm3 + vpunpckhwd ymm6, ymm6, ymm3 + vpmaddwd ymm1, ymm1, [GOTOFF(eax,PW_F0299_F0337)] ; ymm1=ROL*FIX(0.299)+GOL*FIX(0.337) + vpmaddwd ymm6, ymm6, [GOTOFF(eax,PW_F0299_F0337)] ; ymm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + vmovdqa ymm7, ymm6 ; ymm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + vmovdqa ymm6, ymm0 + vpunpcklwd ymm0, ymm0, ymm2 + vpunpckhwd ymm6, ymm6, ymm2 + vpmaddwd ymm0, ymm0, [GOTOFF(eax,PW_F0299_F0337)] ; ymm0=REL*FIX(0.299)+GEL*FIX(0.337) + vpmaddwd ymm6, ymm6, [GOTOFF(eax,PW_F0299_F0337)] ; ymm6=REH*FIX(0.299)+GEH*FIX(0.337) + + vmovdqa YMMWORD [wk(0)], ymm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + vmovdqa YMMWORD [wk(1)], ymm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + vmovdqa ymm0, ymm5 ; ymm0=BO + vmovdqa ymm6, ymm4 ; ymm6=BE + + vmovdqa ymm4, ymm0 + vpunpcklwd ymm0, ymm0, ymm3 + vpunpckhwd ymm4, ymm4, ymm3 + vpmaddwd ymm0, ymm0, [GOTOFF(eax,PW_F0114_F0250)] ; ymm0=BOL*FIX(0.114)+GOL*FIX(0.250) + vpmaddwd ymm4, ymm4, [GOTOFF(eax,PW_F0114_F0250)] ; ymm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + vmovdqa ymm3, [GOTOFF(eax,PD_ONEHALF)] ; ymm3=[PD_ONEHALF] + + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm4, ymm4, ymm7 + 
vpaddd ymm0, ymm0, ymm3 + vpaddd ymm4, ymm4, ymm3 + vpsrld ymm0, ymm0, SCALEBITS ; ymm0=YOL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=YOH + vpackssdw ymm0, ymm0, ymm4 ; ymm0=YO + + vmovdqa ymm4, ymm6 + vpunpcklwd ymm6, ymm6, ymm2 + vpunpckhwd ymm4, ymm4, ymm2 + vpmaddwd ymm6, ymm6, [GOTOFF(eax,PW_F0114_F0250)] ; ymm6=BEL*FIX(0.114)+GEL*FIX(0.250) + vpmaddwd ymm4, ymm4, [GOTOFF(eax,PW_F0114_F0250)] ; ymm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + vmovdqa ymm2, [GOTOFF(eax,PD_ONEHALF)] ; ymm2=[PD_ONEHALF] + + vpaddd ymm6, ymm6, YMMWORD [wk(0)] + vpaddd ymm4, ymm4, YMMWORD [wk(1)] + vpaddd ymm6, ymm6, ymm2 + vpaddd ymm4, ymm4, ymm2 + vpsrld ymm6, ymm6, SCALEBITS ; ymm6=YEL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=YEH + vpackssdw ymm6, ymm6, ymm4 ; ymm6=YE + + vpsllw ymm0, ymm0, BYTE_BIT ; shift YO left by BYTE_BIT so it can be OR-ed with YE + vpor ymm6, ymm6, ymm0 ; ymm6=Y + vmovdqu YMMWORD [edi], ymm6 ; Save Y + + sub ecx, byte SIZEOF_YMMWORD ; one group of SIZEOF_YMMWORD columns consumed + add esi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; inptr + add edi, byte SIZEOF_YMMWORD ; outptr0 + cmp ecx, byte SIZEOF_YMMWORD + jae near .columnloop ; another full group of columns remains + test ecx, ecx + jnz near .column_ld1 ; fewer than SIZEOF_YMMWORD columns remain: partial load path + + pop ecx ; col + pop esi + pop edi + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW ; advance to the next output row pointer + dec eax ; num_rows + jg near .rowloop + +.return: + vzeroupper ; zero the upper halves of the YMM registers before returning + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- address saved by the prologue (points at the pushed ebp) + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jcgryext-mmx.asm b/simd/i386/jcgryext-mmx.asm new file mode 100644 index 0000000..4a9ab0d --- /dev/null +++ b/simd/i386/jcgryext-mmx.asm @@ -0,0 +1,357 @@ +; +; jcgryext.asm - grayscale colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2011, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_gray_convert_mmx(JDIMENSION img_width, JSAMPARRAY input_buf, +; JSAMPIMAGE output_buf, JDIMENSION output_row, +; int num_rows); +; + +%define img_width(b) (b) + 8 ; JDIMENSION img_width +%define input_buf(b) (b) + 12 ; JSAMPARRAY input_buf +%define output_buf(b) (b) + 16 ; JSAMPIMAGE output_buf +%define output_row(b) (b) + 20 ; JDIMENSION output_row +%define num_rows(b) (b) + 24 ; int num_rows + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD + ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0) - SIZEOF_POINTER ; void * gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_rgb_gray_convert_mmx) + +EXTN(jsimd_rgb_gray_convert_mmx): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] ; num_cols + test ecx, ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY 
[input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax, eax + jle near .return + alignx 16, 7 +.rowloop: + pushpic eax + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + cmp ecx, byte SIZEOF_MMWORD + jae short .columnloop + alignx 16, 7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + xor eax, eax + mov al, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + xor edx, edx + mov dx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax, edx +.column_ld4: + movd mmA, eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd mmG, DWORD [esi+ecx] + psllq mmA, DWORD_BIT + por mmA, mmG +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + movq mmG, mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + mov ecx, SIZEOF_MMWORD + jmp short .rgb_gray_cnv +.column_ld16: + test cl, 2*SIZEOF_MMWORD + mov ecx, SIZEOF_MMWORD + jz short .rgb_gray_cnv + movq mmF, mmA + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_gray_cnv + alignx 16, 7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+2*SIZEOF_MMWORD] + +.rgb_gray_cnv: + ; mmA=(00 10 20 01 11 21 02 12) + ; mmG=(22 03 13 23 04 14 24 05) + ; mmF=(15 25 06 16 26 07 17 27) + + movq mmD, mmA + psllq mmA, 4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01) + psrlq mmD, 4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --) + + punpckhbw mmA, mmG ; mmA=(00 04 10 14 20 24 01 05) + psllq mmG, 4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23) + + punpcklbw mmD, mmF ; mmD=(11 15 21 25 02 06 12 16) + punpckhbw mmG, mmF ; mmG=(22 26 03 07 13 17 23 27) + + movq mmE, mmA + psllq mmA, 
4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14) + psrlq mmE, 4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --) + + punpckhbw mmA, mmD ; mmA=(00 02 04 06 10 12 14 16) + psllq mmD, 4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25) + + punpcklbw mmE, mmG ; mmE=(20 22 24 26 01 03 05 07) + punpckhbw mmD, mmG ; mmD=(11 13 15 17 21 23 25 27) + + pxor mmH, mmH + + movq mmC, mmA + punpcklbw mmA, mmH ; mmA=(00 02 04 06) + punpckhbw mmC, mmH ; mmC=(10 12 14 16) + + movq mmB, mmE + punpcklbw mmE, mmH ; mmE=(20 22 24 26) + punpckhbw mmB, mmH ; mmB=(01 03 05 07) + + movq mmF, mmD + punpcklbw mmD, mmH ; mmD=(11 13 15 17) + punpckhbw mmF, mmH ; mmF=(21 23 25 27) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_MMWORD/8 + jz short .column_ld2 + sub ecx, byte SIZEOF_MMWORD/8 + movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_MMWORD/4 + jz short .column_ld4 + sub ecx, byte SIZEOF_MMWORD/4 + movq mmF, mmA + movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld4: + test cl, SIZEOF_MMWORD/2 + mov ecx, SIZEOF_MMWORD + jz short .rgb_gray_cnv + movq mmD, mmA + movq mmC, mmF + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + jmp short .rgb_gray_cnv + alignx 16, 7 + +.columnloop: + movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] + movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] + movq mmD, MMWORD [esi+2*SIZEOF_MMWORD] + movq mmC, MMWORD [esi+3*SIZEOF_MMWORD] + +.rgb_gray_cnv: + ; mmA=(00 10 20 30 01 11 21 31) + ; mmF=(02 12 22 32 03 13 23 33) + ; mmD=(04 14 24 34 05 15 25 35) + ; mmC=(06 16 26 36 07 17 27 37) + + movq mmB, mmA + punpcklbw mmA, mmF ; mmA=(00 02 10 12 20 22 30 32) + punpckhbw mmB, mmF ; mmB=(01 03 11 13 21 23 31 33) + + movq mmG, mmD + punpcklbw mmD, mmC ; mmD=(04 06 14 16 24 26 34 36) + punpckhbw mmG, mmC ; mmG=(05 07 15 17 25 27 35 37) + + movq mmE, mmA + punpcklwd mmA, mmD ; mmA=(00 02 04 06 10 12 14 16) + punpckhwd mmE, mmD ; mmE=(20 22 24 26 30 32 34 36) + + movq mmH, mmB + punpcklwd mmB, mmG ; mmB=(01 03 05 07 11 13 15 17) + 
punpckhwd mmH, mmG ; mmH=(21 23 25 27 31 33 35 37) + + pxor mmF, mmF + + movq mmC, mmA + punpcklbw mmA, mmF ; mmA=(00 02 04 06) + punpckhbw mmC, mmF ; mmC=(10 12 14 16) + + movq mmD, mmB + punpcklbw mmB, mmF ; mmB=(01 03 05 07) + punpckhbw mmD, mmF ; mmD=(11 13 15 17) + + movq mmG, mmE + punpcklbw mmE, mmF ; mmE=(20 22 24 26) + punpckhbw mmG, mmF ; mmG=(30 32 34 36) + + punpcklbw mmF, mmH + punpckhbw mmH, mmH + psrlw mmF, BYTE_BIT ; mmF=(21 23 25 27) + psrlw mmH, BYTE_BIT ; mmH=(31 33 35 37) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE + ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + movq mm6, mm1 + punpcklwd mm1, mm3 + punpckhwd mm6, mm3 + pmaddwd mm1, [GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd mm6, [GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + movq mm7, mm6 ; mm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + movq mm6, mm0 + punpcklwd mm0, mm2 + punpckhwd mm6, mm2 + pmaddwd mm0, [GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd mm6, [GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337) + + movq MMWORD [wk(0)], mm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + movq MMWORD [wk(1)], mm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + movq mm0, mm5 ; mm0=BO + movq mm6, mm4 ; mm6=BE + + movq mm4, mm0 + punpcklwd mm0, mm3 + punpckhwd mm4, mm3 + pmaddwd mm0, [GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd mm4, [GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + movq mm3, [GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF] + + paddd mm0, mm1 + paddd mm4, mm7 + paddd mm0, mm3 + paddd mm4, mm3 + psrld mm0, SCALEBITS ; mm0=YOL + psrld mm4, SCALEBITS ; mm4=YOH + packssdw mm0, mm4 ; mm0=YO + + movq mm4, mm6 + punpcklwd mm6, mm2 + 
punpckhwd mm4, mm2 + pmaddwd mm6, [GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd mm4, [GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + movq mm2, [GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF] + + paddd mm6, MMWORD [wk(0)] + paddd mm4, MMWORD [wk(1)] + paddd mm6, mm2 + paddd mm4, mm2 + psrld mm6, SCALEBITS ; mm6=YEL + psrld mm4, SCALEBITS ; mm4=YEH + packssdw mm6, mm4 ; mm6=YE + + psllw mm0, BYTE_BIT + por mm6, mm0 ; mm6=Y + movq MMWORD [edi], mm6 ; Save Y + + sub ecx, byte SIZEOF_MMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr + add edi, byte SIZEOF_MMWORD ; outptr0 + cmp ecx, byte SIZEOF_MMWORD + jae near .columnloop + test ecx, ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jcgryext-sse2.asm b/simd/i386/jcgryext-sse2.asm new file mode 100644 index 0000000..04d891c --- /dev/null +++ b/simd/i386/jcgryext-sse2.asm @@ -0,0 +1,384 @@ +; +; jcgryext.asm - grayscale colorspace conversion (SSE2) +; +; Copyright (C) 2011, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_rgb_gray_convert_sse2(JDIMENSION img_width, JSAMPARRAY input_buf, +; JSAMPIMAGE output_buf, JDIMENSION output_row, +; int num_rows); +; + +%define img_width(b) (b) + 8 ; JDIMENSION img_width +%define input_buf(b) (b) + 12 ; JSAMPARRAY input_buf +%define output_buf(b) (b) + 16 ; JSAMPIMAGE output_buf +%define output_row(b) (b) + 20 ; JDIMENSION output_row +%define num_rows(b) (b) + 24 ; int num_rows + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0) - SIZEOF_POINTER ; void * gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2) + +EXTN(jsimd_rgb_gray_convert_sse2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [img_width(eax)] + test ecx, ecx + jz near .return + + push ecx + + mov esi, JSAMPIMAGE [output_buf(eax)] + mov ecx, JDIMENSION [output_row(eax)] + mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] + lea edi, [edi+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov esi, JSAMPARRAY [input_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax, eax + jle near .return + alignx 16, 7 +.rowloop: + pushpic eax + push edi + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr0 + movpic eax, POINTER [gotptr] ; load GOT 
address (eax) + + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + alignx 16, 7 + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push eax + push edx + lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub ecx, byte SIZEOF_BYTE + movzx eax, BYTE [esi+ecx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub ecx, byte SIZEOF_WORD + movzx edx, WORD [esi+ecx] + shl eax, WORD_BIT + or eax, edx +.column_ld4: + movd xmmA, eax + pop edx + pop eax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub ecx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [esi+ecx] + pslldq xmmA, SIZEOF_DWORD + por xmmA, xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub ecx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [esi+ecx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA, xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF, xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + mov ecx, SIZEOF_XMMWORD + jmp short .rgb_gray_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov ecx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmB, xmmA + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + alignx 16, 7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG, 8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF, 8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG, xmmB ; xmmG=(22 2A 03 0B 13 
1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF, xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD, 8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG, 8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD, xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG, xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE, 8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD, 8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE, xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD, xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH, xmmH + + movdqa xmmC, xmmA + punpcklbw xmmA, xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC, xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB, xmmE + punpcklbw xmmE, xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB, xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF, xmmD + punpcklbw xmmD, xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF, xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub ecx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub ecx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA, xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub ecx, byte SIZEOF_XMMWORD/4 + 
movdqa xmmE, xmmA + movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov ecx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmF, xmmA + movdqa xmmH, xmmE + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + alignx 16, 7 + +.columnloop: + movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD, xmmA + punpcklbw xmmA, xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD, xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC, xmmF + punpcklbw xmmF, xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC, xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB, xmmA + punpcklwd xmmA, xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB, xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG, xmmD + punpcklwd xmmD, xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG, xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE, xmmA + punpcklbw xmmA, xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE, xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH, xmmB + punpcklbw xmmB, xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH, xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF, xmmF + + movdqa xmmC, xmmA + punpcklbw xmmA, xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC, xmmF ; 
xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD, xmmB + punpcklbw xmmB, xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD, xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG, xmmE + punpcklbw xmmE, xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG, xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF, xmmH + punpckhbw xmmH, xmmH + psrlw xmmF, BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH, BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + movdqa xmm6, xmm1 + punpcklwd xmm1, xmm3 + punpckhwd xmm6, xmm3 + pmaddwd xmm1, [GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6, [GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm7, xmm6 ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm6, xmm0 + punpcklwd xmm0, xmm2 + punpckhwd xmm6, xmm2 + pmaddwd xmm0, [GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6, [GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa xmm0, xmm5 ; xmm0=BO + movdqa xmm6, xmm4 ; xmm6=BE + + movdqa xmm4, xmm0 + punpcklwd xmm0, xmm3 + punpckhwd xmm4, xmm3 + pmaddwd xmm0, [GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4, [GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + movdqa xmm3, [GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF] + + paddd xmm0, xmm1 + paddd xmm4, xmm7 + paddd xmm0, xmm3 + paddd xmm4, xmm3 + psrld xmm0, SCALEBITS ; xmm0=YOL + psrld xmm4, SCALEBITS ; xmm4=YOH + packssdw xmm0, xmm4 ; xmm0=YO + + 
movdqa xmm4, xmm6 + punpcklwd xmm6, xmm2 + punpckhwd xmm4, xmm2 + pmaddwd xmm6, [GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4, [GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + movdqa xmm2, [GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(0)] + paddd xmm4, XMMWORD [wk(1)] + paddd xmm6, xmm2 + paddd xmm4, xmm2 + psrld xmm6, SCALEBITS ; xmm6=YEL + psrld xmm4, SCALEBITS ; xmm4=YEH + packssdw xmm6, xmm4 ; xmm6=YE + + psllw xmm0, BYTE_BIT + por xmm6, xmm0 ; xmm6=Y + movdqa XMMWORD [edi], xmm6 ; Save Y + + sub ecx, byte SIZEOF_XMMWORD + add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add edi, byte SIZEOF_XMMWORD ; outptr0 + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + test ecx, ecx + jnz near .column_ld1 + + pop ecx ; col + pop esi + pop edi + poppic eax + + add esi, byte SIZEOF_JSAMPROW ; input_buf + add edi, byte SIZEOF_JSAMPROW + dec eax ; num_rows + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jchuff-sse2.asm b/simd/i386/jchuff-sse2.asm new file mode 100644 index 0000000..6ea69f6 --- /dev/null +++ b/simd/i386/jchuff-sse2.asm @@ -0,0 +1,426 @@ +; +; jchuff-sse2.asm - Huffman entropy encoding (SSE2) +; +; Copyright (C) 2009-2011, 2014-2017, D. R. Commander. +; Copyright (C) 2015, Matthieu Darbois. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains an SSE2 implementation for Huffman coding of one block. +; The following code is based directly on jchuff.c; see jchuff.c for more +; details. +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_huff_encode_one_block) + +EXTN(jconst_huff_encode_one_block): + +%include "jpeg_nbits_table.inc" + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +; These macros perform the same task as the emit_bits() function in the +; original libjpeg code. In addition to reducing overhead by explicitly +; inlining the code, additional performance is achieved by taking into +; account the size of the bit buffer and waiting until it is almost full +; before emptying it. This mostly benefits 64-bit platforms, since 6 +; bytes can be stored in a 64-bit bit buffer before it has to be emptied. + +%macro EMIT_BYTE 0 + sub put_bits, 8 ; put_bits -= 8; + mov edx, put_buffer + mov ecx, put_bits + shr edx, cl ; c = (JOCTET)GETJOCTET(put_buffer >> put_bits); + mov byte [eax], dl ; *buffer++ = c; + add eax, 1 + cmp dl, 0xFF ; need to stuff a zero byte? 
+ jne %%.EMIT_BYTE_END + mov byte [eax], 0 ; *buffer++ = 0; + add eax, 1 +%%.EMIT_BYTE_END: +%endmacro + +%macro PUT_BITS 1 + add put_bits, ecx ; put_bits += size; + shl put_buffer, cl ; put_buffer = (put_buffer << size); + or put_buffer, %1 +%endmacro + +%macro CHECKBUF15 0 + cmp put_bits, 16 ; if (put_bits > 15) { + jl %%.CHECKBUF15_END + mov eax, POINTER [esp+buffer] + EMIT_BYTE + EMIT_BYTE + mov POINTER [esp+buffer], eax +%%.CHECKBUF15_END: +%endmacro + +%macro EMIT_BITS 1 + PUT_BITS %1 + CHECKBUF15 +%endmacro + +%macro kloop_prepare 37 ;(ko, jno0, ..., jno31, xmm0, xmm1, xmm2, xmm3) + pxor xmm4, xmm4 ; __m128i neg = _mm_setzero_si128(); + pxor xmm5, xmm5 ; __m128i neg = _mm_setzero_si128(); + pxor xmm6, xmm6 ; __m128i neg = _mm_setzero_si128(); + pxor xmm7, xmm7 ; __m128i neg = _mm_setzero_si128(); + pinsrw %34, word [esi + %2 * SIZEOF_WORD], 0 ; xmm_shadow[0] = block[jno0]; + pinsrw %35, word [esi + %10 * SIZEOF_WORD], 0 ; xmm_shadow[8] = block[jno8]; + pinsrw %36, word [esi + %18 * SIZEOF_WORD], 0 ; xmm_shadow[16] = block[jno16]; + pinsrw %37, word [esi + %26 * SIZEOF_WORD], 0 ; xmm_shadow[24] = block[jno24]; + pinsrw %34, word [esi + %3 * SIZEOF_WORD], 1 ; xmm_shadow[1] = block[jno1]; + pinsrw %35, word [esi + %11 * SIZEOF_WORD], 1 ; xmm_shadow[9] = block[jno9]; + pinsrw %36, word [esi + %19 * SIZEOF_WORD], 1 ; xmm_shadow[17] = block[jno17]; + pinsrw %37, word [esi + %27 * SIZEOF_WORD], 1 ; xmm_shadow[25] = block[jno25]; + pinsrw %34, word [esi + %4 * SIZEOF_WORD], 2 ; xmm_shadow[2] = block[jno2]; + pinsrw %35, word [esi + %12 * SIZEOF_WORD], 2 ; xmm_shadow[10] = block[jno10]; + pinsrw %36, word [esi + %20 * SIZEOF_WORD], 2 ; xmm_shadow[18] = block[jno18]; + pinsrw %37, word [esi + %28 * SIZEOF_WORD], 2 ; xmm_shadow[26] = block[jno26]; + pinsrw %34, word [esi + %5 * SIZEOF_WORD], 3 ; xmm_shadow[3] = block[jno3]; + pinsrw %35, word [esi + %13 * SIZEOF_WORD], 3 ; xmm_shadow[11] = block[jno11]; + pinsrw %36, word [esi + %21 * SIZEOF_WORD], 3 ; xmm_shadow[19] = 
block[jno19]; + pinsrw %37, word [esi + %29 * SIZEOF_WORD], 3 ; xmm_shadow[27] = block[jno27]; + pinsrw %34, word [esi + %6 * SIZEOF_WORD], 4 ; xmm_shadow[4] = block[jno4]; + pinsrw %35, word [esi + %14 * SIZEOF_WORD], 4 ; xmm_shadow[12] = block[jno12]; + pinsrw %36, word [esi + %22 * SIZEOF_WORD], 4 ; xmm_shadow[20] = block[jno20]; + pinsrw %37, word [esi + %30 * SIZEOF_WORD], 4 ; xmm_shadow[28] = block[jno28]; + pinsrw %34, word [esi + %7 * SIZEOF_WORD], 5 ; xmm_shadow[5] = block[jno5]; + pinsrw %35, word [esi + %15 * SIZEOF_WORD], 5 ; xmm_shadow[13] = block[jno13]; + pinsrw %36, word [esi + %23 * SIZEOF_WORD], 5 ; xmm_shadow[21] = block[jno21]; + pinsrw %37, word [esi + %31 * SIZEOF_WORD], 5 ; xmm_shadow[29] = block[jno29]; + pinsrw %34, word [esi + %8 * SIZEOF_WORD], 6 ; xmm_shadow[6] = block[jno6]; + pinsrw %35, word [esi + %16 * SIZEOF_WORD], 6 ; xmm_shadow[14] = block[jno14]; + pinsrw %36, word [esi + %24 * SIZEOF_WORD], 6 ; xmm_shadow[22] = block[jno22]; + pinsrw %37, word [esi + %32 * SIZEOF_WORD], 6 ; xmm_shadow[30] = block[jno30]; + pinsrw %34, word [esi + %9 * SIZEOF_WORD], 7 ; xmm_shadow[7] = block[jno7]; + pinsrw %35, word [esi + %17 * SIZEOF_WORD], 7 ; xmm_shadow[15] = block[jno15]; + pinsrw %36, word [esi + %25 * SIZEOF_WORD], 7 ; xmm_shadow[23] = block[jno23]; +%if %1 != 32 + pinsrw %37, word [esi + %33 * SIZEOF_WORD], 7 ; xmm_shadow[31] = block[jno31]; +%else + pinsrw %37, ecx, 7 ; xmm_shadow[31] = block[jno31]; +%endif + pcmpgtw xmm4, %34 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm5, %35 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm6, %36 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm7, %37 ; neg = _mm_cmpgt_epi16(neg, x1); + paddw %34, xmm4 ; x1 = _mm_add_epi16(x1, neg); + paddw %35, xmm5 ; x1 = _mm_add_epi16(x1, neg); + paddw %36, xmm6 ; x1 = _mm_add_epi16(x1, neg); + paddw %37, xmm7 ; x1 = _mm_add_epi16(x1, neg); + pxor %34, xmm4 ; x1 = _mm_xor_si128(x1, neg); + pxor %35, xmm5 ; x1 = _mm_xor_si128(x1, neg); + pxor %36, xmm6 ; x1 = 
_mm_xor_si128(x1, neg); + pxor %37, xmm7 ; x1 = _mm_xor_si128(x1, neg); + pxor xmm4, %34 ; neg = _mm_xor_si128(neg, x1); + pxor xmm5, %35 ; neg = _mm_xor_si128(neg, x1); + pxor xmm6, %36 ; neg = _mm_xor_si128(neg, x1); + pxor xmm7, %37 ; neg = _mm_xor_si128(neg, x1); + movdqa XMMWORD [esp + t1 + %1 * SIZEOF_WORD], %34 ; _mm_storeu_si128((__m128i *)(t1 + ko), x1); + movdqa XMMWORD [esp + t1 + (%1 + 8) * SIZEOF_WORD], %35 ; _mm_storeu_si128((__m128i *)(t1 + ko + 8), x1); + movdqa XMMWORD [esp + t1 + (%1 + 16) * SIZEOF_WORD], %36 ; _mm_storeu_si128((__m128i *)(t1 + ko + 16), x1); + movdqa XMMWORD [esp + t1 + (%1 + 24) * SIZEOF_WORD], %37 ; _mm_storeu_si128((__m128i *)(t1 + ko + 24), x1); + movdqa XMMWORD [esp + t2 + %1 * SIZEOF_WORD], xmm4 ; _mm_storeu_si128((__m128i *)(t2 + ko), neg); + movdqa XMMWORD [esp + t2 + (%1 + 8) * SIZEOF_WORD], xmm5 ; _mm_storeu_si128((__m128i *)(t2 + ko + 8), neg); + movdqa XMMWORD [esp + t2 + (%1 + 16) * SIZEOF_WORD], xmm6 ; _mm_storeu_si128((__m128i *)(t2 + ko + 16), neg); + movdqa XMMWORD [esp + t2 + (%1 + 24) * SIZEOF_WORD], xmm7 ; _mm_storeu_si128((__m128i *)(t2 + ko + 24), neg); +%endmacro + +; +; Encode a single block's worth of coefficients. 
+; +; GLOBAL(JOCTET *) +; jsimd_huff_encode_one_block_sse2(working_state *state, JOCTET *buffer, +; JCOEFPTR block, int last_dc_val, +; c_derived_tbl *dctbl, c_derived_tbl *actbl) +; + +; eax + 8 = working_state *state +; eax + 12 = JOCTET *buffer +; eax + 16 = JCOEFPTR block +; eax + 20 = int last_dc_val +; eax + 24 = c_derived_tbl *dctbl +; eax + 28 = c_derived_tbl *actbl + +%define pad 6 * SIZEOF_DWORD ; Align to 16 bytes +%define t1 pad +%define t2 t1 + (DCTSIZE2 * SIZEOF_WORD) +%define block t2 + (DCTSIZE2 * SIZEOF_WORD) +%define actbl block + SIZEOF_DWORD +%define buffer actbl + SIZEOF_DWORD +%define temp buffer + SIZEOF_DWORD +%define temp2 temp + SIZEOF_DWORD +%define temp3 temp2 + SIZEOF_DWORD +%define temp4 temp3 + SIZEOF_DWORD +%define temp5 temp4 + SIZEOF_DWORD +%define gotptr temp5 + SIZEOF_DWORD ; void *gotptr +%define put_buffer ebx +%define put_bits edi + + align 32 + GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2) + +EXTN(jsimd_huff_encode_one_block_sse2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + sub esp, temp5+9*SIZEOF_DWORD-pad + push ebx + push ecx +; push edx ; need not be preserved + push esi + push edi + push ebp + + mov esi, POINTER [eax+8] ; (working_state *state) + mov put_buffer, DWORD [esi+8] ; put_buffer = state->cur.put_buffer; + mov put_bits, DWORD [esi+12] ; put_bits = state->cur.put_bits; + push esi ; esi is now scratch + + get_GOT edx ; get GOT address + movpic POINTER [esp+gotptr], edx ; save GOT address + + mov ecx, POINTER [eax+28] + mov edx, POINTER [eax+16] + mov esi, POINTER [eax+12] + mov POINTER [esp+actbl], ecx + mov POINTER [esp+block], edx + mov POINTER [esp+buffer], esi + + ; Encode the DC coefficient difference per section F.1.2.1 + mov esi, POINTER [esp+block] ; block + movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val; + sub ecx, DWORD [eax+20] + mov esi, ecx + + ; This is 
a well-known technique for obtaining the absolute value + ; without a branch. It is derived from an assembly language technique + ; presented in "How to Optimize for the Pentium Processors", + ; Copyright (c) 1996, 1997 by Agner Fog. + mov edx, ecx + sar edx, 31 ; temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); + xor ecx, edx ; temp ^= temp3; + sub ecx, edx ; temp -= temp3; + + ; For a negative input, want temp2 = bitwise complement of abs(input) + ; This code assumes we are on a two's complement machine + add esi, edx ; temp2 += temp3; + mov DWORD [esp+temp], esi ; backup temp2 in temp + + ; Find the number of bits needed for the magnitude of the coefficient + movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp) + movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp); + mov DWORD [esp+temp2], edx ; backup nbits in temp2 + + ; Emit the Huffman-coded symbol for the number of bits + mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore + mov eax, INT [ebp + edx * 4] ; code = dctbl->ehufco[nbits]; + movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits]; + EMIT_BITS eax ; EMIT_BITS(code, size) + + mov ecx, DWORD [esp+temp2] ; restore nbits + + ; Mask off any extra bits in code + mov eax, 1 + shl eax, cl + dec eax + and eax, DWORD [esp+temp] ; temp2 &= (((JLONG)1)<>= r; + mov DWORD [esp+temp3], edx +.BRLOOP: + cmp ecx, 16 ; while (r > 15) { + jl near .ERLOOP + sub ecx, 16 ; r -= 16; + mov DWORD [esp+temp], ecx + mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0]; + movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; + EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0) + mov ecx, DWORD [esp+temp] + jmp .BRLOOP +.ERLOOP: + movsx eax, word [esi] ; temp = t1[k]; + movpic edx, POINTER [esp+gotptr] ; load GOT address (edx) + movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp); + mov DWORD [esp+temp2], eax + ; Emit Huffman symbol for run length / 
number of bits + shl ecx, 4 ; temp3 = (r << 4) + nbits; + add ecx, eax + mov eax, INT [ebp + ecx * 4] ; code = actbl->ehufco[temp3]; + movzx ecx, byte [ebp + ecx + 1024] ; size = actbl->ehufsi[temp3]; + EMIT_BITS eax + + movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k]; + ; Mask off any extra bits in code + mov ecx, DWORD [esp+temp2] + mov eax, 1 + shl eax, cl + dec eax + and eax, edx ; temp2 &= (((JLONG)1)<>= 1; + + jmp .BLOOP +.ELOOP: + movdqa xmm0, XMMWORD [esp + t1 + 32 * SIZEOF_WORD] ; __m128i tmp0 = _mm_loadu_si128((__m128i *)(t1 + 32)); + movdqa xmm1, XMMWORD [esp + t1 + 40 * SIZEOF_WORD] ; __m128i tmp1 = _mm_loadu_si128((__m128i *)(t1 + 40)); + movdqa xmm2, XMMWORD [esp + t1 + 48 * SIZEOF_WORD] ; __m128i tmp2 = _mm_loadu_si128((__m128i *)(t1 + 48)); + movdqa xmm3, XMMWORD [esp + t1 + 56 * SIZEOF_WORD] ; __m128i tmp3 = _mm_loadu_si128((__m128i *)(t1 + 56)); + pcmpeqw xmm0, xmm7 ; tmp0 = _mm_cmpeq_epi16(tmp0, zero); + pcmpeqw xmm1, xmm7 ; tmp1 = _mm_cmpeq_epi16(tmp1, zero); + pcmpeqw xmm2, xmm7 ; tmp2 = _mm_cmpeq_epi16(tmp2, zero); + pcmpeqw xmm3, xmm7 ; tmp3 = _mm_cmpeq_epi16(tmp3, zero); + packsswb xmm0, xmm1 ; tmp0 = _mm_packs_epi16(tmp0, tmp1); + packsswb xmm2, xmm3 ; tmp2 = _mm_packs_epi16(tmp2, tmp3); + pmovmskb edx, xmm0 ; index = ((uint64_t)_mm_movemask_epi8(tmp0)) << 0; + pmovmskb ecx, xmm2 ; index = ((uint64_t)_mm_movemask_epi8(tmp2)) << 16; + shl ecx, 16 + or edx, ecx + not edx ; index = ~index; + + lea eax, [esp + t1 + (DCTSIZE2/2) * 2] + sub eax, esi + shr eax, 1 + bsf ecx, edx ; r = __builtin_ctzl(index); + jz near .ELOOP2 + shr edx, cl ; index >>= r; + add ecx, eax + lea esi, [esi+ecx*2] ; k += r; + mov DWORD [esp+temp3], edx + jmp .BRLOOP2 +.BLOOP2: + bsf ecx, edx ; r = __builtin_ctzl(index); + jz near .ELOOP2 + lea esi, [esi+ecx*2] ; k += r; + shr edx, cl ; index >>= r; + mov DWORD [esp+temp3], edx +.BRLOOP2: + cmp ecx, 16 ; while (r > 15) { + jl near .ERLOOP2 + sub ecx, 16 ; r -= 16; + mov DWORD [esp+temp], ecx + mov eax, INT [ebp + 240 * 4] ; 
code_0xf0 = actbl->ehufco[0xf0]; + movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; + EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0) + mov ecx, DWORD [esp+temp] + jmp .BRLOOP2 +.ERLOOP2: + movsx eax, word [esi] ; temp = t1[k]; + bsr eax, eax ; nbits = 32 - __builtin_clz(temp); + inc eax + mov DWORD [esp+temp2], eax + ; Emit Huffman symbol for run length / number of bits + shl ecx, 4 ; temp3 = (r << 4) + nbits; + add ecx, eax + mov eax, INT [ebp + ecx * 4] ; code = actbl->ehufco[temp3]; + movzx ecx, byte [ebp + ecx + 1024] ; size = actbl->ehufsi[temp3]; + EMIT_BITS eax + + movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k]; + ; Mask off any extra bits in code + mov ecx, DWORD [esp+temp2] + mov eax, 1 + shl eax, cl + dec eax + and eax, edx ; temp2 &= (((JLONG)1)<>= 1; + + jmp .BLOOP2 +.ELOOP2: + ; If the last coef(s) were zero, emit an end-of-block code + lea edx, [esp + t1 + (DCTSIZE2-1) * 2] ; r = DCTSIZE2-1-k; + cmp edx, esi ; if (r > 0) { + je .EFN + mov eax, INT [ebp] ; code = actbl->ehufco[0]; + movzx ecx, byte [ebp + 1024] ; size = actbl->ehufsi[0]; + EMIT_BITS eax +.EFN: + mov eax, [esp+buffer] + pop esi + ; Save put_buffer & put_bits + mov DWORD [esi+8], put_buffer ; state->cur.put_buffer = put_buffer; + mov DWORD [esi+12], put_bits ; state->cur.put_bits = put_bits; + + pop ebp + pop edi + pop esi +; pop edx ; need not be preserved + pop ecx + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. 
+ align 32 diff --git a/simd/i386/jcphuff-sse2.asm b/simd/i386/jcphuff-sse2.asm new file mode 100644 index 0000000..25c63c7 --- /dev/null +++ b/simd/i386/jcphuff-sse2.asm @@ -0,0 +1,660 @@ +; +; jcphuff-sse2.asm - prepare data for progressive Huffman encoding (SSE2) +; +; Copyright (C) 2016, 2018, Matthieu Darbois +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains an SSE2 implementation of data preparation for progressive +; Huffman encoding. See jcphuff.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +; -------------------------------------------------------------------------- +; Macros to load data for jsimd_encode_mcu_AC_first_prepare_sse2() and +; jsimd_encode_mcu_AC_refine_prepare_sse2() + +%macro LOAD16 0 + pxor N0, N0 + pxor N1, N1 + + mov T0, INT [LUT + 0*SIZEOF_INT] + mov T1, INT [LUT + 8*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 0 + pinsrw X1, word [BLOCK + T1 * 2], 0 + + mov T0, INT [LUT + 1*SIZEOF_INT] + mov T1, INT [LUT + 9*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 1 + pinsrw X1, word [BLOCK + T1 * 2], 1 + + mov T0, INT [LUT + 2*SIZEOF_INT] + mov T1, INT [LUT + 10*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 2 + pinsrw X1, word [BLOCK + T1 * 2], 2 + + mov T0, INT [LUT + 3*SIZEOF_INT] + mov T1, INT [LUT + 11*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 3 + pinsrw X1, word [BLOCK + T1 * 2], 3 + + mov T0, INT [LUT + 4*SIZEOF_INT] + mov T1, INT [LUT + 
12*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 4 + pinsrw X1, word [BLOCK + T1 * 2], 4 + + mov T0, INT [LUT + 5*SIZEOF_INT] + mov T1, INT [LUT + 13*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 5 + pinsrw X1, word [BLOCK + T1 * 2], 5 + + mov T0, INT [LUT + 6*SIZEOF_INT] + mov T1, INT [LUT + 14*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 6 + pinsrw X1, word [BLOCK + T1 * 2], 6 + + mov T0, INT [LUT + 7*SIZEOF_INT] + mov T1, INT [LUT + 15*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 7 + pinsrw X1, word [BLOCK + T1 * 2], 7 +%endmacro + +%macro LOAD15 0 + pxor N0, N0 + pxor N1, N1 + pxor X1, X1 + + mov T0, INT [LUT + 0*SIZEOF_INT] + mov T1, INT [LUT + 8*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 0 + pinsrw X1, word [BLOCK + T1 * 2], 0 + + mov T0, INT [LUT + 1*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 1 + + mov T0, INT [LUT + 2*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 2 + + mov T0, INT [LUT + 3*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 3 + + mov T0, INT [LUT + 4*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 4 + + mov T0, INT [LUT + 5*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 5 + + mov T0, INT [LUT + 6*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 6 + + mov T0, INT [LUT + 7*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 7 + + cmp LENEND, 2 + jl %%.ELOAD15 + mov T1, INT [LUT + 9*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 1 + + cmp LENEND, 3 + jl %%.ELOAD15 + mov T1, INT [LUT + 10*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 2 + + cmp LENEND, 4 + jl %%.ELOAD15 + mov T1, INT [LUT + 11*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 3 + + cmp LENEND, 5 + jl %%.ELOAD15 + mov T1, INT [LUT + 12*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 4 + + cmp LENEND, 6 + jl %%.ELOAD15 + mov T1, INT [LUT + 13*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 5 + + cmp LENEND, 7 + jl %%.ELOAD15 + mov T1, INT [LUT + 14*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 6 +%%.ELOAD15: +%endmacro + +%macro LOAD8 0 + pxor N0, N0 + + mov T0, 
INT [LUT + 0*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 0 + + mov T0, INT [LUT + 1*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 1 + + mov T0, INT [LUT + 2*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 2 + + mov T0, INT [LUT + 3*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 3 + + mov T0, INT [LUT + 4*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 4 + + mov T0, INT [LUT + 5*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 5 + + mov T0, INT [LUT + 6*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 6 + + mov T0, INT [LUT + 7*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 7 +%endmacro + +%macro LOAD7 0 + pxor N0, N0 + pxor X0, X0 + + mov T1, INT [LUT + 0*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 0 + + cmp LENEND, 2 + jl %%.ELOAD7 + mov T1, INT [LUT + 1*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 1 + + cmp LENEND, 3 + jl %%.ELOAD7 + mov T1, INT [LUT + 2*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 2 + + cmp LENEND, 4 + jl %%.ELOAD7 + mov T1, INT [LUT + 3*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 3 + + cmp LENEND, 5 + jl %%.ELOAD7 + mov T1, INT [LUT + 4*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 4 + + cmp LENEND, 6 + jl %%.ELOAD7 + mov T1, INT [LUT + 5*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 5 + + cmp LENEND, 7 + jl %%.ELOAD7 + mov T1, INT [LUT + 6*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 6 +%%.ELOAD7: +%endmacro + +%macro REDUCE0 0 + movdqa xmm0, XMMWORD [VALUES + ( 0*2)] + movdqa xmm1, XMMWORD [VALUES + ( 8*2)] + movdqa xmm2, XMMWORD [VALUES + (16*2)] + movdqa xmm3, XMMWORD [VALUES + (24*2)] + movdqa xmm4, XMMWORD [VALUES + (32*2)] + movdqa xmm5, XMMWORD [VALUES + (40*2)] + movdqa xmm6, XMMWORD [VALUES + (48*2)] + + pcmpeqw xmm0, ZERO + pcmpeqw xmm1, ZERO + pcmpeqw xmm2, ZERO + pcmpeqw xmm3, ZERO + pcmpeqw xmm4, ZERO + pcmpeqw xmm5, ZERO + pcmpeqw xmm6, ZERO + pcmpeqw xmm7, XMMWORD [VALUES + (56*2)] + + packsswb xmm0, xmm1 + packsswb xmm2, xmm3 + packsswb xmm4, xmm5 + packsswb xmm6, xmm7 + + pmovmskb eax, xmm0 + 
pmovmskb ecx, xmm2 + pmovmskb edx, xmm4 + pmovmskb esi, xmm6 + + shl ecx, 16 + shl esi, 16 + + or eax, ecx + or edx, esi + + not eax + not edx + + mov edi, ZEROBITS + + mov INT [edi], eax + mov INT [edi+SIZEOF_INT], edx +%endmacro + +; +; Prepare data for jsimd_encode_mcu_AC_first(). +; +; GLOBAL(void) +; jsimd_encode_mcu_AC_first_prepare_sse2(const JCOEF *block, +; const int *jpeg_natural_order_start, +; int Sl, int Al, JCOEF *values, +; size_t *zerobits) +; +; eax + 8 = const JCOEF *block +; eax + 12 = const int *jpeg_natural_order_start +; eax + 16 = int Sl +; eax + 20 = int Al +; eax + 24 = JCOEF *values +; eax + 28 = size_t *zerobits + +%define ZERO xmm7 +%define X0 xmm0 +%define X1 xmm1 +%define N0 xmm2 +%define N1 xmm3 +%define AL xmm4 +%define K eax +%define LENEND eax +%define LUT ebx +%define T0 ecx +%define T1 edx +%define BLOCK esi +%define VALUES edi +%define LEN ebp + +%define ZEROBITS INT [esp + 5 * 4] + + align 32 + GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2) + +EXTN(jsimd_encode_mcu_AC_first_prepare_sse2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + sub esp, 4 + push ebx + push ecx +; push edx ; need not be preserved + push esi + push edi + push ebp + + mov BLOCK, INT [eax + 8] + mov LUT, INT [eax + 12] + mov VALUES, INT [eax + 24] + movd AL, INT [eax + 20] + mov T0, INT [eax + 28] + mov ZEROBITS, T0 + mov LEN, INT [eax + 16] + pxor ZERO, ZERO + mov K, LEN + and K, -16 + shr K, 4 + jz .ELOOP16 +.BLOOP16: + LOAD16 + pcmpgtw N0, X0 + pcmpgtw N1, X1 + paddw X0, N0 + paddw X1, N1 + pxor X0, N0 + pxor X1, N1 + psrlw X0, AL + psrlw X1, AL + pxor N0, X0 + pxor N1, X1 + movdqa XMMWORD [VALUES + (0) * 2], X0 + movdqa XMMWORD [VALUES + (8) * 2], X1 + movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0 + movdqa XMMWORD [VALUES + (8 + DCTSIZE2) * 2], N1 + add VALUES, 16*2 + add LUT, 16*SIZEOF_INT + dec K + jnz .BLOOP16 
+.ELOOP16: + mov LENEND, LEN + and LENEND, 7 + + test LEN, 8 + jz .TRY7 + test LEN, 7 + jz .TRY8 + + LOAD15 + pcmpgtw N0, X0 + pcmpgtw N1, X1 + paddw X0, N0 + paddw X1, N1 + pxor X0, N0 + pxor X1, N1 + psrlw X0, AL + psrlw X1, AL + pxor N0, X0 + pxor N1, X1 + movdqa XMMWORD [VALUES + (0) * 2], X0 + movdqa XMMWORD [VALUES + (8) * 2], X1 + movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0 + movdqa XMMWORD [VALUES + (8 + DCTSIZE2) * 2], N1 + add VALUES, 16*2 + jmp .PADDING +.TRY8: + LOAD8 + pcmpgtw N0, X0 + paddw X0, N0 + pxor X0, N0 + psrlw X0, AL + pxor N0, X0 + movdqa XMMWORD [VALUES + (0) * 2], X0 + movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0 + add VALUES, 8*2 + jmp .PADDING +.TRY7: + LOAD7 + pcmpgtw N0, X0 + paddw X0, N0 + pxor X0, N0 + psrlw X0, AL + pxor N0, X0 + movdqa XMMWORD [VALUES + (0) * 2], X0 + movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0 + add VALUES, 8*2 +.PADDING: + mov K, LEN + add K, 7 + and K, -8 + shr K, 3 + sub K, DCTSIZE2/8 + jz .EPADDING + align 16 +.ZEROLOOP: + movdqa XMMWORD [VALUES + 0], ZERO + add VALUES, 8*2 + inc K + jnz .ZEROLOOP +.EPADDING: + sub VALUES, DCTSIZE2*2 + + REDUCE0 + + pop ebp + pop edi + pop esi +; pop edx ; need not be preserved + pop ecx + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%undef ZERO +%undef X0 +%undef X1 +%undef N0 +%undef N1 +%undef AL +%undef K +%undef LUT +%undef T0 +%undef T1 +%undef BLOCK +%undef VALUES +%undef LEN + +; +; Prepare data for jsimd_encode_mcu_AC_refine(). 
+; +; GLOBAL(int) +; jsimd_encode_mcu_AC_refine_prepare_sse2(const JCOEF *block, +; const int *jpeg_natural_order_start, +; int Sl, int Al, JCOEF *absvalues, +; size_t *bits) +; +; eax + 8 = const JCOEF *block +; eax + 12 = const int *jpeg_natural_order_start +; eax + 16 = int Sl +; eax + 20 = int Al +; eax + 24 = JCOEF *values +; eax + 28 = size_t *bits + +%define ZERO xmm7 +%define ONE xmm5 +%define X0 xmm0 +%define X1 xmm1 +%define N0 xmm2 +%define N1 xmm3 +%define AL xmm4 +%define K eax +%define LENEND eax +%define LUT ebx +%define T0 ecx +%define T0w cx +%define T1 edx +%define BLOCK esi +%define VALUES edi +%define KK ebp + +%define ZEROBITS INT [esp + 5 * 4] +%define EOB INT [esp + 5 * 4 + 4] +%define LEN INT [esp + 5 * 4 + 8] + + align 32 + GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2) + +EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + sub esp, 16 + push ebx + push ecx +; push edx ; need not be preserved + push esi + push edi + push ebp + + pcmpeqw ONE, ONE + psrlw ONE, 15 + mov BLOCK, INT [eax + 8] + mov LUT, INT [eax + 12] + mov VALUES, INT [eax + 24] + movd AL, INT [eax + 20] + mov T0, INT [eax + 28] + mov K, INT [eax + 16] + mov INT [T0 + 2 * SIZEOF_INT], -1 + mov INT [T0 + 3 * SIZEOF_INT], -1 + mov ZEROBITS, T0 + mov LEN, K + pxor ZERO, ZERO + and K, -16 + mov EOB, 0 + xor KK, KK + shr K, 4 + jz .ELOOPR16 +.BLOOPR16: + LOAD16 + pcmpgtw N0, X0 + pcmpgtw N1, X1 + paddw X0, N0 + paddw X1, N1 + pxor X0, N0 + pxor X1, N1 + psrlw X0, AL + psrlw X1, AL + movdqa XMMWORD [VALUES + (0) * 2], X0 + movdqa XMMWORD [VALUES + (8) * 2], X1 + pcmpeqw X0, ONE + pcmpeqw X1, ONE + packsswb N0, N1 + packsswb X0, X1 + pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg); + mov T1, ZEROBITS + not T0 + mov word [T1 + 2 * SIZEOF_INT + KK], T0w + pmovmskb T1, X0 ; idx = 
_mm_movemask_epi8(x1); + bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1); + jz .CONTINUER16 ; if (idx) { + lea T1, [T1+KK*8] + mov EOB, T1 ; EOB = k + idx; +.CONTINUER16: + add VALUES, 16*2 + add LUT, 16*SIZEOF_INT + add KK, 2 + dec K + jnz .BLOOPR16 +.ELOOPR16: + mov LENEND, LEN + + test LENEND, 8 + jz .TRYR7 + test LENEND, 7 + jz .TRYR8 + + and LENEND, 7 + LOAD15 + pcmpgtw N0, X0 + pcmpgtw N1, X1 + paddw X0, N0 + paddw X1, N1 + pxor X0, N0 + pxor X1, N1 + psrlw X0, AL + psrlw X1, AL + movdqa XMMWORD [VALUES + (0) * 2], X0 + movdqa XMMWORD [VALUES + (8) * 2], X1 + pcmpeqw X0, ONE + pcmpeqw X1, ONE + packsswb N0, N1 + packsswb X0, X1 + pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg); + mov T1, ZEROBITS + not T0 + mov word [T1 + 2 * SIZEOF_INT + KK], T0w + pmovmskb T1, X0 ; idx = _mm_movemask_epi8(x1); + bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1); + jz .CONTINUER15 ; if (idx) { + lea T1, [T1+KK*8] + mov EOB, T1 ; EOB = k + idx; +.CONTINUER15: + add VALUES, 16*2 + jmp .PADDINGR +.TRYR8: + LOAD8 + + pcmpgtw N0, X0 + paddw X0, N0 + pxor X0, N0 + psrlw X0, AL + movdqa XMMWORD [VALUES + (0) * 2], X0 + pcmpeqw X0, ONE + packsswb N0, ZERO + packsswb X0, ZERO + pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg); + mov T1, ZEROBITS + not T0 + mov word [T1 + 2 * SIZEOF_INT + KK], T0w + pmovmskb T1, X0 ; idx = _mm_movemask_epi8(x1); + bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1); + jz .CONTINUER8 ; if (idx) { + lea T1, [T1+KK*8] + mov EOB, T1 ; EOB = k + idx; +.CONTINUER8: + add VALUES, 8*2 + jmp .PADDINGR +.TRYR7: + and LENEND, 7 + LOAD7 + + pcmpgtw N0, X0 + paddw X0, N0 + pxor X0, N0 + psrlw X0, AL + movdqa XMMWORD [VALUES + (0) * 2], X0 + pcmpeqw X0, ONE + packsswb N0, ZERO + packsswb X0, ZERO + pmovmskb T0, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg); + mov T1, ZEROBITS + not T0 + mov word [T1 + 2 * SIZEOF_INT + KK], T0w + pmovmskb T1, X0 ; idx = _mm_movemask_epi8(x1); + bsr T1, T1 ; idx = 16 - (__builtin_clz(idx)>>1); + jz 
.CONTINUER7 ; if (idx) { + lea T1, [T1+KK*8] + mov EOB, T1 ; EOB = k + idx; +.CONTINUER7: + add VALUES, 8*2 +.PADDINGR: + mov K, LEN + add K, 7 + and K, -8 + shr K, 3 + sub K, DCTSIZE2/8 + jz .EPADDINGR + align 16 +.ZEROLOOPR: + movdqa XMMWORD [VALUES + 0], ZERO + add VALUES, 8*2 + inc K + jnz .ZEROLOOPR +.EPADDINGR: + sub VALUES, DCTSIZE2*2 + + REDUCE0 + + mov eax, EOB + + pop ebp + pop edi + pop esi +; pop edx ; need not be preserved + pop ecx + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +%undef ZERO +%undef ONE +%undef X0 +%undef X1 +%undef N0 +%undef N1 +%undef AL +%undef K +%undef KK +%undef EOB +%undef SIGN +%undef LUT +%undef T0 +%undef T1 +%undef BLOCK +%undef VALUES +%undef LEN +%undef LENEND + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jcsample-avx2.asm b/simd/i386/jcsample-avx2.asm new file mode 100644 index 0000000..5bcdefd --- /dev/null +++ b/simd/i386/jcsample-avx2.asm @@ -0,0 +1,390 @@ +; +; jcsample.asm - downsampling (AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2015, Intel Corporation. +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Downsample pixel values of a single component. 
+; This version handles the common case of 2:1 horizontal and 1:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v1_downsample_avx2(JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, +; JDIMENSION width_in_blocks, JSAMPARRAY input_data, +; JSAMPARRAY output_data); +; + +%define img_width(b) (b) + 8 ; JDIMENSION image_width +%define max_v_samp(b) (b) + 12 ; int max_v_samp_factor +%define v_samp(b) (b) + 16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b) + 20 ; JDIMENSION width_in_blocks +%define input_data(b) (b) + 24 ; JSAMPARRAY input_data +%define output_data(b) (b) + 28 ; JSAMPARRAY output_data + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2) + +EXTN(jsimd_h2v1_downsample_avx2): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx, 3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge + + push ecx + shl ecx, 1 ; output_cols * 2 + sub ecx, edx + jle short .expand_end + + mov eax, INT [max_v_samp(ebp)] + test eax, eax + jle short .expand_end + + cld + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16, 7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] + add edi, edx + mov al, JSAMPLE [edi-1] + + rep stosb + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v1_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax, eax + jle near .return + + mov edx, 0x00010000 ; bias pattern + vmovd xmm7, edx + vpshufd xmm7, xmm7, 0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1} + vperm2i128 ymm7, ymm7, ymm7, 0 ; ymm7={xmm7, xmm7} + vpcmpeqw ymm6, ymm6, ymm6 + vpsrlw ymm6, ymm6, BYTE_BIT ; ymm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY 
[output_data(ebp)] ; output_data + alignx 16, 7 +.rowloop: + push ecx + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + cmp ecx, byte SIZEOF_YMMWORD + jae short .columnloop + alignx 16, 7 + +.columnloop_r24: + ; ecx can possibly be 8, 16, 24 + cmp ecx, 24 + jne .columnloop_r16 + vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu xmm1, XMMWORD [esi+1*SIZEOF_YMMWORD] + mov ecx, SIZEOF_YMMWORD + jmp short .downsample + +.columnloop_r16: + cmp ecx, 16 + jne .columnloop_r8 + vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD] + vpxor ymm1, ymm1, ymm1 + mov ecx, SIZEOF_YMMWORD + jmp short .downsample + +.columnloop_r8: + vmovdqu xmm0, XMMWORD[esi+0*SIZEOF_YMMWORD] + vpxor ymm1, ymm1, ymm1 + mov ecx, SIZEOF_YMMWORD + jmp short .downsample + alignx 16, 7 + +.columnloop: + vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu ymm1, YMMWORD [esi+1*SIZEOF_YMMWORD] + +.downsample: + vpsrlw ymm2, ymm0, BYTE_BIT + vpand ymm0, ymm0, ymm6 + vpsrlw ymm3, ymm1, BYTE_BIT + vpand ymm1, ymm1, ymm6 + + vpaddw ymm0, ymm0, ymm2 + vpaddw ymm1, ymm1, ymm3 + vpaddw ymm0, ymm0, ymm7 + vpaddw ymm1, ymm1, ymm7 + vpsrlw ymm0, ymm0, 1 + vpsrlw ymm1, ymm1, 1 + + vpackuswb ymm0, ymm0, ymm1 + vpermq ymm0, ymm0, 0xd8 + + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm0 + + sub ecx, byte SIZEOF_YMMWORD ; outcol + add esi, byte 2*SIZEOF_YMMWORD ; inptr + add edi, byte 1*SIZEOF_YMMWORD ; outptr + cmp ecx, byte SIZEOF_YMMWORD + jae short .columnloop + test ecx, ecx + jnz near .columnloop_r24 + + pop esi + pop edi + pop ecx + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg near .rowloop + +.return: + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Downsample pixel values of a single component. 
+; This version handles the standard case of 2:1 horizontal and 2:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v2_downsample_avx2(JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, +; JDIMENSION width_in_blocks, JSAMPARRAY input_data, +; JSAMPARRAY output_data); +; + +%define img_width(b) (b) + 8 ; JDIMENSION image_width +%define max_v_samp(b) (b) + 12 ; int max_v_samp_factor +%define v_samp(b) (b) + 16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b) + 20 ; JDIMENSION width_in_blocks +%define input_data(b) (b) + 24 ; JSAMPARRAY input_data +%define output_data(b) (b) + 28 ; JSAMPARRAY output_data + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2) + +EXTN(jsimd_h2v2_downsample_avx2): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx, 3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge + + push ecx + shl ecx, 1 ; output_cols * 2 + sub ecx, edx + jle short .expand_end + + mov eax, INT [max_v_samp(ebp)] + test eax, eax + jle short .expand_end + + cld + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16, 7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] + add edi, edx + mov al, JSAMPLE [edi-1] + + rep stosb + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v2_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax, eax + jle near .return + + mov edx, 0x00020001 ; bias pattern + vmovd xmm7, edx + vpcmpeqw ymm6, ymm6, ymm6 + vpshufd xmm7, xmm7, 0x00 ; ymm7={1, 2, 1, 2, 1, 2, 1, 2} + vperm2i128 ymm7, ymm7, ymm7, 0 + vpsrlw ymm6, ymm6, BYTE_BIT ; ymm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY [output_data(ebp)] ; 
output_data + alignx 16, 7 +.rowloop: + push ecx + push edi + push esi + + mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1 + mov edi, JSAMPROW [edi] ; outptr + + cmp ecx, byte SIZEOF_YMMWORD + jae short .columnloop + alignx 16, 7 + +.columnloop_r24: + cmp ecx, 24 + jne .columnloop_r16 + vmovdqu ymm0, YMMWORD [edx+0*SIZEOF_YMMWORD] + vmovdqu ymm1, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu xmm2, XMMWORD [edx+1*SIZEOF_YMMWORD] + vmovdqu xmm3, XMMWORD [esi+1*SIZEOF_YMMWORD] + mov ecx, SIZEOF_YMMWORD + jmp short .downsample + +.columnloop_r16: + cmp ecx, 16 + jne .columnloop_r8 + vmovdqu ymm0, YMMWORD [edx+0*SIZEOF_YMMWORD] + vmovdqu ymm1, YMMWORD [esi+0*SIZEOF_YMMWORD] + vpxor ymm2, ymm2, ymm2 + vpxor ymm3, ymm3, ymm3 + mov ecx, SIZEOF_YMMWORD + jmp short .downsample + +.columnloop_r8: + vmovdqu xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] + vmovdqu xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + vpxor ymm2, ymm2, ymm2 + vpxor ymm3, ymm3, ymm3 + mov ecx, SIZEOF_YMMWORD + jmp short .downsample + alignx 16, 7 + +.columnloop: + vmovdqu ymm0, YMMWORD [edx+0*SIZEOF_YMMWORD] + vmovdqu ymm1, YMMWORD [esi+0*SIZEOF_YMMWORD] + vmovdqu ymm2, YMMWORD [edx+1*SIZEOF_YMMWORD] + vmovdqu ymm3, YMMWORD [esi+1*SIZEOF_YMMWORD] + +.downsample: + vpand ymm4, ymm0, ymm6 + vpsrlw ymm0, ymm0, BYTE_BIT + vpand ymm5, ymm1, ymm6 + vpsrlw ymm1, ymm1, BYTE_BIT + vpaddw ymm0, ymm0, ymm4 + vpaddw ymm1, ymm1, ymm5 + + vpand ymm4, ymm2, ymm6 + vpsrlw ymm2, ymm2, BYTE_BIT + vpand ymm5, ymm3, ymm6 + vpsrlw ymm3, ymm3, BYTE_BIT + vpaddw ymm2, ymm2, ymm4 + vpaddw ymm3, ymm3, ymm5 + + vpaddw ymm0, ymm0, ymm1 + vpaddw ymm2, ymm2, ymm3 + vpaddw ymm0, ymm0, ymm7 + vpaddw ymm2, ymm2, ymm7 + vpsrlw ymm0, ymm0, 2 + vpsrlw ymm2, ymm2, 2 + + vpackuswb ymm0, ymm0, ymm2 + vpermq ymm0, ymm0, 0xd8 + + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm0 + + sub ecx, byte SIZEOF_YMMWORD ; outcol + add edx, byte 2*SIZEOF_YMMWORD ; inptr0 + add esi, byte 2*SIZEOF_YMMWORD ; inptr1 + add edi, 
byte 1*SIZEOF_YMMWORD ; outptr + cmp ecx, byte SIZEOF_YMMWORD + jae near .columnloop + test ecx, ecx + jnz near .columnloop_r24 + + pop esi + pop edi + pop ecx + + add esi, byte 2*SIZEOF_JSAMPROW ; input_data + add edi, byte 1*SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg near .rowloop + +.return: + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jcsample-mmx.asm b/simd/i386/jcsample-mmx.asm new file mode 100644 index 0000000..faf4234 --- /dev/null +++ b/simd/i386/jcsample-mmx.asm @@ -0,0 +1,326 @@ +; +; jcsample.asm - downsampling (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Downsample pixel values of a single component. +; This version handles the common case of 2:1 horizontal and 1:1 vertical, +; without smoothing. 
+;
+; GLOBAL(void)
+; jsimd_h2v1_downsample_mmx(JDIMENSION image_width, int max_v_samp_factor,
+;                           JDIMENSION v_samp_factor,
+;                           JDIMENSION width_in_blocks, JSAMPARRAY input_data,
+;                           JSAMPARRAY output_data);
+; Pads each input row on the right to 2*output_cols samples, then stores the biased average of each horizontal sample pair.
+
+%define img_width(b)    (b) + 8         ; JDIMENSION image_width (offsets are applied to ebp after the prologue below)
+%define max_v_samp(b)   (b) + 12        ; int max_v_samp_factor
+%define v_samp(b)       (b) + 16        ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b) + 20        ; JDIMENSION width_in_blocks
+%define input_data(b)   (b) + 24        ; JSAMPARRAY input_data
+%define output_data(b)  (b) + 28        ; JSAMPARRAY output_data
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_h2v1_downsample_mmx)
+
+EXTN(jsimd_h2v1_downsample_mmx):
+    push        ebp
+    mov         ebp, esp                ; ebp = frame pointer used by the argument macros above
+;   push        ebx                     ; unused
+;   push        ecx                     ; need not be preserved
+;   push        edx                     ; need not be preserved
+    push        esi
+    push        edi
+
+    mov         ecx, JDIMENSION [width_blks(ebp)]
+    shl         ecx, 3                  ; imul ecx,DCTSIZE (ecx = output_cols)
+    jz          near .return            ; output_cols == 0: nothing to do
+
+    mov         edx, JDIMENSION [img_width(ebp)]  ; edx = image_width
+
+    ; -- expand_right_edge
+
+    push        ecx                     ; keep output_cols for the downsample pass
+    shl         ecx, 1                  ; output_cols * 2
+    sub         ecx, edx                ; ecx = output_cols*2 - image_width = columns to pad
+    jle         short .expand_end       ; rows are already wide enough
+
+    mov         eax, INT [max_v_samp(ebp)]  ; eax = number of rows to pad
+    test        eax, eax
+    jle         short .expand_end
+
+    cld                                 ; rep stosb must fill toward higher addresses
+    mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
+    alignx      16, 7
+.expandloop:
+    push        eax                     ; al is reused below as the fill value
+    push        ecx                     ; rep stosb consumes ecx
+
+    mov         edi, JSAMPROW [esi]     ; edi = start of the current row
+    add         edi, edx                ; edi -> first column past image_width
+    mov         al, JSAMPLE [edi-1]     ; al = last real sample of the row
+
+    rep stosb                           ; replicate it into the ecx padding columns
+
+    pop         ecx
+    pop         eax
+
+    add         esi, byte SIZEOF_JSAMPROW  ; advance to the next row pointer
+    dec         eax
+    jg          short .expandloop
+
+.expand_end:
+    pop         ecx                     ; output_cols
+
+    ; -- h2v1_downsample
+
+    mov         eax, JDIMENSION [v_samp(ebp)]  ; rowctr
+    test        eax, eax
+    jle         near .return            ; no rows requested (no MMX instruction has run yet, so skipping emms is fine)
+
+    mov         edx, 0x00010000         ; bias pattern
+    movd        mm7, edx
+    pcmpeqw     mm6, mm6                ; mm6 = all ones
+    punpckldq   mm7, mm7                ; mm7={0, 1, 0, 1}
+    psrlw       mm6, BYTE_BIT           ; mm6={0xFF 0x00 0xFF 0x00 ..}
+
+    mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
+    mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
+    alignx      16, 7
+.rowloop:
+    push        ecx                     ; the column loop counts ecx down to zero
+    push        edi                     ; esi/edi are row-array cursors; saved because they are
+    push        esi                     ; overwritten with the row addresses just below
+
+    mov         esi, JSAMPROW [esi]     ; inptr
+    mov         edi, JSAMPROW [edi]     ; outptr
+    alignx      16, 7
+.columnloop:
+
+    movq        mm0, MMWORD [esi+0*SIZEOF_MMWORD]  ; first MMWORD of input samples
+    movq        mm1, MMWORD [esi+1*SIZEOF_MMWORD]  ; second MMWORD of input samples
+    movq        mm2, mm0
+    movq        mm3, mm1
+
+    pand        mm0, mm6                ; low byte of every 16-bit word
+    psrlw       mm2, BYTE_BIT           ; high byte of every 16-bit word
+    pand        mm1, mm6
+    psrlw       mm3, BYTE_BIT
+
+    paddw       mm0, mm2                ; sum of each adjacent sample pair
+    paddw       mm1, mm3
+    paddw       mm0, mm7                ; add the alternating 0/1 rounding bias
+    paddw       mm1, mm7
+    psrlw       mm0, 1                  ; divide by 2
+    psrlw       mm1, 1
+
+    packuswb    mm0, mm1                ; pack the word averages back into bytes
+
+    movq        MMWORD [edi+0*SIZEOF_MMWORD], mm0
+
+    add         esi, byte 2*SIZEOF_MMWORD  ; inptr
+    add         edi, byte 1*SIZEOF_MMWORD  ; outptr
+    sub         ecx, byte SIZEOF_MMWORD    ; outcol
+    jnz         short .columnloop       ; exits only at exactly 0; assumes SIZEOF_MMWORD divides output_cols (a multiple of 8) - confirm in jsimdext.inc
+
+    pop         esi
+    pop         edi
+    pop         ecx
+
+    add         esi, byte SIZEOF_JSAMPROW  ; input_data
+    add         edi, byte SIZEOF_JSAMPROW  ; output_data
+    dec         eax                        ; rowctr
+    jg          short .rowloop
+
+    emms                                ; empty MMX state
+
+.return:
+    pop         edi
+    pop         esi
+;   pop         edx                     ; need not be preserved
+;   pop         ecx                     ; need not be preserved
+;   pop         ebx                     ; unused
+    pop         ebp
+    ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample pixel values of a single component.
+; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+; without smoothing.
+;
+; GLOBAL(void)
+; jsimd_h2v2_downsample_mmx(JDIMENSION image_width, int max_v_samp_factor,
+;                           JDIMENSION v_samp_factor,
+;                           JDIMENSION width_in_blocks, JSAMPARRAY input_data,
+;                           JSAMPARRAY output_data);
+; Pads each input row on the right to 2*output_cols samples, then stores the biased average of each 2x2 sample group.
+
+%define img_width(b)    (b) + 8         ; JDIMENSION image_width (offsets are applied to ebp after the prologue below)
+%define max_v_samp(b)   (b) + 12        ; int max_v_samp_factor
+%define v_samp(b)       (b) + 16        ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b) + 20        ; JDIMENSION width_in_blocks
+%define input_data(b)   (b) + 24        ; JSAMPARRAY input_data
+%define output_data(b)  (b) + 28        ; JSAMPARRAY output_data
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_h2v2_downsample_mmx)
+
+EXTN(jsimd_h2v2_downsample_mmx):
+    push        ebp
+    mov         ebp, esp                ; ebp = frame pointer used by the argument macros above
+;   push        ebx                     ; unused
+;   push        ecx                     ; need not be preserved
+;   push        edx                     ; need not be preserved
+    push        esi
+    push        edi
+
+    mov         ecx, JDIMENSION [width_blks(ebp)]
+    shl         ecx, 3                  ; imul ecx,DCTSIZE (ecx = output_cols)
+    jz          near .return            ; output_cols == 0: nothing to do
+
+    mov         edx, JDIMENSION [img_width(ebp)]  ; edx = image_width
+
+    ; -- expand_right_edge
+
+    push        ecx                     ; keep output_cols for the downsample pass
+    shl         ecx, 1                  ; output_cols * 2
+    sub         ecx, edx                ; ecx = output_cols*2 - image_width = columns to pad
+    jle         short .expand_end       ; rows are already wide enough
+
+    mov         eax, INT [max_v_samp(ebp)]  ; eax = number of rows to pad
+    test        eax, eax
+    jle         short .expand_end
+
+    cld                                 ; rep stosb must fill toward higher addresses
+    mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
+    alignx      16, 7
+.expandloop:
+    push        eax                     ; al is reused below as the fill value
+    push        ecx                     ; rep stosb consumes ecx
+
+    mov         edi, JSAMPROW [esi]     ; edi = start of the current row
+    add         edi, edx                ; edi -> first column past image_width
+    mov         al, JSAMPLE [edi-1]     ; al = last real sample of the row
+
+    rep stosb                           ; replicate it into the ecx padding columns
+
+    pop         ecx
+    pop         eax
+
+    add         esi, byte SIZEOF_JSAMPROW  ; advance to the next row pointer
+    dec         eax
+    jg          short .expandloop
+
+.expand_end:
+    pop         ecx                     ; output_cols
+
+    ; -- h2v2_downsample
+
+    mov         eax, JDIMENSION [v_samp(ebp)]  ; rowctr
+    test        eax, eax
+    jle         near .return            ; no rows requested (no MMX instruction has run yet, so skipping emms is fine)
+
+    mov         edx, 0x00020001         ; bias pattern
+    movd        mm7, edx
+    pcmpeqw     mm6, mm6                ; mm6 = all ones
+    punpckldq   mm7, mm7                ; mm7={1, 2, 1, 2}
+    psrlw       mm6, BYTE_BIT           ; mm6={0xFF 0x00 0xFF 0x00 ..}
+
+    mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
+    mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
+    alignx      16, 7
+.rowloop:
+    push        ecx                     ; the column loop counts ecx down to zero
+    push        edi                     ; esi/edi are row-array cursors; saved because they are
+    push        esi                     ; overwritten with the row addresses just below
+
+    mov         edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]  ; inptr0
+    mov         esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]  ; inptr1
+    mov         edi, JSAMPROW [edi]                    ; outptr
+    alignx      16, 7
+.columnloop:
+
+    movq        mm0, MMWORD [edx+0*SIZEOF_MMWORD]  ; upper row, first MMWORD
+    movq        mm1, MMWORD [esi+0*SIZEOF_MMWORD]  ; lower row, first MMWORD
+    movq        mm2, MMWORD [edx+1*SIZEOF_MMWORD]  ; upper row, second MMWORD
+    movq        mm3, MMWORD [esi+1*SIZEOF_MMWORD]  ; lower row, second MMWORD
+
+    movq        mm4, mm0
+    movq        mm5, mm1
+    pand        mm0, mm6                ; low byte of every 16-bit word
+    psrlw       mm4, BYTE_BIT           ; high byte of every 16-bit word
+    pand        mm1, mm6
+    psrlw       mm5, BYTE_BIT
+    paddw       mm0, mm4                ; horizontal pair sums, upper row
+    paddw       mm1, mm5                ; horizontal pair sums, lower row
+
+    movq        mm4, mm2                ; same split-and-add for the second MMWORD of each row
+    movq        mm5, mm3
+    pand        mm2, mm6
+    psrlw       mm4, BYTE_BIT
+    pand        mm3, mm6
+    psrlw       mm5, BYTE_BIT
+    paddw       mm2, mm4
+    paddw       mm3, mm5
+
+    paddw       mm0, mm1                ; add the two rows: sum of each 2x2 group
+    paddw       mm2, mm3
+    paddw       mm0, mm7                ; add the alternating 1/2 rounding bias
+    paddw       mm2, mm7
+    psrlw       mm0, 2                  ; divide by 4
+    psrlw       mm2, 2
+
+    packuswb    mm0, mm2                ; pack the word averages back into bytes
+
+    movq        MMWORD [edi+0*SIZEOF_MMWORD], mm0
+
+    add         edx, byte 2*SIZEOF_MMWORD  ; inptr0
+    add         esi, byte 2*SIZEOF_MMWORD  ; inptr1
+    add         edi, byte 1*SIZEOF_MMWORD  ; outptr
+    sub         ecx, byte SIZEOF_MMWORD    ; outcol
+    jnz         near .columnloop        ; exits only at exactly 0; assumes SIZEOF_MMWORD divides output_cols (a multiple of 8) - confirm in jsimdext.inc
+
+    pop         esi
+    pop         edi
+    pop         ecx
+
+    add         esi, byte 2*SIZEOF_JSAMPROW  ; input_data
+    add         edi, byte 1*SIZEOF_JSAMPROW  ; output_data
+    dec         eax                          ; rowctr
+    jg          near .rowloop
+
+    emms                                ; empty MMX state
+
+.return:
+    pop         edi
+    pop         esi
+;   pop         edx                     ; need not be preserved
+;   pop         ecx                     ; need not be preserved
+;   pop         ebx                     ; unused
+    pop         ebp
+    ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+    align       32
diff --git a/simd/i386/jcsample-sse2.asm b/simd/i386/jcsample-sse2.asm
new file mode 100644
index 0000000..b10fa83
--- /dev/null
+++ b/simd/i386/jcsample-sse2.asm
@@ -0,0 +1,353 @@
+;
+; jcsample.asm - downsampling (SSE2)
+;
+; Copyright 2009 Pierre Ossman for Cendio AB
+; Copyright (C) 2016, D. R. Commander.
+;
+; Based on the x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+    SECTION     SEG_TEXT
+    BITS        32
+;
+; Downsample pixel values of a single component.
+; This version handles the common case of 2:1 horizontal and 1:1 vertical,
+; without smoothing.
+;
+; GLOBAL(void)
+; jsimd_h2v1_downsample_sse2(JDIMENSION image_width, int max_v_samp_factor,
+;                            JDIMENSION v_samp_factor,
+;                            JDIMENSION width_in_blocks, JSAMPARRAY input_data,
+;                            JSAMPARRAY output_data);
+; Pads each input row on the right to 2*output_cols samples, then stores the biased average of each horizontal sample pair.
+
+%define img_width(b)    (b) + 8         ; JDIMENSION image_width (offsets are applied to ebp after the prologue below)
+%define max_v_samp(b)   (b) + 12        ; int max_v_samp_factor
+%define v_samp(b)       (b) + 16        ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b) + 20        ; JDIMENSION width_in_blocks
+%define input_data(b)   (b) + 24        ; JSAMPARRAY input_data
+%define output_data(b)  (b) + 28        ; JSAMPARRAY output_data
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
+
+EXTN(jsimd_h2v1_downsample_sse2):
+    push        ebp
+    mov         ebp, esp                ; ebp = frame pointer used by the argument macros above
+;   push        ebx                     ; unused
+;   push        ecx                     ; need not be preserved
+;   push        edx                     ; need not be preserved
+    push        esi
+    push        edi
+
+    mov         ecx, JDIMENSION [width_blks(ebp)]
+    shl         ecx, 3                  ; imul ecx,DCTSIZE (ecx = output_cols)
+    jz          near .return            ; output_cols == 0: nothing to do
+
+    mov         edx, JDIMENSION [img_width(ebp)]  ; edx = image_width
+
+    ; -- expand_right_edge
+
+    push        ecx                     ; keep output_cols for the downsample pass
+    shl         ecx, 1                  ; output_cols * 2
+    sub         ecx, edx                ; ecx = output_cols*2 - image_width = columns to pad
+    jle         short .expand_end       ; rows are already wide enough
+
+    mov         eax, INT [max_v_samp(ebp)]  ; eax = number of rows to pad
+    test        eax, eax
+    jle         short .expand_end
+
+    cld                                 ; rep stosb must fill toward higher addresses
+    mov         esi, JSAMPARRAY [input_data(ebp)]  ; input_data
+    alignx      16, 7
+.expandloop:
+    push        eax                     ; al is reused below as the fill value
+    push        ecx                     ; rep stosb consumes ecx
+
+    mov         edi, JSAMPROW [esi]     ; edi = start of the current row
+    add         edi, edx                ; edi -> first column past image_width
+    mov         al, JSAMPLE [edi-1]     ; al = last real sample of the row
+
+    rep stosb                           ; replicate it into the ecx padding columns
+
+    pop         ecx
+    pop         eax
+
+    add         esi, byte SIZEOF_JSAMPROW  ; advance to the next row pointer
+    dec         eax
+    jg          short .expandloop
+
+.expand_end:
+    pop         ecx                     ; output_cols
+
+    ; -- h2v1_downsample
+
+    mov         eax, JDIMENSION [v_samp(ebp)]  ; rowctr
+    test        eax, eax
+    jle         near .return            ; no rows requested
+
+    mov         edx, 0x00010000         ; bias pattern
+    movd        xmm7, edx
+    pcmpeqw     xmm6, xmm6              ; xmm6 = all ones
+    pshufd      xmm7, xmm7, 0x00        ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
+    psrlw       xmm6, BYTE_BIT          ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+    mov         esi, JSAMPARRAY [input_data(ebp)]   ; input_data
+    mov         edi, JSAMPARRAY [output_data(ebp)]  ; output_data
+    alignx      16, 7
+.rowloop:
+    push        ecx                     ; the column loop consumes ecx
+    push        edi                     ; esi/edi are row-array cursors; saved because they are
+    push        esi                     ; overwritten with the row addresses just below
+
+    mov         esi, JSAMPROW [esi]     ; inptr
+    mov         edi, JSAMPROW [edi]     ; outptr
+
+    cmp         ecx, byte SIZEOF_XMMWORD
+    jae         short .columnloop       ; at least one full output vector: take the main path
+    alignx      16, 7
+
+.columnloop_r8:                         ; tail path: fewer than SIZEOF_XMMWORD output columns remain
+    movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]  ; only the first input vector is read
+    pxor        xmm1, xmm1              ; second input vector treated as zeros
+    mov         ecx, SIZEOF_XMMWORD     ; so the sub after the store leaves 0 and the loop ends
+    jmp         short .downsample
+    alignx      16, 7
+
+.columnloop:
+    movdqa      xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]  ; movdqa: addresses must be 16-byte aligned
+    movdqa      xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD]
+
+.downsample:
+    movdqa      xmm2, xmm0
+    movdqa      xmm3, xmm1
+
+    pand        xmm0, xmm6              ; low byte of every 16-bit word
+    psrlw       xmm2, BYTE_BIT          ; high byte of every 16-bit word
+    pand        xmm1, xmm6
+    psrlw       xmm3, BYTE_BIT
+
+    paddw       xmm0, xmm2              ; sum of each adjacent sample pair
+    paddw       xmm1, xmm3
+    paddw       xmm0, xmm7              ; add the alternating 0/1 rounding bias
+    paddw       xmm1, xmm7
+    psrlw       xmm0, 1                 ; divide by 2
+    psrlw       xmm1, 1
+
+    packuswb    xmm0, xmm1              ; pack the word averages back into bytes
+
+    movdqa      XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0  ; a full XMMWORD is written, on the tail path too
+
+    sub         ecx, byte SIZEOF_XMMWORD    ; outcol
+    add         esi, byte 2*SIZEOF_XMMWORD  ; inptr
+    add         edi, byte 1*SIZEOF_XMMWORD  ; outptr
+    cmp         ecx, byte SIZEOF_XMMWORD
+    jae         short .columnloop       ; another full vector remains
+    test        ecx, ecx
+    jnz         short .columnloop_r8    ; a partial vector remains
+
+    pop         esi
+    pop         edi
+    pop         ecx
+
+    add         esi, byte SIZEOF_JSAMPROW  ; input_data
+    add         edi, byte SIZEOF_JSAMPROW  ; output_data
+    dec         eax                        ; rowctr
+    jg          near .rowloop
+
+.return:
+    pop         edi
+    pop         esi
+;   pop         edx                     ; need not be preserved
+;   pop         ecx                     ; need not be preserved
+;   pop         ebx                     ; unused
+    pop         ebp
+    ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample pixel values of a single component.
+; This version handles the standard case of 2:1 horizontal and 2:1 vertical, +; without smoothing. +; +; GLOBAL(void) +; jsimd_h2v2_downsample_sse2(JDIMENSION image_width, int max_v_samp_factor, +; JDIMENSION v_samp_factor, +; JDIMENSION width_in_blocks, JSAMPARRAY input_data, +; JSAMPARRAY output_data); +; + +%define img_width(b) (b) + 8 ; JDIMENSION image_width +%define max_v_samp(b) (b) + 12 ; int max_v_samp_factor +%define v_samp(b) (b) + 16 ; JDIMENSION v_samp_factor +%define width_blks(b) (b) + 20 ; JDIMENSION width_in_blocks +%define input_data(b) (b) + 24 ; JSAMPARRAY input_data +%define output_data(b) (b) + 28 ; JSAMPARRAY output_data + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2) + +EXTN(jsimd_h2v2_downsample_sse2): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov ecx, JDIMENSION [width_blks(ebp)] + shl ecx, 3 ; imul ecx,DCTSIZE (ecx = output_cols) + jz near .return ; nothing to do when output_cols is 0 + + mov edx, JDIMENSION [img_width(ebp)] + + ; -- expand_right_edge (pad each input row out to 2*output_cols samples) + + push ecx + shl ecx, 1 ; output_cols * 2 + sub ecx, edx ; ecx = padding samples needed per row + jle short .expand_end ; rows are already wide enough + + mov eax, INT [max_v_samp(ebp)] ; eax = number of input rows to pad + test eax, eax + jle short .expand_end + + cld ; rep stosb below must advance edi + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + alignx 16, 7 +.expandloop: + push eax + push ecx + + mov edi, JSAMPROW [esi] ; edi = start of this row + add edi, edx ; edi = first sample past image_width + mov al, JSAMPLE [edi-1] ; al = last valid sample of the row + + rep stosb ; replicate it over the padding + + pop ecx + pop eax + + add esi, byte SIZEOF_JSAMPROW + dec eax + jg short .expandloop + +.expand_end: + pop ecx ; output_cols + + ; -- h2v2_downsample + + mov eax, JDIMENSION [v_samp(ebp)] ; rowctr + test eax, eax + jle near .return + + mov edx, 0x00020001 ; bias pattern + movd xmm7, edx + pcmpeqw xmm6, xmm6 + pshufd xmm7, xmm7, 0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2} + psrlw xmm6, BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, JSAMPARRAY [output_data(ebp)] ; output_data + alignx 16, 7 +.rowloop: + push ecx +
push edi + push esi + + mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1 + mov edi, JSAMPROW [edi] ; outptr + + cmp ecx, byte SIZEOF_XMMWORD + jae short .columnloop + alignx 16, 7 + +.columnloop_r8: + movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] ; fewer than SIZEOF_XMMWORD output columns left: load one XMMWORD per input row + movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + pxor xmm2, xmm2 ; second XMMWORD of each row treated as zero + pxor xmm3, xmm3 + mov ecx, SIZEOF_XMMWORD ; so the sub below leaves ecx=0 and the column loop ends + jmp short .downsample + alignx 16, 7 + +.columnloop: + movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] + movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqa xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD] + +.downsample: + movdqa xmm4, xmm0 + movdqa xmm5, xmm1 + pand xmm0, xmm6 ; even-indexed samples of row 0, as words + psrlw xmm4, BYTE_BIT ; odd-indexed samples of row 0, as words + pand xmm1, xmm6 ; even-indexed samples of row 1 + psrlw xmm5, BYTE_BIT ; odd-indexed samples of row 1 + paddw xmm0, xmm4 ; horizontal pair sums, row 0 + paddw xmm1, xmm5 ; horizontal pair sums, row 1 + + movdqa xmm4, xmm2 + movdqa xmm5, xmm3 + pand xmm2, xmm6 + psrlw xmm4, BYTE_BIT + pand xmm3, xmm6 + psrlw xmm5, BYTE_BIT + paddw xmm2, xmm4 + paddw xmm3, xmm5 + + paddw xmm0, xmm1 ; row 0 + row 1 = sum of each 2x2 group + paddw xmm2, xmm3 + paddw xmm0, xmm7 ; add the alternating 1/2 bias + paddw xmm2, xmm7 + psrlw xmm0, 2 ; (2x2 sum + bias) / 4 + psrlw xmm2, 2 + + packuswb xmm0, xmm2 ; back to bytes: one XMMWORD of output samples + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + + sub ecx, byte SIZEOF_XMMWORD ; outcol + add edx, byte 2*SIZEOF_XMMWORD ; inptr0 + add esi, byte 2*SIZEOF_XMMWORD ; inptr1 + add edi, byte 1*SIZEOF_XMMWORD ; outptr + cmp ecx, byte SIZEOF_XMMWORD + jae near .columnloop + test ecx, ecx + jnz near .columnloop_r8 + + pop esi + pop edi + pop ecx + + add esi, byte 2*SIZEOF_JSAMPROW ; input_data + add edi, byte 1*SIZEOF_JSAMPROW ; output_data + dec eax ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this.
+ align 32 diff --git a/simd/i386/jdcolext-avx2.asm b/simd/i386/jdcolext-avx2.asm new file mode 100644 index 0000000..46de9b9 --- /dev/null +++ b/simd/i386/jdcolext-avx2.asm @@ -0,0 +1,517 @@ +; +; jdcolext.asm - colorspace conversion (AVX2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright (C) 2012, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace.
+; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_avx2(JDIMENSION out_width, JSAMPIMAGE input_buf, +; JDIMENSION input_row, JSAMPARRAY output_buf, +; int num_rows) +; + +%define out_width(b) (b) + 8 ; JDIMENSION out_width +%define input_buf(b) (b) + 12 ; JSAMPIMAGE input_buf +%define input_row(b) (b) + 16 ; JDIMENSION input_row +%define output_buf(b) (b) + 20 ; JSAMPARRAY output_buf +%define num_rows(b) (b) + 24 ; int num_rows + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_YMMWORD + ; ymmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0) - SIZEOF_POINTER ; void * gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2) + +EXTN(jsimd_ycc_rgb_convert_avx2): + push ebp + mov eax, esp ; eax = original ebp (unaligned frame base; the arguments are read via eax below) + sub esp, byte 4 + and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits + mov [esp], eax ; saved so the epilogue's "pop esp" can restore the unaligned frame + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make room for the GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [out_width(eax)] ; num_cols + test ecx, ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [input_row(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] ; esi = &input_buf[0][input_row] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] ; ebx = &input_buf[1][input_row] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] ; edx = &input_buf[2][input_row] + + pop ecx + + mov edi, JSAMPARRAY [output_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax, eax + jle near .return + alignx 16, 7 +.rowloop: + push eax + push edi + push edx + push ebx + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr0 + mov ebx, JSAMPROW [ebx] ; inptr1 + mov edx, JSAMPROW [edx] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + movpic eax, POINTER [gotptr] ; load GOT address (eax) + alignx 16, 7
+.columnloop: + + vmovdqu ymm5, YMMWORD [ebx] ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV) + vmovdqu ymm1, YMMWORD [edx] ; ymm1=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV) + + vpcmpeqw ymm0, ymm0, ymm0 + vpcmpeqw ymm7, ymm7, ymm7 + vpsrlw ymm0, ymm0, BYTE_BIT ; ymm0={0xFF 0x00 0xFF 0x00 ..} + vpsllw ymm7, ymm7, 7 ; ymm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + vpand ymm4, ymm0, ymm5 ; ymm4=Cb(02468ACEGIKMOQSU)=CbE + vpsrlw ymm5, ymm5, BYTE_BIT ; ymm5=Cb(13579BDFHJLNPRTV)=CbO + vpand ymm0, ymm0, ymm1 ; ymm0=Cr(02468ACEGIKMOQSU)=CrE + vpsrlw ymm1, ymm1, BYTE_BIT ; ymm1=Cr(13579BDFHJLNPRTV)=CrO + + vpaddw ymm2, ymm4, ymm7 + vpaddw ymm3, ymm5, ymm7 + vpaddw ymm6, ymm0, ymm7 + vpaddw ymm7, ymm1, ymm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + vpaddw ymm4, ymm2, ymm2 ; ymm4=2*CbE + vpaddw ymm5, ymm3, ymm3 ; ymm5=2*CbO + vpaddw ymm0, ymm6, ymm6 ; ymm0=2*CrE + vpaddw ymm1, ymm7, ymm7 ; ymm1=2*CrO + + vpmulhw ymm4, ymm4, [GOTOFF(eax,PW_MF0228)] ; ymm4=(2*CbE * -FIX(0.22800)) + vpmulhw ymm5, ymm5, [GOTOFF(eax,PW_MF0228)] ; ymm5=(2*CbO * -FIX(0.22800)) + vpmulhw ymm0, ymm0, [GOTOFF(eax,PW_F0402)] ; ymm0=(2*CrE * FIX(0.40200)) + vpmulhw ymm1, ymm1, [GOTOFF(eax,PW_F0402)] ; ymm1=(2*CrO * FIX(0.40200)) + + vpaddw ymm4, ymm4, [GOTOFF(eax,PW_ONE)] + vpaddw ymm5, ymm5, [GOTOFF(eax,PW_ONE)] + vpsraw ymm4, ymm4, 1 ; ymm4=(CbE * -FIX(0.22800)) + vpsraw ymm5, ymm5, 1 ; ymm5=(CbO * -FIX(0.22800)) + vpaddw ymm0, ymm0, [GOTOFF(eax,PW_ONE)] + vpaddw ymm1, ymm1, [GOTOFF(eax,PW_ONE)] + vpsraw ymm0, ymm0, 1 ; ymm0=(CrE * FIX(0.40200)) + vpsraw ymm1, ymm1, 1 ; ymm1=(CrO * FIX(0.40200)) + + vpaddw ymm4, ymm4, ymm2 + vpaddw ymm5, ymm5, ymm3 + vpaddw ymm4, ymm4, ymm2 ; ymm4=(CbE * FIX(1.77200))=(B-Y)E + vpaddw ymm5, ymm5, ymm3 ; ymm5=(CbO * FIX(1.77200))=(B-Y)O + vpaddw ymm0, ymm0, ymm6 ; ymm0=(CrE *
FIX(1.40200))=(R-Y)E + vpaddw ymm1, ymm1, ymm7 ; ymm1=(CrO * FIX(1.40200))=(R-Y)O + + vmovdqa YMMWORD [wk(0)], ymm4 ; wk(0)=(B-Y)E + vmovdqa YMMWORD [wk(1)], ymm5 ; wk(1)=(B-Y)O + + vpunpckhwd ymm4, ymm2, ymm6 + vpunpcklwd ymm2, ymm2, ymm6 + vpmaddwd ymm2, ymm2, [GOTOFF(eax,PW_MF0344_F0285)] + vpmaddwd ymm4, ymm4, [GOTOFF(eax,PW_MF0344_F0285)] + vpunpckhwd ymm5, ymm3, ymm7 + vpunpcklwd ymm3, ymm3, ymm7 + vpmaddwd ymm3, ymm3, [GOTOFF(eax,PW_MF0344_F0285)] + vpmaddwd ymm5, ymm5, [GOTOFF(eax,PW_MF0344_F0285)] + + vpaddd ymm2, ymm2, [GOTOFF(eax,PD_ONEHALF)] + vpaddd ymm4, ymm4, [GOTOFF(eax,PD_ONEHALF)] + vpsrad ymm2, ymm2, SCALEBITS + vpsrad ymm4, ymm4, SCALEBITS + vpaddd ymm3, ymm3, [GOTOFF(eax,PD_ONEHALF)] + vpaddd ymm5, ymm5, [GOTOFF(eax,PD_ONEHALF)] + vpsrad ymm3, ymm3, SCALEBITS + vpsrad ymm5, ymm5, SCALEBITS + + vpackssdw ymm2, ymm2, ymm4 ; ymm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + vpackssdw ymm3, ymm3, ymm5 ; ymm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + vpsubw ymm2, ymm2, ymm6 ; ymm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + vpsubw ymm3, ymm3, ymm7 ; ymm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + vmovdqu ymm5, YMMWORD [esi] ; ymm5=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV) + + vpcmpeqw ymm4, ymm4, ymm4 + vpsrlw ymm4, ymm4, BYTE_BIT ; ymm4={0xFF 0x00 0xFF 0x00 ..} + vpand ymm4, ymm4, ymm5 ; ymm4=Y(02468ACEGIKMOQSU)=YE + vpsrlw ymm5, ymm5, BYTE_BIT ; ymm5=Y(13579BDFHJLNPRTV)=YO + + vpaddw ymm0, ymm0, ymm4 ; ymm0=((R-Y)E+YE)=RE=R(02468ACEGIKMOQSU) + vpaddw ymm1, ymm1, ymm5 ; ymm1=((R-Y)O+YO)=RO=R(13579BDFHJLNPRTV) + vpackuswb ymm0, ymm0, ymm0 ; ymm0=R(02468ACE********GIKMOQSU********) + vpackuswb ymm1, ymm1, ymm1 ; ymm1=R(13579BDF********HJLNPRTV********) + + vpaddw ymm2, ymm2, ymm4 ; ymm2=((G-Y)E+YE)=GE=G(02468ACEGIKMOQSU) + vpaddw ymm3, ymm3, ymm5 ; ymm3=((G-Y)O+YO)=GO=G(13579BDFHJLNPRTV) + vpackuswb ymm2, ymm2, ymm2 ; ymm2=G(02468ACE********GIKMOQSU********) + vpackuswb ymm3, ymm3, ymm3 ; ymm3=G(13579BDF********HJLNPRTV********) + + vpaddw ymm4, ymm4, YMMWORD [wk(0)] ;
ymm4=(YE+(B-Y)E)=BE=B(02468ACEGIKMOQSU) + vpaddw ymm5, ymm5, YMMWORD [wk(1)] ; ymm5=(YO+(B-Y)O)=BO=B(13579BDFHJLNPRTV) + vpackuswb ymm4, ymm4, ymm4 ; ymm4=B(02468ACE********GIKMOQSU********) + vpackuswb ymm5, ymm5, ymm5 ; ymm5=B(13579BDF********HJLNPRTV********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **) + ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **) + ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **) + ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **) + ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **) + ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **) + ; ymmG=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **) + ; ymmH=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **) + + vpunpcklbw ymmA, ymmA, ymmC ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E + ; 0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U) + vpunpcklbw ymmE, ymmE, ymmB ; ymmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F + ; 2G 0H 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V) + vpunpcklbw ymmD, ymmD, ymmF ; ymmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F + ; 1H 2H 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V) + + vpsrldq ymmH, ymmA, 2 ; ymmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E 0G 1G + ; 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U -- --) + vpunpckhwd ymmG, ymmA, ymmE ; ymmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F + ; 0O 1O 2O 0P 0Q 1Q 2Q 0R 0S 1S 2S 0T 0U 1U 2U 0V) + vpunpcklwd ymmA, ymmA, ymmE ; ymmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07 + ; 0G 1G 2G 0H 0I 1I 2I 0J 0K 1K 2K 0L 0M 1M 2M 0N) + + vpsrldq ymmE, ymmE, 2 ; ymmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F 2G 0H + ; 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V -- --) + + vpsrldq ymmB, ymmD, 2 ; ymmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F 1H 2H + ; 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V -- --) + vpunpckhwd ymmC, ymmD, ymmH ; ymmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D
0E 1E 1F 2F 0G 1G + ; 1P 2P 0Q 1Q 1R 2R 0S 1S 1T 2T 0U 1U 1V 2V -- --) + vpunpcklwd ymmD, ymmD, ymmH ; ymmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18 + ; 1H 2H 0I 1I 1J 2J 0K 1K 1L 2L 0M 1M 1N 2N 0O 1O) + + vpunpckhwd ymmF, ymmE, ymmB ; ymmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F 2G 0H 1H 2H + ; 2Q 0R 1R 2R 2S 0T 1T 2T 2U 0V 1V 2V -- -- -- --) + vpunpcklwd ymmE, ymmE, ymmB ; ymmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29 + ; 2I 0J 1J 2J 2K 0L 1L 2L 2M 0N 1N 2N 2O 0P 1P 2P) + + vpshufd ymmH, ymmA, 0x4E ; ymmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03 + ; 0K 1K 2K 0L 0M 1M 2M 0N 0G 1G 2G 0H 0I 1I 2I 0J) + vpunpckldq ymmA, ymmA, ymmD ; ymmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14 + ; 0G 1G 2G 0H 1H 2H 0I 1I 0I 1I 2I 0J 1J 2J 0K 1K) + vpunpckhdq ymmD, ymmD, ymmE ; ymmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29 + ; 1L 2L 0M 1M 2M 0N 1N 2N 1N 2N 0O 1O 2O 0P 1P 2P) + vpunpckldq ymmE, ymmE, ymmH ; ymmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07 + ; 2I 0J 1J 2J 0K 1K 2K 0L 2K 0L 1L 2L 0M 1M 2M 0N) + + vpshufd ymmH, ymmG, 0x4E ; ymmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B + ; 0S 1S 2S 0T 0U 1U 2U 0V 0O 1O 2O 0P 0Q 1Q 2Q 0R) + vpunpckldq ymmG, ymmG, ymmC ; ymmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C + ; 0O 1O 2O 0P 1P 2P 0Q 1Q 0Q 1Q 2Q 0R 1R 2R 0S 1S) + vpunpckhdq ymmC, ymmC, ymmF ; ymmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F 0G 1G 2G 0H 1H 2H + ; 1T 2T 0U 1U 2U 0V 1V 2V 1V 2V -- -- -- -- -- --) + vpunpckldq ymmF, ymmF, ymmH ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F + ; 2Q 0R 1R 2R 0S 1S 2S 0T 2S 0T 1T 2T 0U 1U 2U 0V) + + vpunpcklqdq ymmH, ymmA, ymmE ; ymmH=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + vpunpcklqdq ymmG, ymmD, ymmG ; ymmG=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A + ; 1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q) + vpunpcklqdq ymmC, ymmF, ymmC ; ymmC=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T
2T 0U 1U 2U 0V 1V 2V) + + vperm2i128 ymmA, ymmH, ymmG, 0x20 ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + vperm2i128 ymmD, ymmC, ymmH, 0x30 ; ymmD=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + vperm2i128 ymmF, ymmG, ymmC, 0x31 ; ymmF=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + + cmp ecx, byte SIZEOF_YMMWORD + jb short .column_st64 + + test edi, SIZEOF_YMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + vmovntdq YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovntdq YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + vmovntdq YMMWORD [edi+2*SIZEOF_YMMWORD], ymmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + vmovdqu YMMWORD [edi+2*SIZEOF_YMMWORD], ymmF +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr + sub ecx, byte SIZEOF_YMMWORD + jz near .nextrow + + add esi, byte SIZEOF_YMMWORD ; inptr0 + add ebx, byte SIZEOF_YMMWORD ; inptr1 + add edx, byte SIZEOF_YMMWORD ; inptr2 + jmp near .columnloop + alignx 16, 7 + +.column_st64: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_YMMWORD + jb short .column_st32 + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + add edi, byte 2*SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmF + sub ecx, byte 2*SIZEOF_YMMWORD + jmp short .column_st31 +.column_st32: + cmp ecx, byte SIZEOF_YMMWORD + jb short .column_st31 + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + add edi, byte SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmD + sub ecx, byte SIZEOF_YMMWORD + jmp short .column_st31 +.column_st31: + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st15 + vmovdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + vperm2i128 ymmA, ymmA, ymmA, 1 + sub ecx, byte
SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + vmovq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + vpsrldq xmmA, xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + vmovd XMM_DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + vpsrldq xmmA, xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + vmovd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + mov BYTE [edi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + vpcmpeqb ymm6, ymm6, ymm6 ; ymm6=XE=X(02468ACE********GIKMOQSU********) + vpcmpeqb ymm7, ymm7, ymm7 ; ymm7=XO=X(13579BDF********HJLNPRTV********) +%else + vpxor ymm6, ymm6, ymm6 ; ymm6=XE=X(02468ACE********GIKMOQSU********) + vpxor ymm7, ymm7, ymm7 ; ymm7=XO=X(13579BDF********HJLNPRTV********) +%endif + ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **) + ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **) + ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **) + ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **) + ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **) + ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **) + ; ymmG=(30 32 34 36 38 3A 3C 3E ** 3G 3I 3K 3M 3O 3Q 3S 3U **) + ; ymmH=(31 33 35 37 39 3B 3D 3F ** 3H 3J 3L 3N 3P 3R 3T 3V **) + + vpunpcklbw ymmA, ymmA, ymmC ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E + ; 0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U) +
vpunpcklbw ymmE, ymmE, ymmG ; ymmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E + ; 2G 3G 2I 3I 2K 3K 2M 3M 2O 3O 2Q 3Q 2S 3S 2U 3U) + vpunpcklbw ymmB, ymmB, ymmD ; ymmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F + ; 0H 1H 0J 1J 0L 1L 0N 1N 0P 1P 0R 1R 0T 1T 0V 1V) + vpunpcklbw ymmF, ymmF, ymmH ; ymmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F + ; 2H 3H 2J 3J 2L 3L 2N 3N 2P 3P 2R 3R 2T 3T 2V 3V) + + vpunpckhwd ymmC, ymmA, ymmE ; ymmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E + ; 0O 1O 2O 3O 0Q 1Q 2Q 3Q 0S 1S 2S 3S 0U 1U 2U 3U) + vpunpcklwd ymmA, ymmA, ymmE ; ymmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36 + ; 0G 1G 2G 3G 0I 1I 2I 3I 0K 1K 2K 3K 0M 1M 2M 3M) + vpunpckhwd ymmG, ymmB, ymmF ; ymmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F + ; 0P 1P 2P 3P 0R 1R 2R 3R 0T 1T 2T 3T 0V 1V 2V 3V) + vpunpcklwd ymmB, ymmB, ymmF ; ymmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37 + ; 0H 1H 2H 3H 0J 1J 2J 3J 0L 1L 2L 3L 0N 1N 2N 3N) + + vpunpckhdq ymmE, ymmA, ymmB ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + vpunpckldq ymmB, ymmA, ymmB ; ymmB=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J) + vpunpckhdq ymmF, ymmC, ymmG ; ymmF=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + vpunpckldq ymmG, ymmC, ymmG ; ymmG=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R) + + vperm2i128 ymmA, ymmB, ymmE, 0x20 ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + vperm2i128 ymmD, ymmG, ymmF, 0x20 ; ymmD=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + vperm2i128 ymmC, ymmB, ymmE, 0x31 ; ymmC=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + vperm2i128 ymmH, ymmG, ymmF,
0x31 ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + cmp ecx, byte SIZEOF_YMMWORD + jb short .column_st64 + + test edi, SIZEOF_YMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + vmovntdq YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovntdq YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + vmovntdq YMMWORD [edi+2*SIZEOF_YMMWORD], ymmC + vmovntdq YMMWORD [edi+3*SIZEOF_YMMWORD], ymmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + vmovdqu YMMWORD [edi+2*SIZEOF_YMMWORD], ymmC + vmovdqu YMMWORD [edi+3*SIZEOF_YMMWORD], ymmH +.out0: + add edi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr + sub ecx, byte SIZEOF_YMMWORD + jz near .nextrow + + add esi, byte SIZEOF_YMMWORD ; inptr0 + add ebx, byte SIZEOF_YMMWORD ; inptr1 + add edx, byte SIZEOF_YMMWORD ; inptr2 + jmp near .columnloop + alignx 16, 7 + +.column_st64: + cmp ecx, byte SIZEOF_YMMWORD/2 + jb short .column_st32 + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + add edi, byte 2*SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmC + vmovdqa ymmD, ymmH + sub ecx, byte SIZEOF_YMMWORD/2 +.column_st32: + cmp ecx, byte SIZEOF_YMMWORD/4 + jb short .column_st16 + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + add edi, byte SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmD + sub ecx, byte SIZEOF_YMMWORD/4 +.column_st16: + cmp ecx, byte SIZEOF_YMMWORD/8 + jb short .column_st15 + vmovdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + vperm2i128 ymmA, ymmA, ymmA, 1 + add edi, byte SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_YMMWORD/8 +.column_st15: + ; Store two pixels (8 bytes) of ymmA to the output when it has enough + ; space.
+ cmp ecx, byte SIZEOF_YMMWORD/16 + jb short .column_st7 + vmovq MMWORD [edi], xmmA + add edi, byte SIZEOF_YMMWORD/16*4 + sub ecx, byte SIZEOF_YMMWORD/16 + vpsrldq xmmA, SIZEOF_YMMWORD/16*4 +.column_st7: + ; Store one pixel (4 bytes) of ymmA to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + vmovd XMM_DWORD [edi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + + alignx 16, 7 + +.nextrow: + pop ecx + pop esi + pop ebx + pop edx + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + add edi, byte SIZEOF_JSAMPROW ; output_buf + dec eax ; num_rows + jg near .rowloop + + sfence ; flush the write buffer (orders the non-temporal vmovntdq stores above) + +.return: + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jdcolext-mmx.asm b/simd/i386/jdcolext-mmx.asm new file mode 100644 index 0000000..cd2cb3f --- /dev/null +++ b/simd/i386/jdcolext-mmx.asm @@ -0,0 +1,406 @@ +; +; jdcolext.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. +; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_mmx(JDIMENSION out_width, JSAMPIMAGE input_buf, +; JDIMENSION input_row, JSAMPARRAY output_buf, +; int num_rows) +; + +%define out_width(b) (b) + 8 ; JDIMENSION out_width +%define input_buf(b) (b) + 12 ; JSAMPIMAGE input_buf +%define input_row(b) (b) + 16 ; JDIMENSION input_row +%define output_buf(b) (b) + 20 ; JSAMPARRAY output_buf +%define num_rows(b) (b) + 24 ; int num_rows + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD + ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0) - SIZEOF_POINTER ; void * gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_mmx) + +EXTN(jsimd_ycc_rgb_convert_mmx): + push ebp + mov eax, esp ; eax = original ebp (unaligned frame base; the arguments are read via eax below) + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp], eax ; saved so the epilogue's "pop esp" can restore the unaligned frame + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make room for the GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [out_width(eax)] ; num_cols + test ecx, ecx + jz near .return + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [input_row(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] ; esi = &input_buf[0][input_row] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] ; ebx = &input_buf[1][input_row] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] ; edx = &input_buf[2][input_row] + + pop ecx + + mov edi, JSAMPARRAY [output_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax, eax + jle near .return + alignx 16, 7 +.rowloop:
+ push eax + push edi + push edx + push ebx + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr0 + mov ebx, JSAMPROW [ebx] ; inptr1 + mov edx, JSAMPROW [edx] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + movpic eax, POINTER [gotptr] ; load GOT address (eax) + alignx 16, 7 +.columnloop: + + movq mm5, MMWORD [ebx] ; mm5=Cb(01234567) + movq mm1, MMWORD [edx] ; mm1=Cr(01234567) + + pcmpeqw mm4, mm4 + pcmpeqw mm7, mm7 + psrlw mm4, BYTE_BIT + psllw mm7, 7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80} + movq mm0, mm4 ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..} + + pand mm4, mm5 ; mm4=Cb(0246)=CbE + psrlw mm5, BYTE_BIT ; mm5=Cb(1357)=CbO + pand mm0, mm1 ; mm0=Cr(0246)=CrE + psrlw mm1, BYTE_BIT ; mm1=Cr(1357)=CrO + + paddw mm4, mm7 + paddw mm5, mm7 + paddw mm0, mm7 + paddw mm1, mm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movq mm2, mm4 ; mm2=CbE + movq mm3, mm5 ; mm3=CbO + paddw mm4, mm4 ; mm4=2*CbE + paddw mm5, mm5 ; mm5=2*CbO + movq mm6, mm0 ; mm6=CrE + movq mm7, mm1 ; mm7=CrO + paddw mm0, mm0 ; mm0=2*CrE + paddw mm1, mm1 ; mm1=2*CrO + + pmulhw mm4, [GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbE * -FIX(0.22800)) + pmulhw mm5, [GOTOFF(eax,PW_MF0228)] ; mm5=(2*CbO * -FIX(0.22800)) + pmulhw mm0, [GOTOFF(eax,PW_F0402)] ; mm0=(2*CrE * FIX(0.40200)) + pmulhw mm1, [GOTOFF(eax,PW_F0402)] ; mm1=(2*CrO * FIX(0.40200)) + + paddw mm4, [GOTOFF(eax,PW_ONE)] + paddw mm5, [GOTOFF(eax,PW_ONE)] + psraw mm4, 1 ; mm4=(CbE * -FIX(0.22800)) + psraw mm5, 1 ; mm5=(CbO * -FIX(0.22800)) + paddw mm0, [GOTOFF(eax,PW_ONE)] + paddw mm1, [GOTOFF(eax,PW_ONE)] + psraw mm0, 1 ; mm0=(CrE * FIX(0.40200)) + psraw mm1, 1 ; mm1=(CrO * FIX(0.40200)) + + paddw mm4, mm2 + paddw mm5, mm3 + paddw mm4, mm2 ; mm4=(CbE * FIX(1.77200))=(B-Y)E + paddw mm5, mm3 ; mm5=(CbO * FIX(1.77200))=(B-Y)O + paddw mm0, mm6 ; mm0=(CrE *
FIX(1.40200))=(R-Y)E + paddw mm1, mm7 ; mm1=(CrO * FIX(1.40200))=(R-Y)O + + movq MMWORD [wk(0)], mm4 ; wk(0)=(B-Y)E + movq MMWORD [wk(1)], mm5 ; wk(1)=(B-Y)O + + movq mm4, mm2 + movq mm5, mm3 + punpcklwd mm2, mm6 + punpckhwd mm4, mm6 + pmaddwd mm2, [GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd mm4, [GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd mm3, mm7 + punpckhwd mm5, mm7 + pmaddwd mm3, [GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd mm5, [GOTOFF(eax,PW_MF0344_F0285)] + + paddd mm2, [GOTOFF(eax,PD_ONEHALF)] + paddd mm4, [GOTOFF(eax,PD_ONEHALF)] + psrad mm2, SCALEBITS + psrad mm4, SCALEBITS + paddd mm3, [GOTOFF(eax,PD_ONEHALF)] + paddd mm5, [GOTOFF(eax,PD_ONEHALF)] + psrad mm3, SCALEBITS + psrad mm5, SCALEBITS + + packssdw mm2, mm4 ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + packssdw mm3, mm5 ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + psubw mm2, mm6 ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + psubw mm3, mm7 ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + movq mm5, MMWORD [esi] ; mm5=Y(01234567) + + pcmpeqw mm4, mm4 + psrlw mm4, BYTE_BIT ; mm4={0xFF 0x00 0xFF 0x00 ..} + pand mm4, mm5 ; mm4=Y(0246)=YE + psrlw mm5, BYTE_BIT ; mm5=Y(1357)=YO + + paddw mm0, mm4 ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6) + paddw mm1, mm5 ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7) + packuswb mm0, mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **) + packuswb mm1, mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **) + + paddw mm2, mm4 ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6) + paddw mm3, mm5 ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7) + packuswb mm2, mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **) + packuswb mm3, mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **) + + paddw mm4, MMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6) + paddw mm5, MMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7) + packuswb mm4, mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **) + packuswb mm5, mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **),
mmF=(21 23 25 27 ** ** ** **) + ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **) + + punpcklbw mmA, mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE, mmB ; mmE=(20 01 22 03 24 05 26 07) + punpcklbw mmD, mmF ; mmD=(11 21 13 23 15 25 17 27) + + movq mmG, mmA + movq mmH, mmA + punpcklwd mmA, mmE ; mmA=(00 10 20 01 02 12 22 03) + punpckhwd mmG, mmE ; mmG=(04 14 24 05 06 16 26 07) + + psrlq mmH, 2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --) + psrlq mmE, 2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --) + + movq mmC, mmD + movq mmB, mmD + punpcklwd mmD, mmH ; mmD=(11 21 02 12 13 23 04 14) + punpckhwd mmC, mmH ; mmC=(15 25 06 16 17 27 -- --) + + psrlq mmB, 2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --) + + movq mmF, mmE + punpcklwd mmE, mmB ; mmE=(22 03 13 23 24 05 15 25) + punpckhwd mmF, mmB ; mmF=(26 07 17 27 -- -- -- --) + + punpckldq mmA, mmD ; mmA=(00 10 20 01 11 21 02 12) + punpckldq mmE, mmG ; mmE=(22 03 13 23 04 14 24 05) + punpckldq mmC, mmF ; mmC=(15 25 06 16 26 07 17 27) + + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st16 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + + sub ecx, byte SIZEOF_MMWORD + jz short .nextrow + + add esi, byte SIZEOF_MMWORD ; inptr0 + add ebx, byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + jmp near .columnloop + alignx 16, 7 + +.column_st16: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_MMWORD + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmE + movq mmA, mmC + sub ecx, byte 2*SIZEOF_MMWORD + add edi, byte 2*SIZEOF_MMWORD + jmp short .column_st4 +.column_st8: + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA, mmE + sub ecx, byte SIZEOF_MMWORD + add edi, byte SIZEOF_MMWORD +.column_st4: + movd eax, mmA + cmp ecx, byte SIZEOF_DWORD + jb short
.column_st2 + mov DWORD [edi+0*SIZEOF_DWORD], eax + psrlq mmA, DWORD_BIT + movd eax, mmA + sub ecx, byte SIZEOF_DWORD + add edi, byte SIZEOF_DWORD +.column_st2: + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi+0*SIZEOF_WORD], ax + shr eax, WORD_BIT + sub ecx, byte SIZEOF_WORD + add edi, byte SIZEOF_WORD +.column_st1: + cmp ecx, byte SIZEOF_BYTE + jb short .nextrow + mov BYTE [edi+0*SIZEOF_BYTE], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb mm6, mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) + pcmpeqb mm7, mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) +%else + pxor mm6, mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) + pxor mm7, mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) +%endif + ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) + ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) + ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) + ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) + + punpcklbw mmA, mmC ; mmA=(00 10 02 12 04 14 06 16) + punpcklbw mmE, mmG ; mmE=(20 30 22 32 24 34 26 36) + punpcklbw mmB, mmD ; mmB=(01 11 03 13 05 15 07 17) + punpcklbw mmF, mmH ; mmF=(21 31 23 33 25 35 27 37) + + movq mmC, mmA + punpcklwd mmA, mmE ; mmA=(00 10 20 30 02 12 22 32) + punpckhwd mmC, mmE ; mmC=(04 14 24 34 06 16 26 36) + movq mmG, mmB + punpcklwd mmB, mmF ; mmB=(01 11 21 31 03 13 23 33) + punpckhwd mmG, mmF ; mmG=(05 15 25 35 07 17 27 37) + + movq mmD, mmA + punpckldq mmA, mmB ; mmA=(00 10 20 30 01 11 21 31) + punpckhdq mmD, mmB ; mmD=(02 12 22 32 03 13 23 33) + movq mmH, mmC + punpckldq mmC, mmG ; mmC=(04 14 24 34 05 15 25 35) + punpckhdq mmH, mmG ; mmH=(06 16 26 36 07 17 27 37) + + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st16 + + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq MMWORD [edi+2*SIZEOF_MMWORD], mmC + movq MMWORD [edi+3*SIZEOF_MMWORD], mmH + + sub ecx, byte SIZEOF_MMWORD + jz short .nextrow + + add esi, byte SIZEOF_MMWORD ; inptr0 + add ebx,
byte SIZEOF_MMWORD ; inptr1 + add edx, byte SIZEOF_MMWORD ; inptr2 + add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr + jmp near .columnloop + alignx 16, 7 + +.column_st16: + cmp ecx, byte SIZEOF_MMWORD/2 ; at least 4 pixels left? (ecx counts pixels here) + jb short .column_st8 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq MMWORD [edi+1*SIZEOF_MMWORD], mmD + movq mmA, mmC + movq mmD, mmH + sub ecx, byte SIZEOF_MMWORD/2 + add edi, byte 2*SIZEOF_MMWORD +.column_st8: + cmp ecx, byte SIZEOF_MMWORD/4 ; at least 2 pixels left? + jb short .column_st4 + movq MMWORD [edi+0*SIZEOF_MMWORD], mmA + movq mmA, mmD + sub ecx, byte SIZEOF_MMWORD/4 + add edi, byte 1*SIZEOF_MMWORD +.column_st4: + cmp ecx, byte SIZEOF_MMWORD/8 ; one last pixel? + jb short .nextrow + movd DWORD [edi+0*SIZEOF_DWORD], mmA + +%endif ; RGB_PIXELSIZE ; --------------- + + alignx 16, 7 + +.nextrow: + pop ecx + pop esi + pop ebx + pop edx + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW + add ebx, byte SIZEOF_JSAMPROW + add edx, byte SIZEOF_JSAMPROW + add edi, byte SIZEOF_JSAMPROW ; output_buf + dec eax ; num_rows + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jdcolext-sse2.asm b/simd/i386/jdcolext-sse2.asm new file mode 100644 index 0000000..0fcb006 --- /dev/null +++ b/simd/i386/jdcolext-sse2.asm @@ -0,0 +1,460 @@ +; +; jdcolext.asm - colorspace conversion (SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright (C) 2012, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace (Y/Cb/Cr in, packed RGB_PIXELSIZE-byte pixels out). +; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_sse2(JDIMENSION out_width, JSAMPIMAGE input_buf, +; JDIMENSION input_row, JSAMPARRAY output_buf, +; int num_rows) +; Returns without storing anything when out_width == 0 or num_rows <= 0. + +%define out_width(b) (b) + 8 ; JDIMENSION out_width +%define input_buf(b) (b) + 12 ; JSAMPIMAGE input_buf +%define input_row(b) (b) + 16 ; JDIMENSION input_row +%define output_buf(b) (b) + 20 ; JSAMPARRAY output_buf +%define num_rows(b) (b) + 24 ; int num_rows + +%define original_ebp ebp + 0 ; [ebp] = stack pointer saved by the prologue +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define gotptr wk(0) - SIZEOF_POINTER ; void * gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2) + +EXTN(jsimd_ycc_rgb_convert_sse2): + push ebp + mov eax, esp ; eax = original ebp (esp after the push; arguments are read at eax+8 and up) + sub esp, byte 4 ; slot for the saved stack pointer + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax ; keep the pre-alignment stack pointer at [aligned ebp] + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make room for the GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [out_width(eax)] ; num_cols + test ecx, ecx + jz near .return ; out_width == 0: nothing to do + + push ecx + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [input_row(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY
[edi+2*SIZEOF_JSAMPARRAY] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] + lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] + lea edx, [edx+ecx*SIZEOF_JSAMPROW] + + pop ecx + + mov edi, JSAMPARRAY [output_buf(eax)] + mov eax, INT [num_rows(eax)] + test eax, eax + jle near .return ; num_rows <= 0: nothing to do + alignx 16, 7 +.rowloop: + push eax + push edi + push edx + push ebx + push esi + push ecx ; col + + mov esi, JSAMPROW [esi] ; inptr0 + mov ebx, JSAMPROW [ebx] ; inptr1 + mov edx, JSAMPROW [edx] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + movpic eax, POINTER [gotptr] ; load GOT address (eax) + alignx 16, 7 +.columnloop: + + movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF) + movdqa xmm1, XMMWORD [edx] ; xmm1=Cr(0123456789ABCDEF) + + pcmpeqw xmm4, xmm4 + pcmpeqw xmm7, xmm7 + psrlw xmm4, BYTE_BIT + psllw xmm7, 7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + movdqa xmm0, xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..} + + pand xmm4, xmm5 ; xmm4=Cb(02468ACE)=CbE + psrlw xmm5, BYTE_BIT ; xmm5=Cb(13579BDF)=CbO + pand xmm0, xmm1 ; xmm0=Cr(02468ACE)=CrE + psrlw xmm1, BYTE_BIT ; xmm1=Cr(13579BDF)=CrO + + paddw xmm4, xmm7 ; xmm4=CbE-128 (0xFF80 is -128 as a signed word) + paddw xmm5, xmm7 ; xmm5=CbO-128 + paddw xmm0, xmm7 ; xmm0=CrE-128 + paddw xmm1, xmm7 ; xmm1=CrO-128 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm2, xmm4 ; xmm2=CbE + movdqa xmm3, xmm5 ; xmm3=CbO + paddw xmm4, xmm4 ; xmm4=2*CbE + paddw xmm5, xmm5 ; xmm5=2*CbO + movdqa xmm6, xmm0 ; xmm6=CrE + movdqa xmm7, xmm1 ; xmm7=CrO + paddw xmm0, xmm0 ; xmm0=2*CrE + paddw xmm1, xmm1 ; xmm1=2*CrO + + pmulhw xmm4, [GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbE * -FIX(0.22800)) + pmulhw xmm5, [GOTOFF(eax,PW_MF0228)] ; xmm5=(2*CbO * -FIX(0.22800)) + pmulhw xmm0, [GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrE * FIX(0.40200)) + pmulhw xmm1, [GOTOFF(eax,PW_F0402)] ; xmm1=(2*CrO * FIX(0.40200)) + + paddw xmm4, [GOTOFF(eax,PW_ONE)] ; +1 so that the >>1 below rounds to nearest + paddw xmm5, [GOTOFF(eax,PW_ONE)] + psraw
xmm4, 1 ; xmm4=(CbE * -FIX(0.22800)) + psraw xmm5, 1 ; xmm5=(CbO * -FIX(0.22800)) + paddw xmm0, [GOTOFF(eax,PW_ONE)] + paddw xmm1, [GOTOFF(eax,PW_ONE)] + psraw xmm0, 1 ; xmm0=(CrE * FIX(0.40200)) + psraw xmm1, 1 ; xmm1=(CrO * FIX(0.40200)) + + paddw xmm4, xmm2 + paddw xmm5, xmm3 + paddw xmm4, xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E + paddw xmm5, xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O + paddw xmm0, xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E + paddw xmm1, xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O + + movdqa xmm4, xmm2 + movdqa xmm5, xmm3 + punpcklwd xmm2, xmm6 ; interleave Cb,Cr words so pmaddwd can apply the (-0.344, +0.285) pair + punpckhwd xmm4, xmm6 + pmaddwd xmm2, [GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm4, [GOTOFF(eax,PW_MF0344_F0285)] + punpcklwd xmm3, xmm7 + punpckhwd xmm5, xmm7 + pmaddwd xmm3, [GOTOFF(eax,PW_MF0344_F0285)] + pmaddwd xmm5, [GOTOFF(eax,PW_MF0344_F0285)] + + paddd xmm2, [GOTOFF(eax,PD_ONEHALF)] ; add one half before the >>SCALEBITS below (rounding) + paddd xmm4, [GOTOFF(eax,PD_ONEHALF)] + psrad xmm2, SCALEBITS + psrad xmm4, SCALEBITS + paddd xmm3, [GOTOFF(eax,PD_ONEHALF)] + paddd xmm5, [GOTOFF(eax,PD_ONEHALF)] + psrad xmm3, SCALEBITS + psrad xmm5, SCALEBITS + + packssdw xmm2, xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + packssdw xmm3, xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + psubw xmm2, xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + psubw xmm3, xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + movdqa xmm5, XMMWORD [esi] ; xmm5=Y(0123456789ABCDEF) + + pcmpeqw xmm4, xmm4 + psrlw xmm4, BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..} + pand xmm4, xmm5 ; xmm4=Y(02468ACE)=YE + psrlw xmm5, BYTE_BIT ; xmm5=Y(13579BDF)=YO + + paddw xmm0, xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE) + paddw xmm1, xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF) + packuswb xmm0, xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1, xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2, xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE) + paddw xmm3, xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF) + packuswb xmm2, xmm2 ; xmm2=G(02468ACE********) +
packuswb xmm3, xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE) + paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF) + packuswb xmm4, xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5, xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + ; In the tables below "cp" is byte c of output pixel p (p in hex). + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA, xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE, xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD, xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG, xmmA + movdqa xmmH, xmmA + punpcklwd xmmA, xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG, xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH, 2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE, 2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC, xmmD + movdqa xmmB, xmmD + punpcklwd xmmD, xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC, xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB, 2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF, xmmE + punpcklwd xmmE, xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF, xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH, xmmA, 0x4E ; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB, xmmE + punpckldq xmmA, xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE, xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD, xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18
28 09 19 29) + + pshufd xmmH, xmmG, 0x4E ; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB, xmmF + punpckldq xmmG, xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF, xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC, xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA, xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD, xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF, xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 ; fewer than a full vector of columns left: partial store + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_XMMWORD + jz near .nextrow + + add esi, byte SIZEOF_XMMWORD ; inptr0 + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16, 7 + +.column_st32: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE (from here on ecx counts remaining output bytes) + cmp ecx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmF + sub ecx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmD + sub ecx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space.
+ cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + mov BYTE [edi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6, xmm6 ; xmm6=XE=X(02468ACE********) (all filler bytes 0xFF) + pcmpeqb xmm7, xmm7 ; xmm7=XO=X(13579BDF********) (all filler bytes 0xFF) +%else + pxor xmm6, xmm6 ; xmm6=XE=X(02468ACE********) (all filler bytes 0x00) + pxor xmm7, xmm7 ; xmm7=XO=X(13579BDF********) (all filler bytes 0x00) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA, xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE, xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB, xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF, xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC, xmmA + punpcklwd xmmA, xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC, xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG, xmmB + punpcklwd xmmB, xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25
35 07 17 27 37) + punpckhwd xmmG, xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD, xmmA + punpckldq xmmA, xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD, xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH, xmmC + punpckldq xmmC, xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH, xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test edi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub ecx, byte SIZEOF_XMMWORD + jz near .nextrow + + add esi, byte SIZEOF_XMMWORD ; inptr0 + add ebx, byte SIZEOF_XMMWORD ; inptr1 + add edx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + alignx 16, 7 + +.column_st32: + cmp ecx, byte SIZEOF_XMMWORD/2 ; ecx still counts pixels in this branch (4 output bytes each) + jb short .column_st16 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD + add edi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmC + movdqa xmmD, xmmH + sub ecx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp ecx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmD + sub ecx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space.
+ cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/8*4 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + movd XMM_DWORD [edi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + + alignx 16, 7 + +.nextrow: + pop ecx + pop esi + pop ebx + pop edx + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW ; next Y row pointer + add ebx, byte SIZEOF_JSAMPROW ; next Cb row pointer + add edx, byte SIZEOF_JSAMPROW ; next Cr row pointer + add edi, byte SIZEOF_JSAMPROW ; output_buf + dec eax ; num_rows + jg near .rowloop + + sfence ; flush the write buffer (the aligned path above stores with non-temporal movntdq) + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp (the value the prologue stored at [aligned ebp]) + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jdcolor-avx2.asm b/simd/i386/jdcolor-avx2.asm new file mode 100644 index 0000000..d2f86e6 --- /dev/null +++ b/simd/i386/jdcolor-avx2.asm @@ -0,0 +1,120 @@ +; +; jdcolor.asm - colorspace conversion (AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2015, Intel Corporation. +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414), where FIX(x) = round(x * 2^SCALEBITS) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_ycc_rgb_convert_avx2) + +EXTN(jconst_ycc_rgb_convert_avx2): + +PW_F0402 times 16 dw F_0_402 ; 16 words = 32 bytes +PW_MF0228 times 16 dw -F_0_228 ; 16 words = 32 bytes +PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285 ; 8 interleaved (-0.344, +0.285) word pairs = 32 bytes +PW_ONE times 16 dw 1 ; 16 words = 32 bytes +PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1) ; one half in SCALEBITS fixed point, 8 dwords = 32 bytes + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdcolext-avx2.asm" +; Each stanza below redefines RGB_* and the function-name macro, then includes jdcolext-avx2.asm again (presumably one converter per pixel layout; the included file is not visible from here). +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extrgb_convert_avx2 +%include "jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extrgbx_convert_avx2 +%include "jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extbgr_convert_avx2 +%include
"jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extbgrx_convert_avx2 +%include "jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extxbgr_convert_avx2 +%include "jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extxrgb_convert_avx2 +%include "jdcolext-avx2.asm" diff --git a/simd/i386/jdcolor-mmx.asm b/simd/i386/jdcolor-mmx.asm new file mode 100644 index 0000000..8f5a3b3 --- /dev/null +++ b/simd/i386/jdcolor-mmx.asm @@ -0,0 +1,119 @@ +; +; jdcolor.asm - colorspace conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414), where FIX(x) = round(x * 2^SCALEBITS) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_ycc_rgb_convert_mmx) + +EXTN(jconst_ycc_rgb_convert_mmx): + +PW_F0402 times 4 dw F_0_402 ; 4 words = 8 bytes +PW_MF0228 times 4 dw -F_0_228 ; 4 words = 8 bytes +PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285 ; 2 interleaved (-0.344, +0.285) word pairs = 8 bytes +PW_ONE times 4 dw 1 ; 4 words = 8 bytes +PD_ONEHALF times 2 dd 1 << (SCALEBITS - 1) ; one half in SCALEBITS fixed point, 2 dwords = 8 bytes + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdcolext-mmx.asm" +; Each stanza below redefines RGB_* and the function-name macro, then includes jdcolext-mmx.asm again (presumably one converter per pixel layout; confirm against the included file). +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgb_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgbx_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgr_convert_mmx +%include
"jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgrx_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxbgr_convert_mmx +%include "jdcolext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxrgb_convert_mmx +%include "jdcolext-mmx.asm" diff --git a/simd/i386/jdcolor-sse2.asm b/simd/i386/jdcolor-sse2.asm new file mode 100644 index 0000000..ae553db --- /dev/null +++ b/simd/i386/jdcolor-sse2.asm @@ -0,0 +1,119 @@ +; +; jdcolor.asm - colorspace conversion (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414), where FIX(x) = round(x * 2^SCALEBITS) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_ycc_rgb_convert_sse2) + +EXTN(jconst_ycc_rgb_convert_sse2): + +PW_F0402 times 8 dw F_0_402 ; 8 words = 16 bytes +PW_MF0228 times 8 dw -F_0_228 ; 8 words = 16 bytes +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 ; 4 interleaved (-0.344, +0.285) word pairs = 16 bytes, the pmaddwd operand in jdcolext-sse2.asm +PW_ONE times 8 dw 1 ; 8 words = 16 bytes +PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1) ; one half in SCALEBITS fixed point, 4 dwords = 16 bytes + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdcolext-sse2.asm" +; Each stanza below redefines RGB_* and the function-name macro, then includes jdcolext-sse2.asm again, so its jsimd_ycc_rgb_convert_sse2 label takes the new name. +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2 +%include
"jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2 +%include "jdcolext-sse2.asm" diff --git a/simd/i386/jdmerge-avx2.asm b/simd/i386/jdmerge-avx2.asm new file mode 100644 index 0000000..1731844 --- /dev/null +++ b/simd/i386/jdmerge-avx2.asm @@ -0,0 +1,138 @@ +; +; jdmerge.asm - merged upsampling/color conversion (AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414), where FIX(x) = round(x * 2^SCALEBITS) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_merged_upsample_avx2) + +EXTN(jconst_merged_upsample_avx2): + +PW_F0402 times 16 dw F_0_402 ; 16 words = 32 bytes +PW_MF0228 times 16 dw -F_0_228 ; 16 words = 32 bytes +PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285 ; 8 interleaved (-0.344, +0.285) word pairs = 32 bytes +PW_ONE times 16 dw 1 ; 16 words = 32 bytes +PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1) ; one half in SCALEBITS fixed point, 8 dwords = 32 bytes + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdmrgext-avx2.asm" +; Each stanza below redefines RGB_* and both function-name macros (h2v1, h2v2), then includes jdmrgext-avx2.asm again (presumably one upsampler pair per pixel layout; the included file is not visible from here). +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_avx2 \ + jsimd_h2v1_extrgb_merged_upsample_avx2 +%define jsimd_h2v2_merged_upsample_avx2 \ + jsimd_h2v2_extrgb_merged_upsample_avx2 +%include "jdmrgext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_avx2 \ + jsimd_h2v1_extrgbx_merged_upsample_avx2 +%define jsimd_h2v2_merged_upsample_avx2 \ + jsimd_h2v2_extrgbx_merged_upsample_avx2 +%include "jdmrgext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define
RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_avx2 \ + jsimd_h2v1_extbgr_merged_upsample_avx2 +%define jsimd_h2v2_merged_upsample_avx2 \ + jsimd_h2v2_extbgr_merged_upsample_avx2 +%include "jdmrgext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_avx2 \ + jsimd_h2v1_extbgrx_merged_upsample_avx2 +%define jsimd_h2v2_merged_upsample_avx2 \ + jsimd_h2v2_extbgrx_merged_upsample_avx2 +%include "jdmrgext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_avx2 \ + jsimd_h2v1_extxbgr_merged_upsample_avx2 +%define jsimd_h2v2_merged_upsample_avx2 \ + jsimd_h2v2_extxbgr_merged_upsample_avx2 +%include "jdmrgext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_avx2 \ + jsimd_h2v1_extxrgb_merged_upsample_avx2 +%define jsimd_h2v2_merged_upsample_avx2 \ + jsimd_h2v2_extxrgb_merged_upsample_avx2 +%include "jdmrgext-avx2.asm" diff --git a/simd/i386/jdmerge-mmx.asm b/simd/i386/jdmerge-mmx.asm new file mode 100644 index 0000000..607bf39 --- /dev/null +++ b/simd/i386/jdmerge-mmx.asm @@ -0,0 +1,125 @@ +; +; jdmerge.asm - merged upsampling/color conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_merged_upsample_mmx) + +EXTN(jconst_merged_upsample_mmx): + +PW_F0402 times 4 dw F_0_402 +PW_MF0228 times 4 dw -F_0_228 +PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285 +PW_ONE times 4 dw 1 +PD_ONEHALF times 2 dd 1 << (SCALEBITS - 1) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgb_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgb_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define 
jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgbx_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgbx_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgr_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgr_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgrx_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgrx_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxbgr_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxbgr_merged_upsample_mmx +%include "jdmrgext-mmx.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxrgb_merged_upsample_mmx +%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxrgb_merged_upsample_mmx +%include "jdmrgext-mmx.asm" diff --git a/simd/i386/jdmerge-sse2.asm b/simd/i386/jdmerge-sse2.asm new file mode 100644 index 0000000..ddb1d5e --- /dev/null +++ b/simd/i386/jdmerge-sse2.asm @@ -0,0 +1,137 @@ +; +; jdmerge.asm - merged upsampling/color 
conversion (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_merged_upsample_sse2) + +EXTN(jconst_merged_upsample_sse2): + +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 + +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 \ + jsimd_h2v1_extrgb_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 \ + jsimd_h2v2_extrgb_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED 
+%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 \ + jsimd_h2v1_extrgbx_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 \ + jsimd_h2v2_extrgbx_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 \ + jsimd_h2v1_extbgr_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 \ + jsimd_h2v2_extbgr_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 \ + jsimd_h2v1_extbgrx_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 \ + jsimd_h2v2_extbgrx_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 \ + jsimd_h2v1_extxbgr_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 \ + jsimd_h2v2_extxbgr_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_h2v1_merged_upsample_sse2 \ + jsimd_h2v1_extxrgb_merged_upsample_sse2 +%define jsimd_h2v2_merged_upsample_sse2 \ + 
jsimd_h2v2_extxrgb_merged_upsample_sse2 +%include "jdmrgext-sse2.asm" diff --git a/simd/i386/jdmrgext-avx2.asm b/simd/i386/jdmrgext-avx2.asm new file mode 100644 index 0000000..cde4865 --- /dev/null +++ b/simd/i386/jdmrgext-avx2.asm @@ -0,0 +1,577 @@ +; +; jdmrgext.asm - merged upsampling/color conversion (AVX2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright (C) 2012, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. 
+; +; GLOBAL(void) +; jsimd_h2v1_merged_upsample_avx2(JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; Converts the Y/Cb/Cr rows selected by in_row_group_ctr into the single output row output_buf[0]; each Cb/Cr sample is applied to one even/odd pair of Y samples. + +%define output_width(b) (b) + 8 ; JDIMENSION output_width +%define input_buf(b) (b) + 12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b) + 16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b) + 20 ; JSAMPARRAY output_buf + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_YMMWORD + ; ymmword wk[WK_NUM] +%define WK_NUM 3 +%define gotptr wk(0) - SIZEOF_POINTER ; void * gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2) + +EXTN(jsimd_h2v1_merged_upsample_avx2): + push ebp + mov eax, esp ; eax = original esp, which points at the saved ebp (arguments are read as (eax)+8.. below) + sub esp, byte 4 ; reserve the slot that will hold the original esp + and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits + mov [esp], eax ; save original esp at the aligned address ("pop esp" reloads it on exit) + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] ; allocate wk[WK_NUM] below the aligned ebp + pushpic eax ; make room for the GOT address (gotptr) + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov ecx, JDIMENSION [output_width(eax)] ; col + test ecx, ecx + jz near .return ; output_width == 0: nothing to convert + + push ecx ; save col; ecx is reused for in_row_group_ctr + + mov edi, JSAMPIMAGE [input_buf(eax)] + mov ecx, JDIMENSION [in_row_group_ctr(eax)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(eax)] + mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0 + mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 + mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 + mov edi, JSAMPROW [edi] ; outptr + + pop ecx ; col + + alignx 16, 7 +.columnloop: + movpic eax, POINTER [gotptr] ; load GOT address (eax) + + vmovdqu ymm6, YMMWORD [ebx] ; ymm6=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV) + vmovdqu ymm7, YMMWORD [edx] ; ymm7=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV) + + vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's) + vpcmpeqw ymm3,
ymm3, ymm3 + vpsllw ymm3, ymm3, 7 ; ymm3={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + vpermq ymm6, ymm6, 0xd8 ; ymm6=Cb(01234567GHIJKLMN89ABCDEFOPQRSTUV) + vpermq ymm7, ymm7, 0xd8 ; ymm7=Cr(01234567GHIJKLMN89ABCDEFOPQRSTUV) + vpunpcklbw ymm4, ymm6, ymm1 ; ymm4=Cb(0123456789ABCDEF)=CbL + vpunpckhbw ymm6, ymm6, ymm1 ; ymm6=Cb(GHIJKLMNOPQRSTUV)=CbH + vpunpcklbw ymm0, ymm7, ymm1 ; ymm0=Cr(0123456789ABCDEF)=CrL + vpunpckhbw ymm7, ymm7, ymm1 ; ymm7=Cr(GHIJKLMNOPQRSTUV)=CrH + + vpaddw ymm5, ymm6, ymm3 ; ymm5=CbH-128 (0xFF80 is -128 as a signed word) + vpaddw ymm2, ymm4, ymm3 ; ymm2=CbL-128 + vpaddw ymm1, ymm7, ymm3 ; ymm1=CrH-128 + vpaddw ymm3, ymm0, ymm3 ; ymm3=CrL-128 (overwrites the 0xFF80 constant) + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + vpaddw ymm6, ymm5, ymm5 ; ymm6=2*CbH + vpaddw ymm4, ymm2, ymm2 ; ymm4=2*CbL + vpaddw ymm7, ymm1, ymm1 ; ymm7=2*CrH + vpaddw ymm0, ymm3, ymm3 ; ymm0=2*CrL + + vpmulhw ymm6, ymm6, [GOTOFF(eax,PW_MF0228)] ; ymm6=(2*CbH * -FIX(0.22800)) + vpmulhw ymm4, ymm4, [GOTOFF(eax,PW_MF0228)] ; ymm4=(2*CbL * -FIX(0.22800)) + vpmulhw ymm7, ymm7, [GOTOFF(eax,PW_F0402)] ; ymm7=(2*CrH * FIX(0.40200)) + vpmulhw ymm0, ymm0, [GOTOFF(eax,PW_F0402)] ; ymm0=(2*CrL * FIX(0.40200)) + + vpaddw ymm6, ymm6, [GOTOFF(eax,PW_ONE)] + vpaddw ymm4, ymm4, [GOTOFF(eax,PW_ONE)] + vpsraw ymm6, ymm6, 1 ; ymm6=(CbH * -FIX(0.22800)) + vpsraw ymm4, ymm4, 1 ; ymm4=(CbL * -FIX(0.22800)) + vpaddw ymm7, ymm7, [GOTOFF(eax,PW_ONE)] + vpaddw ymm0, ymm0, [GOTOFF(eax,PW_ONE)] + vpsraw ymm7, ymm7, 1 ; ymm7=(CrH * FIX(0.40200)) + vpsraw ymm0, ymm0, 1 ; ymm0=(CrL * FIX(0.40200)) + + vpaddw ymm6, ymm6, ymm5 + vpaddw ymm4, ymm4, ymm2 + vpaddw ymm6, ymm6, ymm5 ; ymm6=(CbH * FIX(1.77200))=(B-Y)H + vpaddw ymm4, ymm4, ymm2 ; ymm4=(CbL * FIX(1.77200))=(B-Y)L + vpaddw ymm7, ymm7, ymm1 ; ymm7=(CrH * FIX(1.40200))=(R-Y)H + vpaddw ymm0, ymm0, ymm3 ; ymm0=(CrL * FIX(1.40200))=(R-Y)L + + vmovdqa YMMWORD [wk(0)], ymm6 ;
wk(0)=(B-Y)H + vmovdqa YMMWORD [wk(1)], ymm7 ; wk(1)=(R-Y)H + + vpunpckhwd ymm6, ymm5, ymm1 + vpunpcklwd ymm5, ymm5, ymm1 + vpmaddwd ymm5, ymm5, [GOTOFF(eax,PW_MF0344_F0285)] + vpmaddwd ymm6, ymm6, [GOTOFF(eax,PW_MF0344_F0285)] + vpunpckhwd ymm7, ymm2, ymm3 + vpunpcklwd ymm2, ymm2, ymm3 + vpmaddwd ymm2, ymm2, [GOTOFF(eax,PW_MF0344_F0285)] + vpmaddwd ymm7, ymm7, [GOTOFF(eax,PW_MF0344_F0285)] + + vpaddd ymm5, ymm5, [GOTOFF(eax,PD_ONEHALF)] + vpaddd ymm6, ymm6, [GOTOFF(eax,PD_ONEHALF)] + vpsrad ymm5, ymm5, SCALEBITS + vpsrad ymm6, ymm6, SCALEBITS + vpaddd ymm2, ymm2, [GOTOFF(eax,PD_ONEHALF)] + vpaddd ymm7, ymm7, [GOTOFF(eax,PD_ONEHALF)] + vpsrad ymm2, ymm2, SCALEBITS + vpsrad ymm7, ymm7, SCALEBITS + + vpackssdw ymm5, ymm5, ymm6 ; ymm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + vpackssdw ymm2, ymm2, ymm7 ; ymm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + vpsubw ymm5, ymm5, ymm1 ; ymm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + vpsubw ymm2, ymm2, ymm3 ; ymm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + vmovdqa YMMWORD [wk(2)], ymm5 ; wk(2)=(G-Y)H + + mov al, 2 ; Yctr: two Y passes per Cb/Cr load (L half from registers, then H half from wk[]); clobbers eax, so the GOT address is reloaded at .columnloop + jmp short .Yloop_1st + alignx 16, 7 + +.Yloop_2nd: + vmovdqa ymm0, YMMWORD [wk(1)] ; ymm0=(R-Y)H + vmovdqa ymm2, YMMWORD [wk(2)] ; ymm2=(G-Y)H + vmovdqa ymm4, YMMWORD [wk(0)] ; ymm4=(B-Y)H + alignx 16, 7 + +.Yloop_1st: + vmovdqu ymm7, YMMWORD [esi] ; ymm7=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV) + + vpcmpeqw ymm6, ymm6, ymm6 + vpsrlw ymm6, ymm6, BYTE_BIT ; ymm6={0xFF 0x00 0xFF 0x00 ..} + vpand ymm6, ymm6, ymm7 ; ymm6=Y(02468ACEGIKMOQSU)=YE + vpsrlw ymm7, ymm7, BYTE_BIT ; ymm7=Y(13579BDFHJLNPRTV)=YO + + vmovdqa ymm1, ymm0 ; ymm1=ymm0=(R-Y)(L/H) + vmovdqa ymm3, ymm2 ; ymm3=ymm2=(G-Y)(L/H) + vmovdqa ymm5, ymm4 ; ymm5=ymm4=(B-Y)(L/H) + + vpaddw ymm0, ymm0, ymm6 ; ymm0=((R-Y)+YE)=RE=R(02468ACEGIKMOQSU) + vpaddw ymm1, ymm1, ymm7 ; ymm1=((R-Y)+YO)=RO=R(13579BDFHJLNPRTV) + vpackuswb ymm0, ymm0, ymm0 ; ymm0=R(02468ACE********GIKMOQSU********) + vpackuswb ymm1, ymm1, ymm1 ; ymm1=R(13579BDF********HJLNPRTV********) + + vpaddw ymm2, ymm2, ymm6 ;
ymm2=((G-Y)+YE)=GE=G(02468ACEGIKMOQSU) + vpaddw ymm3, ymm3, ymm7 ; ymm3=((G-Y)+YO)=GO=G(13579BDFHJLNPRTV) + vpackuswb ymm2, ymm2, ymm2 ; ymm2=G(02468ACE********GIKMOQSU********) + vpackuswb ymm3, ymm3, ymm3 ; ymm3=G(13579BDF********HJLNPRTV********) + + vpaddw ymm4, ymm4, ymm6 ; ymm4=((B-Y)+YE)=BE=B(02468ACEGIKMOQSU) + vpaddw ymm5, ymm5, ymm7 ; ymm5=((B-Y)+YO)=BO=B(13579BDFHJLNPRTV) + vpackuswb ymm4, ymm4, ymm4 ; ymm4=B(02468ACE********GIKMOQSU********) + vpackuswb ymm5, ymm5, ymm5 ; ymm5=B(13579BDF********HJLNPRTV********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **) + ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **) + ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **) + ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **) + ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **) + ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **) + ; ymmG=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **) + ; ymmH=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **) + + vpunpcklbw ymmA, ymmA, ymmC ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E + ; 0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U) + vpunpcklbw ymmE, ymmE, ymmB ; ymmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F + ; 2G 0H 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V) + vpunpcklbw ymmD, ymmD, ymmF ; ymmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F + ; 1H 2H 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V) + + vpsrldq ymmH, ymmA, 2 ; ymmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E 0G 1G + ; 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U -- --) + vpunpckhwd ymmG, ymmA, ymmE ; ymmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F + ; 0O 1O 2O 0P 0Q 1Q 2Q 0R 0S 1S 2S 0T 0U 1U 2U 0V) + vpunpcklwd ymmA, ymmA, ymmE ; ymmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07 + ; 0G 1G 2G 0H 0I 1I 2I 0J 0K 1K 2K 0L 0M 1M 2M 0N) + + vpsrldq ymmE, ymmE, 2 ; ymmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D
2E 0F 2G 0H + ; 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V -- --) + + vpsrldq ymmB, ymmD, 2 ; ymmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F 1H 2H + ; 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V -- --) + vpunpckhwd ymmC, ymmD, ymmH ; ymmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F 0G 1G + ; 1P 2P 0Q 1Q 1R 2R 0S 1S 1T 2T 0U 1U 1V 2V -- --) + vpunpcklwd ymmD, ymmD, ymmH ; ymmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18 + ; 1H 2H 0I 1I 1J 2J 0K 1K 1L 2L 0M 1M 1N 2N 0O 1O) + + vpunpckhwd ymmF, ymmE, ymmB ; ymmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F 2G 0H 1H 2H + ; 2Q 0R 1R 2R 2S 0T 1T 2T 2U 0V 1V 2V -- -- -- --) + vpunpcklwd ymmE, ymmE, ymmB ; ymmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29 + ; 2I 0J 1J 2J 2K 0L 1L 2L 2M 0N 1N 2N 2O 0P 1P 2P) + + vpshufd ymmH, ymmA, 0x4E ; ymmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03 + ; 0K 1K 2K 0L 0M 1M 2M 0N 0G 1G 2G 0H 0I 1I 2I 0J) + vpunpckldq ymmA, ymmA, ymmD ; ymmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14 + ; 0G 1G 2G 0H 1H 2H 0I 1I 0I 1I 2I 0J 1J 2J 0K 1K) + vpunpckhdq ymmD, ymmD, ymmE ; ymmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29 + ; 1L 2L 0M 1M 2M 0N 1N 2N 1N 2N 0O 1O 2O 0P 1P 2P) + vpunpckldq ymmE, ymmE, ymmH ; ymmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07 + ; 2I 0J 1J 2J 0K 1K 2K 0L 2K 0L 1L 2L 0M 1M 2M 0N) + + vpshufd ymmH, ymmG, 0x4E ; ymmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B + ; 0S 1S 2S 0T 0U 1U 2U 0V 0O 1O 2O 0P 0Q 1Q 2Q 0R) + vpunpckldq ymmG, ymmG, ymmC ; ymmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C + ; 0O 1O 2O 0P 1P 2P 0Q 1Q 0Q 1Q 2Q 0R 1R 2R 0S 1S) + vpunpckhdq ymmC, ymmC, ymmF ; ymmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F 0G 1G 2G 0H 1H 2H + ; 1T 2T 0U 1U 2U 0V 1V 2V 1V 2V -- -- -- -- -- --) + vpunpckldq ymmF, ymmF, ymmH ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F + ; 2Q 0R 1R 2R 0S 1S 2S 0T 2S 0T 1T 2T 0U 1U 2U 0V) + + vpunpcklqdq ymmH, ymmA, ymmE ; ymmH=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K
1K 2K 0L) + vpunpcklqdq ymmG, ymmD, ymmG ; ymmG=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A + ; 1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q) + vpunpcklqdq ymmC, ymmF, ymmC ; ymmC=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + + vperm2i128 ymmA, ymmH, ymmG, 0x20 ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + vperm2i128 ymmD, ymmC, ymmH, 0x30 ; ymmD=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + vperm2i128 ymmF, ymmG, ymmC, 0x31 ; ymmF=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + + cmp ecx, byte SIZEOF_YMMWORD + jb short .column_st64 ; fewer than SIZEOF_YMMWORD pixels remain: store only the valid bytes + + test edi, SIZEOF_YMMWORD-1 + jnz short .out1 ; outptr is not YMMWORD-aligned: use vmovdqu instead of vmovntdq + ; --(aligned)------------------- + vmovntdq YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovntdq YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + vmovntdq YMMWORD [edi+2*SIZEOF_YMMWORD], ymmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + vmovdqu YMMWORD [edi+2*SIZEOF_YMMWORD], ymmF +.out0: + add edi, byte RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr + sub ecx, byte SIZEOF_YMMWORD + jz near .endcolumn + + add esi, byte SIZEOF_YMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add ebx, byte SIZEOF_YMMWORD ; inptr1 + add edx, byte SIZEOF_YMMWORD ; inptr2 + jmp near .columnloop + alignx 16, 7 + +.column_st64: + lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE + cmp ecx, byte 2*SIZEOF_YMMWORD + jb short .column_st32 + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + add edi, byte 2*SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmF + sub ecx, byte 2*SIZEOF_YMMWORD + jmp short .column_st31 +.column_st32: + cmp ecx, byte SIZEOF_YMMWORD + jb short .column_st31 + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + add edi, byte
SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmD + sub ecx, byte SIZEOF_YMMWORD + jmp short .column_st31 +.column_st31: + cmp ecx, byte SIZEOF_XMMWORD + jb short .column_st15 + vmovdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + vperm2i128 ymmA, ymmA, ymmA, 1 + sub ecx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + vmovq XMM_MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + vpsrldq xmmA, xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + vmovd XMM_DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + vpsrldq xmmA, xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + vmovd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space.
+ test ecx, ecx + jz short .endcolumn + mov BYTE [edi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + vpcmpeqb ymm6, ymm6, ymm6 ; ymm6=XE=X(02468ACE********GIKMOQSU********) + vpcmpeqb ymm7, ymm7, ymm7 ; ymm7=XO=X(13579BDF********HJLNPRTV********) +%else + vpxor ymm6, ymm6, ymm6 ; ymm6=XE=X(02468ACE********GIKMOQSU********) + vpxor ymm7, ymm7, ymm7 ; ymm7=XO=X(13579BDF********HJLNPRTV********) +%endif + ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **) + ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **) + ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **) + ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **) + ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **) + ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **) + ; ymmG=(30 32 34 36 38 3A 3C 3E ** 3G 3I 3K 3M 3O 3Q 3S 3U **) + ; ymmH=(31 33 35 37 39 3B 3D 3F ** 3H 3J 3L 3N 3P 3R 3T 3V **) + + vpunpcklbw ymmA, ymmA, ymmC ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E + ; 0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U) + vpunpcklbw ymmE, ymmE, ymmG ; ymmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E + ; 2G 3G 2I 3I 2K 3K 2M 3M 2O 3O 2Q 3Q 2S 3S 2U 3U) + vpunpcklbw ymmB, ymmB, ymmD ; ymmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F + ; 0H 1H 0J 1J 0L 1L 0N 1N 0P 1P 0R 1R 0T 1T 0V 1V) + vpunpcklbw ymmF, ymmF, ymmH ; ymmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F + ; 2H 3H 2J 3J 2L 3L 2N 3N 2P 3P 2R 3R 2T 3T 2V 3V) + + vpunpckhwd ymmC, ymmA, ymmE ; ymmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E + ; 0O 1O 2O 3O 0Q 1Q 2Q 3Q 0S 1S 2S 3S 0U 1U 2U 3U) + vpunpcklwd ymmA, ymmA, ymmE ; ymmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36 + ; 0G 1G 2G 3G 0I 1I 2I 3I 0K 1K 2K 3K 0M 1M 2M 3M) + vpunpckhwd ymmG, ymmB, ymmF ; ymmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F + ; 0P 1P 2P 3P 0R 1R 2R 3R 0T 1T 2T 3T 0V 1V 2V 3V) + vpunpcklwd ymmB, ymmB, ymmF ; ymmB=(01 11 21 31 03 13 23 33 05 15
25 35 07 17 27 37 + ; 0H 1H 2H 3H 0J 1J 2J 3J 0L 1L 2L 3L 0N 1N 2N 3N) + + vpunpckhdq ymmE, ymmA, ymmB ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + vpunpckldq ymmB, ymmA, ymmB ; ymmB=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J) + vpunpckhdq ymmF, ymmC, ymmG ; ymmF=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + vpunpckldq ymmG, ymmC, ymmG ; ymmG=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R) + + vperm2i128 ymmA, ymmB, ymmE, 0x20 ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + vperm2i128 ymmD, ymmG, ymmF, 0x20 ; ymmD=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + vperm2i128 ymmC, ymmB, ymmE, 0x31 ; ymmC=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + vperm2i128 ymmH, ymmG, ymmF, 0x31 ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + cmp ecx, byte SIZEOF_YMMWORD + jb short .column_st64 + + test edi, SIZEOF_YMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + vmovntdq YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovntdq YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + vmovntdq YMMWORD [edi+2*SIZEOF_YMMWORD], ymmC + vmovntdq YMMWORD [edi+3*SIZEOF_YMMWORD], ymmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + vmovdqu YMMWORD [edi+2*SIZEOF_YMMWORD], ymmC + vmovdqu YMMWORD [edi+3*SIZEOF_YMMWORD], ymmH +.out0: + add edi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr + sub ecx, byte SIZEOF_YMMWORD + jz near .endcolumn + + add esi, byte SIZEOF_YMMWORD ; inptr0 + dec al + jnz near .Yloop_2nd + + add ebx, byte
SIZEOF_YMMWORD ; inptr1 + add edx, byte SIZEOF_YMMWORD ; inptr2 + jmp near .columnloop + alignx 16, 7 + +.column_st64: + cmp ecx, byte SIZEOF_YMMWORD/2 + jb short .column_st32 + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD + add edi, byte 2*SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmC + vmovdqa ymmD, ymmH + sub ecx, byte SIZEOF_YMMWORD/2 +.column_st32: + cmp ecx, byte SIZEOF_YMMWORD/4 + jb short .column_st16 + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA + add edi, byte SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmD + sub ecx, byte SIZEOF_YMMWORD/4 +.column_st16: + cmp ecx, byte SIZEOF_YMMWORD/8 + jb short .column_st15 + vmovdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA + add edi, byte SIZEOF_XMMWORD ; outptr + vperm2i128 ymmA, ymmA, ymmA, 1 + sub ecx, byte SIZEOF_YMMWORD/8 +.column_st15: + ; Store two pixels (8 bytes) of ymmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_YMMWORD/16 + jb short .column_st7 + vmovq MMWORD [edi], xmmA + add edi, byte SIZEOF_YMMWORD/16*4 + sub ecx, byte SIZEOF_YMMWORD/16 + vpsrldq xmmA, SIZEOF_YMMWORD/16*4 +.column_st7: + ; Store one pixel (4 bytes) of ymmA to the output when it has enough + ; space. + test ecx, ecx + jz short .endcolumn + vmovd XMM_DWORD [edi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + sfence ; flush the write buffer + +.return: + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+; +; GLOBAL(void) +; jsimd_h2v2_merged_upsample_avx2(JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; Implemented as two calls to jsimd_h2v1_merged_upsample_avx2 (one per output row) that share the same Cb/Cr rows. + +%define output_width(b) (b) + 8 ; JDIMENSION output_width +%define input_buf(b) (b) + 12 ; JSAMPIMAGE input_buf +%define in_row_group_ctr(b) (b) + 16 ; JDIMENSION in_row_group_ctr +%define output_buf(b) (b) + 20 ; JSAMPARRAY output_buf + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2) + +EXTN(jsimd_h2v2_merged_upsample_avx2): + push ebp + mov ebp, esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov eax, POINTER [output_width(ebp)] ; eax = output_width + + mov edi, JSAMPIMAGE [input_buf(ebp)] + mov ecx, JDIMENSION [in_row_group_ctr(ebp)] + mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] + mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] + mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] + mov edi, JSAMPARRAY [output_buf(ebp)] + lea esi, [esi+ecx*SIZEOF_JSAMPROW] ; pre-offset the Y row array by in_row_group_ctr; the callee indexes by ecx again, so it reads Y row 2*in_row_group_ctr + + push edx ; inptr2 + push ebx ; inptr1 + push esi ; inptr00 + mov ebx, esp ; ebx = temporary 3-entry input_buf {inptr00, inptr1, inptr2} built on the stack + + push edi ; output_buf (outptr0) + push ecx ; in_row_group_ctr + push ebx ; input_buf + push eax ; output_width + + call near EXTN(jsimd_h2v1_merged_upsample_avx2) ; first output row; the callee saves/restores ebx, esi and edi + + add esi, byte SIZEOF_JSAMPROW ; inptr01 + add edi, byte SIZEOF_JSAMPROW ; outptr1 + mov POINTER [ebx+0*SIZEOF_POINTER], esi ; patch entry 0 of the temporary input_buf for the second Y row + mov POINTER [ebx-1*SIZEOF_POINTER], edi ; patch the pushed output_buf argument (the slot pushed right after "mov ebx, esp") + + call near EXTN(jsimd_h2v1_merged_upsample_avx2) ; second output row, reusing the argument slots still on the stack + + add esp, byte 7*SIZEOF_DWORD ; drop the 4 arguments and the 3-entry temporary input_buf + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this.
+ align 32 diff --git a/simd/i386/jdmrgext-mmx.asm b/simd/i386/jdmrgext-mmx.asm new file mode 100644 index 0000000..4b9e35d --- /dev/null +++ b/simd/i386/jdmrgext-mmx.asm @@ -0,0 +1,462 @@ +; +; jdmrgext.asm - merged upsampling/color conversion (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. 
+;
+; GLOBAL(void)
+; jsimd_h2v1_merged_upsample_mmx(JDIMENSION output_width, JSAMPIMAGE input_buf,
+;                                JDIMENSION in_row_group_ctr,
+;                                JSAMPARRAY output_buf);
+; Each .columnloop pass loads 8 Cb/Cr samples and emits up to 16 pixels (two 8-pixel Y passes).
+
+%define output_width(b)     (b) + 8     ; JDIMENSION output_width
+%define input_buf(b)        (b) + 12    ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b) (b) + 16    ; JDIMENSION in_row_group_ctr
+%define output_buf(b)       (b) + 20    ; JSAMPARRAY output_buf
+
+%define original_ebp ebp + 0
+%define wk(i)        ebp - (WK_NUM - (i)) * SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM       3
+%define gotptr       wk(0) - SIZEOF_POINTER  ; void * gotptr
+
+    align 32
+    GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_mmx)
+
+EXTN(jsimd_h2v1_merged_upsample_mmx):
+    push ebp
+    mov eax, esp  ; eax = original ebp (frame base; arguments are read relative to eax)
+    sub esp, byte 4
+    and esp, byte (-SIZEOF_MMWORD)  ; align to 64 bits
+    mov [esp], eax  ; saved so the epilogue can restore esp with "pop esp"
+    mov ebp, esp  ; ebp = aligned ebp
+    lea esp, [wk(0)]  ; reserve the wk[] scratch area below ebp
+    pushpic eax  ; make room for the GOT address
+    push ebx
+;   push ecx  ; need not be preserved
+;   push edx  ; need not be preserved
+    push esi
+    push edi
+
+    get_GOT ebx  ; get GOT address
+    movpic POINTER [gotptr], ebx  ; save GOT address
+
+    mov ecx, JDIMENSION [output_width(eax)]  ; col
+    test ecx, ecx
+    jz near .return  ; nothing to do for a zero-width row
+
+    push ecx  ; ecx is reused below as in_row_group_ctr
+
+    mov edi, JSAMPIMAGE [input_buf(eax)]
+    mov ecx, JDIMENSION [in_row_group_ctr(eax)]
+    mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+    mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+    mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+    mov edi, JSAMPARRAY [output_buf(eax)]
+    mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW]  ; inptr0
+    mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]  ; inptr1
+    mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]  ; inptr2
+    mov edi, JSAMPROW [edi]  ; outptr
+
+    pop ecx  ; col
+
+    alignx 16, 7
+.columnloop:
+    movpic eax, POINTER [gotptr]  ; load GOT address (eax)
+
+    movq mm6, MMWORD [ebx]  ; mm6=Cb(01234567)
+    movq mm7, MMWORD [edx]  ; mm7=Cr(01234567)
+
+    pxor mm1, mm1  ; mm1=(all 0's)
+    pcmpeqw mm3, mm3
+    psllw mm3, 7  ; mm3={0xFF80 0xFF80 0xFF80 0xFF80}
+
+    movq mm4, mm6
+    punpckhbw mm6, mm1  ; mm6=Cb(4567)=CbH
+    punpcklbw mm4, mm1  ; mm4=Cb(0123)=CbL
+    movq mm0, mm7
+    punpckhbw mm7, mm1  ; mm7=Cr(4567)=CrH
+    punpcklbw mm0, mm1  ; mm0=Cr(0123)=CrL
+
+    paddw mm6, mm3  ; adding 0xFF80 (-128 as a signed word) centers the chroma samples
+    paddw mm4, mm3
+    paddw mm7, mm3
+    paddw mm0, mm3
+
+    ; (Original)
+    ; R = Y                + 1.40200 * Cr
+    ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+    ; B = Y + 1.77200 * Cb
+    ;
+    ; (This implementation)
+    ; R = Y                + 0.40200 * Cr + Cr
+    ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+    ; B = Y - 0.22800 * Cb + Cb + Cb
+
+    movq mm5, mm6  ; mm5=CbH
+    movq mm2, mm4  ; mm2=CbL
+    paddw mm6, mm6  ; mm6=2*CbH
+    paddw mm4, mm4  ; mm4=2*CbL
+    movq mm1, mm7  ; mm1=CrH
+    movq mm3, mm0  ; mm3=CrL
+    paddw mm7, mm7  ; mm7=2*CrH
+    paddw mm0, mm0  ; mm0=2*CrL
+
+    pmulhw mm6, [GOTOFF(eax,PW_MF0228)]  ; mm6=(2*CbH * -FIX(0.22800))
+    pmulhw mm4, [GOTOFF(eax,PW_MF0228)]  ; mm4=(2*CbL * -FIX(0.22800))
+    pmulhw mm7, [GOTOFF(eax,PW_F0402)]  ; mm7=(2*CrH * FIX(0.40200))
+    pmulhw mm0, [GOTOFF(eax,PW_F0402)]  ; mm0=(2*CrL * FIX(0.40200))
+
+    paddw mm6, [GOTOFF(eax,PW_ONE)]  ; +1 before the >>1 below rounds to nearest
+    paddw mm4, [GOTOFF(eax,PW_ONE)]
+    psraw mm6, 1  ; mm6=(CbH * -FIX(0.22800))
+    psraw mm4, 1  ; mm4=(CbL * -FIX(0.22800))
+    paddw mm7, [GOTOFF(eax,PW_ONE)]
+    paddw mm0, [GOTOFF(eax,PW_ONE)]
+    psraw mm7, 1  ; mm7=(CrH * FIX(0.40200))
+    psraw mm0, 1  ; mm0=(CrL * FIX(0.40200))
+
+    paddw mm6, mm5
+    paddw mm4, mm2
+    paddw mm6, mm5  ; mm6=(CbH * FIX(1.77200))=(B-Y)H
+    paddw mm4, mm2  ; mm4=(CbL * FIX(1.77200))=(B-Y)L
+    paddw mm7, mm1  ; mm7=(CrH * FIX(1.40200))=(R-Y)H
+    paddw mm0, mm3  ; mm0=(CrL * FIX(1.40200))=(R-Y)L
+
+    movq MMWORD [wk(0)], mm6  ; wk(0)=(B-Y)H
+    movq MMWORD [wk(1)], mm7  ; wk(1)=(R-Y)H
+
+    movq mm6, mm5
+    movq mm7, mm2
+    punpcklwd mm5, mm1  ; interleave Cb/Cr words so pmaddwd forms Cb*c0 + Cr*c1 per pixel
+    punpckhwd mm6, mm1
+    pmaddwd mm5, [GOTOFF(eax,PW_MF0344_F0285)]
+    pmaddwd mm6, [GOTOFF(eax,PW_MF0344_F0285)]
+    punpcklwd mm2, mm3
+    punpckhwd mm7, mm3
+    pmaddwd mm2, [GOTOFF(eax,PW_MF0344_F0285)]
+    pmaddwd mm7, [GOTOFF(eax,PW_MF0344_F0285)]
+
+    paddd mm5, [GOTOFF(eax,PD_ONEHALF)]
+    paddd mm6, [GOTOFF(eax,PD_ONEHALF)]
+    psrad mm5, SCALEBITS
+    psrad mm6, SCALEBITS
+    paddd mm2, [GOTOFF(eax,PD_ONEHALF)]
+    paddd mm7, [GOTOFF(eax,PD_ONEHALF)]
+    psrad mm2, SCALEBITS
+    psrad mm7, SCALEBITS
+
+    packssdw mm5, mm6  ; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+    packssdw mm2, mm7  ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+    psubw mm5, mm1  ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+    psubw mm2, mm3  ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+
+    movq MMWORD [wk(2)], mm5  ; wk(2)=(G-Y)H
+
+    mov al, 2  ; Yctr
+    jmp short .Yloop_1st  ; first pass uses the L halves already in mm0/mm2/mm4
+    alignx 16, 7
+
+.Yloop_2nd:
+    movq mm0, MMWORD [wk(1)]  ; mm0=(R-Y)H
+    movq mm2, MMWORD [wk(2)]  ; mm2=(G-Y)H
+    movq mm4, MMWORD [wk(0)]  ; mm4=(B-Y)H
+    alignx 16, 7
+
+.Yloop_1st:
+    movq mm7, MMWORD [esi]  ; mm7=Y(01234567)
+
+    pcmpeqw mm6, mm6
+    psrlw mm6, BYTE_BIT  ; mm6={0xFF 0x00 0xFF 0x00 ..}
+    pand mm6, mm7  ; mm6=Y(0246)=YE
+    psrlw mm7, BYTE_BIT  ; mm7=Y(1357)=YO
+
+    movq mm1, mm0  ; mm1=mm0=(R-Y)(L/H)
+    movq mm3, mm2  ; mm3=mm2=(G-Y)(L/H)
+    movq mm5, mm4  ; mm5=mm4=(B-Y)(L/H)
+
+    paddw mm0, mm6  ; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6)
+    paddw mm1, mm7  ; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7)
+    packuswb mm0, mm0  ; mm0=(R0 R2 R4 R6 ** ** ** **)
+    packuswb mm1, mm1  ; mm1=(R1 R3 R5 R7 ** ** ** **)
+
+    paddw mm2, mm6  ; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6)
+    paddw mm3, mm7  ; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7)
+    packuswb mm2, mm2  ; mm2=(G0 G2 G4 G6 ** ** ** **)
+    packuswb mm3, mm3  ; mm3=(G1 G3 G5 G7 ** ** ** **)
+
+    paddw mm4, mm6  ; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6)
+    paddw mm5, mm7  ; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7)
+    packuswb mm4, mm4  ; mm4=(B0 B2 B4 B6 ** ** ** **)
+    packuswb mm5, mm5  ; mm5=(B1 B3 B5 B7 ** ** ** **)
+
+%if RGB_PIXELSIZE == 3  ; ---------------
+
+    ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+    ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+    ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+    ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
+
+    punpcklbw mmA, mmC  ; mmA=(00 10 02 12 04 14 06 16)
+    punpcklbw mmE, mmB  ; mmE=(20 01 22 03 24 05 26 07)
+    punpcklbw mmD, mmF  ; mmD=(11 21 13 23 15 25 17 27)
+
+    movq mmG, mmA
+    movq mmH, mmA
+    punpcklwd mmA, mmE  ; mmA=(00 10 20 01 02 12 22 03)
+    punpckhwd mmG, mmE  ; mmG=(04 14 24 05 06 16 26 07)
+
+    psrlq mmH, 2*BYTE_BIT  ; mmH=(02 12 04 14 06 16 -- --)
+    psrlq mmE, 2*BYTE_BIT  ; mmE=(22 03 24 05 26 07 -- --)
+
+    movq mmC, mmD
+    movq mmB, mmD
+    punpcklwd mmD, mmH  ; mmD=(11 21 02 12 13 23 04 14)
+    punpckhwd mmC, mmH  ; mmC=(15 25 06 16 17 27 -- --)
+
+    psrlq mmB, 2*BYTE_BIT  ; mmB=(13 23 15 25 17 27 -- --)
+
+    movq mmF, mmE
+    punpcklwd mmE, mmB  ; mmE=(22 03 13 23 24 05 15 25)
+    punpckhwd mmF, mmB  ; mmF=(26 07 17 27 -- -- -- --)
+
+    punpckldq mmA, mmD  ; mmA=(00 10 20 01 11 21 02 12)
+    punpckldq mmE, mmG  ; mmE=(22 03 13 23 04 14 24 05)
+    punpckldq mmC, mmF  ; mmC=(15 25 06 16 26 07 17 27)
+
+    cmp ecx, byte SIZEOF_MMWORD
+    jb short .column_st16  ; fewer than 8 pixels left: partial store
+
+    movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
+    movq MMWORD [edi+1*SIZEOF_MMWORD], mmE
+    movq MMWORD [edi+2*SIZEOF_MMWORD], mmC
+
+    sub ecx, byte SIZEOF_MMWORD
+    jz near .endcolumn
+
+    add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD  ; outptr
+    add esi, byte SIZEOF_MMWORD  ; inptr0
+    dec al  ; Yctr
+    jnz near .Yloop_2nd
+
+    add ebx, byte SIZEOF_MMWORD  ; inptr1
+    add edx, byte SIZEOF_MMWORD  ; inptr2
+    jmp near .columnloop
+    alignx 16, 7
+
+.column_st16:
+    lea ecx, [ecx+ecx*2]  ; imul ecx, RGB_PIXELSIZE
+    cmp ecx, byte 2*SIZEOF_MMWORD
+    jb short .column_st8
+    movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
+    movq MMWORD [edi+1*SIZEOF_MMWORD], mmE
+    movq mmA, mmC  ; remaining bytes are always taken from mmA
+    sub ecx, byte 2*SIZEOF_MMWORD
+    add edi, byte 2*SIZEOF_MMWORD
+    jmp short .column_st4
+.column_st8:
+    cmp ecx, byte SIZEOF_MMWORD
+    jb short .column_st4
+    movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
+    movq mmA, mmE
+    sub ecx, byte SIZEOF_MMWORD
+    add edi, byte SIZEOF_MMWORD
+.column_st4:
+    movd eax, mmA
+    cmp ecx, byte SIZEOF_DWORD
+    jb short .column_st2
+    mov DWORD [edi+0*SIZEOF_DWORD], eax
+    psrlq mmA, DWORD_BIT
+    movd eax, mmA
+    sub ecx, byte SIZEOF_DWORD
+    add edi, byte SIZEOF_DWORD
+.column_st2:
+    cmp ecx, byte SIZEOF_WORD
+    jb short .column_st1
+    mov WORD [edi+0*SIZEOF_WORD], ax
+    shr eax, WORD_BIT
+    sub ecx, byte SIZEOF_WORD
+    add edi, byte SIZEOF_WORD
+.column_st1:
+    cmp ecx, byte SIZEOF_BYTE
+    jb short .endcolumn
+    mov BYTE [edi+0*SIZEOF_BYTE], al
+
+%else  ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+    pcmpeqb mm6, mm6  ; mm6=(X0 X2 X4 X6 ** ** ** **)
+    pcmpeqb mm7, mm7  ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%else
+    pxor mm6, mm6  ; mm6=(X0 X2 X4 X6 ** ** ** **)
+    pxor mm7, mm7  ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%endif
+    ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+    ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+    ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+    ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
+
+    punpcklbw mmA, mmC  ; mmA=(00 10 02 12 04 14 06 16)
+    punpcklbw mmE, mmG  ; mmE=(20 30 22 32 24 34 26 36)
+    punpcklbw mmB, mmD  ; mmB=(01 11 03 13 05 15 07 17)
+    punpcklbw mmF, mmH  ; mmF=(21 31 23 33 25 35 27 37)
+
+    movq mmC, mmA
+    punpcklwd mmA, mmE  ; mmA=(00 10 20 30 02 12 22 32)
+    punpckhwd mmC, mmE  ; mmC=(04 14 24 34 06 16 26 36)
+    movq mmG, mmB
+    punpcklwd mmB, mmF  ; mmB=(01 11 21 31 03 13 23 33)
+    punpckhwd mmG, mmF  ; mmG=(05 15 25 35 07 17 27 37)
+
+    movq mmD, mmA
+    punpckldq mmA, mmB  ; mmA=(00 10 20 30 01 11 21 31)
+    punpckhdq mmD, mmB  ; mmD=(02 12 22 32 03 13 23 33)
+    movq mmH, mmC
+    punpckldq mmC, mmG  ; mmC=(04 14 24 34 05 15 25 35)
+    punpckhdq mmH, mmG  ; mmH=(06 16 26 36 07 17 27 37)
+
+    cmp ecx, byte SIZEOF_MMWORD
+    jb short .column_st16  ; fewer than 8 pixels left: partial store
+
+    movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
+    movq MMWORD [edi+1*SIZEOF_MMWORD], mmD
+    movq MMWORD [edi+2*SIZEOF_MMWORD], mmC
+    movq MMWORD [edi+3*SIZEOF_MMWORD], mmH
+
+    sub ecx, byte SIZEOF_MMWORD
+    jz short .endcolumn
+
+    add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD  ; outptr
+    add esi, byte SIZEOF_MMWORD  ; inptr0
+    dec al  ; Yctr
+    jnz near .Yloop_2nd
+
+    add ebx, byte SIZEOF_MMWORD  ; inptr1
+    add edx, byte SIZEOF_MMWORD  ; inptr2
+    jmp near .columnloop
+    alignx 16, 7
+
+.column_st16:
+    cmp ecx, byte SIZEOF_MMWORD/2  ; ecx still counts pixels here, not bytes
+    jb short .column_st8
+    movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
+    movq MMWORD [edi+1*SIZEOF_MMWORD], mmD
+    movq mmA, mmC
+    movq mmD, mmH
+    sub ecx, byte SIZEOF_MMWORD/2
+    add edi, byte 2*SIZEOF_MMWORD
+.column_st8:
+    cmp ecx, byte SIZEOF_MMWORD/4
+    jb short .column_st4
+    movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
+    movq mmA, mmD
+    sub ecx, byte SIZEOF_MMWORD/4
+    add edi, byte 1*SIZEOF_MMWORD
+.column_st4:
+    cmp ecx, byte SIZEOF_MMWORD/8
+    jb short .endcolumn
+    movd DWORD [edi+0*SIZEOF_DWORD], mmA
+
+%endif  ; RGB_PIXELSIZE ; ---------------
+
+.endcolumn:
+    emms  ; empty MMX state
+
+.return:
+    pop edi
+    pop esi
+;   pop edx  ; need not be preserved
+;   pop ecx  ; need not be preserved
+    pop ebx
+    mov esp, ebp  ; esp <- aligned ebp
+    pop esp  ; esp <- original ebp
+    pop ebp
+    ret
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+;
+; GLOBAL(void)
+; jsimd_h2v2_merged_upsample_mmx(JDIMENSION output_width, JSAMPIMAGE input_buf,
+;                                JDIMENSION in_row_group_ctr,
+;                                JSAMPARRAY output_buf);
+; Thin wrapper: builds a temporary 3-entry input array and calls the h2v1 routine twice.
+
+%define output_width(b)     (b) + 8     ; JDIMENSION output_width
+%define input_buf(b)        (b) + 12    ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b) (b) + 16    ; JDIMENSION in_row_group_ctr
+%define output_buf(b)       (b) + 20    ; JSAMPARRAY output_buf
+
+    align 32
+    GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_mmx)
+
+EXTN(jsimd_h2v2_merged_upsample_mmx):
+    push ebp
+    mov ebp, esp
+    push ebx
+;   push ecx  ; need not be preserved
+;   push edx  ; need not be preserved
+    push esi
+    push edi
+
+    mov eax, JDIMENSION [output_width(ebp)]  ; eax = output_width
+
+    mov edi, JSAMPIMAGE [input_buf(ebp)]  ; edi = input_buf
+    mov ecx, JDIMENSION [in_row_group_ctr(ebp)]  ; ecx = in_row_group_ctr
+    mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]  ; esi = input_buf[0]
+    mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]  ; ebx = input_buf[1]
+    mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]  ; edx = input_buf[2]
+    mov edi, JSAMPARRAY [output_buf(ebp)]  ; edi = output_buf
+    lea esi, [esi+ecx*SIZEOF_JSAMPROW]  ; pre-offset: h2v1 adds ecx again, reaching input_buf[0][2*ctr]
+
+    push edx  ; inptr2
+    push ebx  ; inptr1
+    push esi  ; inptr00
+    mov ebx, esp  ; ebx -> temporary {inptr00, inptr1, inptr2} array on the stack
+
+    push edi  ; output_buf (outptr0)
+    push ecx  ; in_row_group_ctr
+    push ebx  ; input_buf
+    push eax  ; output_width
+
+    call near EXTN(jsimd_h2v1_merged_upsample_mmx)  ; first output row
+
+    add esi, byte SIZEOF_JSAMPROW  ; inptr01
+    add edi, byte SIZEOF_JSAMPROW  ; outptr1
+    mov POINTER [ebx+0*SIZEOF_POINTER], esi  ; patch slot 0 of the temporary array
+    mov POINTER [ebx-1*SIZEOF_POINTER], edi  ; patch the output_buf argument pushed just below the array
+
+    call near EXTN(jsimd_h2v1_merged_upsample_mmx)  ; second output row, same chroma rows
+
+    add esp, byte 7*SIZEOF_DWORD  ; drop the 3 array slots and the 4 arguments
+
+    pop edi
+    pop esi
+;   pop edx  ; need not be preserved
+;   pop ecx  ; need not be preserved
+    pop ebx
+    pop ebp
+    ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+ align 32 diff --git a/simd/i386/jdmrgext-sse2.asm b/simd/i386/jdmrgext-sse2.asm new file mode 100644 index 0000000..ac4697e --- /dev/null +++ b/simd/i386/jdmrgext-sse2.asm @@ -0,0 +1,519 @@ +; +; jdmrgext.asm - merged upsampling/color conversion (SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright (C) 2012, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. 
+;
+; GLOBAL(void)
+; jsimd_h2v1_merged_upsample_sse2(JDIMENSION output_width,
+;                                 JSAMPIMAGE input_buf,
+;                                 JDIMENSION in_row_group_ctr,
+;                                 JSAMPARRAY output_buf);
+; Each .columnloop pass loads 16 Cb/Cr samples and emits up to 32 pixels (two 16-pixel Y passes).
+
+%define output_width(b)     (b) + 8     ; JDIMENSION output_width
+%define input_buf(b)        (b) + 12    ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b) (b) + 16    ; JDIMENSION in_row_group_ctr
+%define output_buf(b)       (b) + 20    ; JSAMPARRAY output_buf
+
+%define original_ebp ebp + 0
+%define wk(i)        ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+                     ; xmmword wk[WK_NUM]
+%define WK_NUM       3
+%define gotptr       wk(0) - SIZEOF_POINTER  ; void * gotptr
+
+    align 32
+    GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
+
+EXTN(jsimd_h2v1_merged_upsample_sse2):
+    push ebp
+    mov eax, esp  ; eax = original ebp (frame base; arguments are read relative to eax)
+    sub esp, byte 4
+    and esp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
+    mov [esp], eax  ; saved so the epilogue can restore esp with "pop esp"
+    mov ebp, esp  ; ebp = aligned ebp
+    lea esp, [wk(0)]  ; reserve the wk[] scratch area below ebp
+    pushpic eax  ; make room for the GOT address
+    push ebx
+;   push ecx  ; need not be preserved
+;   push edx  ; need not be preserved
+    push esi
+    push edi
+
+    get_GOT ebx  ; get GOT address
+    movpic POINTER [gotptr], ebx  ; save GOT address
+
+    mov ecx, JDIMENSION [output_width(eax)]  ; col
+    test ecx, ecx
+    jz near .return  ; nothing to do for a zero-width row
+
+    push ecx  ; ecx is reused below as in_row_group_ctr
+
+    mov edi, JSAMPIMAGE [input_buf(eax)]
+    mov ecx, JDIMENSION [in_row_group_ctr(eax)]
+    mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+    mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+    mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+    mov edi, JSAMPARRAY [output_buf(eax)]
+    mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW]  ; inptr0
+    mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]  ; inptr1
+    mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]  ; inptr2
+    mov edi, JSAMPROW [edi]  ; outptr
+
+    pop ecx  ; col
+
+    alignx 16, 7
+.columnloop:
+    movpic eax, POINTER [gotptr]  ; load GOT address (eax)
+
+    movdqa xmm6, XMMWORD [ebx]  ; xmm6=Cb(0123456789ABCDEF)
+    movdqa xmm7, XMMWORD [edx]  ; xmm7=Cr(0123456789ABCDEF)
+
+    pxor xmm1, xmm1  ; xmm1=(all 0's)
+    pcmpeqw xmm3, xmm3
+    psllw xmm3, 7  ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+
+    movdqa xmm4, xmm6
+    punpckhbw xmm6, xmm1  ; xmm6=Cb(89ABCDEF)=CbH
+    punpcklbw xmm4, xmm1  ; xmm4=Cb(01234567)=CbL
+    movdqa xmm0, xmm7
+    punpckhbw xmm7, xmm1  ; xmm7=Cr(89ABCDEF)=CrH
+    punpcklbw xmm0, xmm1  ; xmm0=Cr(01234567)=CrL
+
+    paddw xmm6, xmm3  ; adding 0xFF80 (-128 as a signed word) centers the chroma samples
+    paddw xmm4, xmm3
+    paddw xmm7, xmm3
+    paddw xmm0, xmm3
+
+    ; (Original)
+    ; R = Y                + 1.40200 * Cr
+    ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+    ; B = Y + 1.77200 * Cb
+    ;
+    ; (This implementation)
+    ; R = Y                + 0.40200 * Cr + Cr
+    ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+    ; B = Y - 0.22800 * Cb + Cb + Cb
+
+    movdqa xmm5, xmm6  ; xmm5=CbH
+    movdqa xmm2, xmm4  ; xmm2=CbL
+    paddw xmm6, xmm6  ; xmm6=2*CbH
+    paddw xmm4, xmm4  ; xmm4=2*CbL
+    movdqa xmm1, xmm7  ; xmm1=CrH
+    movdqa xmm3, xmm0  ; xmm3=CrL
+    paddw xmm7, xmm7  ; xmm7=2*CrH
+    paddw xmm0, xmm0  ; xmm0=2*CrL
+
+    pmulhw xmm6, [GOTOFF(eax,PW_MF0228)]  ; xmm6=(2*CbH * -FIX(0.22800))
+    pmulhw xmm4, [GOTOFF(eax,PW_MF0228)]  ; xmm4=(2*CbL * -FIX(0.22800))
+    pmulhw xmm7, [GOTOFF(eax,PW_F0402)]  ; xmm7=(2*CrH * FIX(0.40200))
+    pmulhw xmm0, [GOTOFF(eax,PW_F0402)]  ; xmm0=(2*CrL * FIX(0.40200))
+
+    paddw xmm6, [GOTOFF(eax,PW_ONE)]  ; +1 before the >>1 below rounds to nearest
+    paddw xmm4, [GOTOFF(eax,PW_ONE)]
+    psraw xmm6, 1  ; xmm6=(CbH * -FIX(0.22800))
+    psraw xmm4, 1  ; xmm4=(CbL * -FIX(0.22800))
+    paddw xmm7, [GOTOFF(eax,PW_ONE)]
+    paddw xmm0, [GOTOFF(eax,PW_ONE)]
+    psraw xmm7, 1  ; xmm7=(CrH * FIX(0.40200))
+    psraw xmm0, 1  ; xmm0=(CrL * FIX(0.40200))
+
+    paddw xmm6, xmm5
+    paddw xmm4, xmm2
+    paddw xmm6, xmm5  ; xmm6=(CbH * FIX(1.77200))=(B-Y)H
+    paddw xmm4, xmm2  ; xmm4=(CbL * FIX(1.77200))=(B-Y)L
+    paddw xmm7, xmm1  ; xmm7=(CrH * FIX(1.40200))=(R-Y)H
+    paddw xmm0, xmm3  ; xmm0=(CrL * FIX(1.40200))=(R-Y)L
+
+    movdqa XMMWORD [wk(0)], xmm6  ; wk(0)=(B-Y)H
+    movdqa XMMWORD [wk(1)], xmm7  ; wk(1)=(R-Y)H
+
+    movdqa xmm6, xmm5
+    movdqa xmm7, xmm2
+    punpcklwd xmm5, xmm1  ; interleave Cb/Cr words so pmaddwd forms Cb*c0 + Cr*c1 per pixel
+    punpckhwd xmm6, xmm1
+    pmaddwd xmm5, [GOTOFF(eax,PW_MF0344_F0285)]
+    pmaddwd xmm6, [GOTOFF(eax,PW_MF0344_F0285)]
+    punpcklwd xmm2, xmm3
+    punpckhwd xmm7, xmm3
+    pmaddwd xmm2, [GOTOFF(eax,PW_MF0344_F0285)]
+    pmaddwd xmm7, [GOTOFF(eax,PW_MF0344_F0285)]
+
+    paddd xmm5, [GOTOFF(eax,PD_ONEHALF)]
+    paddd xmm6, [GOTOFF(eax,PD_ONEHALF)]
+    psrad xmm5, SCALEBITS
+    psrad xmm6, SCALEBITS
+    paddd xmm2, [GOTOFF(eax,PD_ONEHALF)]
+    paddd xmm7, [GOTOFF(eax,PD_ONEHALF)]
+    psrad xmm2, SCALEBITS
+    psrad xmm7, SCALEBITS
+
+    packssdw xmm5, xmm6  ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+    packssdw xmm2, xmm7  ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+    psubw xmm5, xmm1  ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+    psubw xmm2, xmm3  ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+
+    movdqa XMMWORD [wk(2)], xmm5  ; wk(2)=(G-Y)H
+
+    mov al, 2  ; Yctr
+    jmp short .Yloop_1st  ; first pass uses the L halves already in xmm0/xmm2/xmm4
+    alignx 16, 7
+
+.Yloop_2nd:
+    movdqa xmm0, XMMWORD [wk(1)]  ; xmm0=(R-Y)H
+    movdqa xmm2, XMMWORD [wk(2)]  ; xmm2=(G-Y)H
+    movdqa xmm4, XMMWORD [wk(0)]  ; xmm4=(B-Y)H
+    alignx 16, 7
+
+.Yloop_1st:
+    movdqa xmm7, XMMWORD [esi]  ; xmm7=Y(0123456789ABCDEF)
+
+    pcmpeqw xmm6, xmm6
+    psrlw xmm6, BYTE_BIT  ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+    pand xmm6, xmm7  ; xmm6=Y(02468ACE)=YE
+    psrlw xmm7, BYTE_BIT  ; xmm7=Y(13579BDF)=YO
+
+    movdqa xmm1, xmm0  ; xmm1=xmm0=(R-Y)(L/H)
+    movdqa xmm3, xmm2  ; xmm3=xmm2=(G-Y)(L/H)
+    movdqa xmm5, xmm4  ; xmm5=xmm4=(B-Y)(L/H)
+
+    paddw xmm0, xmm6  ; xmm0=((R-Y)+YE)=RE=R(02468ACE)
+    paddw xmm1, xmm7  ; xmm1=((R-Y)+YO)=RO=R(13579BDF)
+    packuswb xmm0, xmm0  ; xmm0=R(02468ACE********)
+    packuswb xmm1, xmm1  ; xmm1=R(13579BDF********)
+
+    paddw xmm2, xmm6  ; xmm2=((G-Y)+YE)=GE=G(02468ACE)
+    paddw xmm3, xmm7  ; xmm3=((G-Y)+YO)=GO=G(13579BDF)
+    packuswb xmm2, xmm2  ; xmm2=G(02468ACE********)
+    packuswb xmm3, xmm3  ; xmm3=G(13579BDF********)
+
+    paddw xmm4, xmm6  ; xmm4=((B-Y)+YE)=BE=B(02468ACE)
+    paddw xmm5, xmm7  ; xmm5=((B-Y)+YO)=BO=B(13579BDF)
+    packuswb xmm4, xmm4  ; xmm4=B(02468ACE********)
+    packuswb xmm5, xmm5  ; xmm5=B(13579BDF********)
+
+%if RGB_PIXELSIZE == 3  ; ---------------
+
+    ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+    ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+    ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+    ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+
+    punpcklbw xmmA, xmmC  ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+    punpcklbw xmmE, xmmB  ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+    punpcklbw xmmD, xmmF  ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+
+    movdqa xmmG, xmmA
+    movdqa xmmH, xmmA
+    punpcklwd xmmA, xmmE  ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+    punpckhwd xmmG, xmmE  ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+
+    psrldq xmmH, 2  ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+    psrldq xmmE, 2  ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+
+    movdqa xmmC, xmmD
+    movdqa xmmB, xmmD
+    punpcklwd xmmD, xmmH  ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+    punpckhwd xmmC, xmmH  ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+
+    psrldq xmmB, 2  ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+
+    movdqa xmmF, xmmE
+    punpcklwd xmmE, xmmB  ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+    punpckhwd xmmF, xmmB  ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+
+    pshufd xmmH, xmmA, 0x4E  ; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+    movdqa xmmB, xmmE
+    punpckldq xmmA, xmmD  ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+    punpckldq xmmE, xmmH  ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+    punpckhdq xmmD, xmmB  ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+
+    pshufd xmmH, xmmG, 0x4E  ; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+    movdqa xmmB, xmmF
+    punpckldq xmmG, xmmC  ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+    punpckldq xmmF, xmmH  ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+    punpckhdq xmmC, xmmB  ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+
+    punpcklqdq xmmA, xmmE  ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+    punpcklqdq xmmD, xmmG  ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+    punpcklqdq xmmF, xmmC  ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+    cmp ecx, byte SIZEOF_XMMWORD
+    jb short .column_st32  ; fewer than 16 pixels left: partial store
+
+    test edi, SIZEOF_XMMWORD-1
+    jnz short .out1
+    ; --(aligned)-------------------
+    movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+    movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+    movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+    jmp short .out0
+.out1:  ; --(unaligned)-----------------
+    movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+    movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+    movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+.out0:
+    add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+    sub ecx, byte SIZEOF_XMMWORD
+    jz near .endcolumn
+
+    add esi, byte SIZEOF_XMMWORD  ; inptr0
+    dec al  ; Yctr
+    jnz near .Yloop_2nd
+
+    add ebx, byte SIZEOF_XMMWORD  ; inptr1
+    add edx, byte SIZEOF_XMMWORD  ; inptr2
+    jmp near .columnloop
+    alignx 16, 7
+
+.column_st32:
+    lea ecx, [ecx+ecx*2]  ; imul ecx, RGB_PIXELSIZE
+    cmp ecx, byte 2*SIZEOF_XMMWORD
+    jb short .column_st16
+    movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+    movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+    add edi, byte 2*SIZEOF_XMMWORD  ; outptr
+    movdqa xmmA, xmmF  ; remaining bytes are always taken from xmmA
+    sub ecx, byte 2*SIZEOF_XMMWORD
+    jmp short .column_st15
+.column_st16:
+    cmp ecx, byte SIZEOF_XMMWORD
+    jb short .column_st15
+    movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+    add edi, byte SIZEOF_XMMWORD  ; outptr
+    movdqa xmmA, xmmD
+    sub ecx, byte SIZEOF_XMMWORD
+.column_st15:
+    ; Store the lower 8 bytes of xmmA to the output when it has enough
+    ; space.
+    cmp ecx, byte SIZEOF_MMWORD
+    jb short .column_st7
+    movq XMM_MMWORD [edi], xmmA
+    add edi, byte SIZEOF_MMWORD
+    sub ecx, byte SIZEOF_MMWORD
+    psrldq xmmA, SIZEOF_MMWORD
+.column_st7:
+    ; Store the lower 4 bytes of xmmA to the output when it has enough
+    ; space.
+    cmp ecx, byte SIZEOF_DWORD
+    jb short .column_st3
+    movd XMM_DWORD [edi], xmmA
+    add edi, byte SIZEOF_DWORD
+    sub ecx, byte SIZEOF_DWORD
+    psrldq xmmA, SIZEOF_DWORD
+.column_st3:
+    ; Store the lower 2 bytes of eax to the output when it has enough
+    ; space.
+    movd eax, xmmA
+    cmp ecx, byte SIZEOF_WORD
+    jb short .column_st1
+    mov WORD [edi], ax
+    add edi, byte SIZEOF_WORD
+    sub ecx, byte SIZEOF_WORD
+    shr eax, 16
+.column_st1:
+    ; Store the lower 1 byte of eax to the output when it has enough
+    ; space.
+    test ecx, ecx
+    jz short .endcolumn
+    mov BYTE [edi], al
+
+%else  ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+    pcmpeqb xmm6, xmm6  ; xmm6=XE=X(02468ACE********)
+    pcmpeqb xmm7, xmm7  ; xmm7=XO=X(13579BDF********)
+%else
+    pxor xmm6, xmm6  ; xmm6=XE=X(02468ACE********)
+    pxor xmm7, xmm7  ; xmm7=XO=X(13579BDF********)
+%endif
+    ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+    ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+    ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+    ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+
+    punpcklbw xmmA, xmmC  ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+    punpcklbw xmmE, xmmG  ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+    punpcklbw xmmB, xmmD  ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+    punpcklbw xmmF, xmmH  ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+
+    movdqa xmmC, xmmA
+    punpcklwd xmmA, xmmE  ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+    punpckhwd xmmC, xmmE  ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+    movdqa xmmG, xmmB
+    punpcklwd xmmB, xmmF  ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+    punpckhwd xmmG, xmmF  ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+
+    movdqa xmmD, xmmA
+    punpckldq xmmA, xmmB  ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+    punpckhdq xmmD, xmmB  ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+    movdqa xmmH, xmmC
+    punpckldq xmmC, xmmG  ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+    punpckhdq xmmH, xmmG  ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+    cmp ecx, byte SIZEOF_XMMWORD
+    jb short .column_st32  ; fewer than 16 pixels left: partial store
+
+    test edi, SIZEOF_XMMWORD-1
+    jnz short .out1
+    ; --(aligned)-------------------
+    movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+    movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+    movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+    movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+    jmp short .out0
+.out1:  ; --(unaligned)-----------------
+    movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+    movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+    movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+    movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+.out0:
+    add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+    sub ecx, byte SIZEOF_XMMWORD
+    jz near .endcolumn
+
+    add esi, byte SIZEOF_XMMWORD  ; inptr0
+    dec al  ; Yctr
+    jnz near .Yloop_2nd
+
+    add ebx, byte SIZEOF_XMMWORD  ; inptr1
+    add edx, byte SIZEOF_XMMWORD  ; inptr2
+    jmp near .columnloop
+    alignx 16, 7
+
+.column_st32:
+    cmp ecx, byte SIZEOF_XMMWORD/2  ; ecx still counts pixels here, not bytes
+    jb short .column_st16
+    movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+    movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+    add edi, byte 2*SIZEOF_XMMWORD  ; outptr
+    movdqa xmmA, xmmC
+    movdqa xmmD, xmmH
+    sub ecx, byte SIZEOF_XMMWORD/2
+.column_st16:
+    cmp ecx, byte SIZEOF_XMMWORD/4
+    jb short .column_st15
+    movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+    add edi, byte SIZEOF_XMMWORD  ; outptr
+    movdqa xmmA, xmmD
+    sub ecx, byte SIZEOF_XMMWORD/4
+.column_st15:
+    ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+    ; space.
+    cmp ecx, byte SIZEOF_XMMWORD/8
+    jb short .column_st7
+    movq XMM_MMWORD [edi], xmmA
+    add edi, byte SIZEOF_XMMWORD/8*4
+    sub ecx, byte SIZEOF_XMMWORD/8
+    psrldq xmmA, SIZEOF_XMMWORD/8*4
+.column_st7:
+    ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+    ; space.
+    test ecx, ecx
+    jz short .endcolumn
+    movd XMM_DWORD [edi], xmmA
+
+%endif  ; RGB_PIXELSIZE ; ---------------
+
+.endcolumn:
+    sfence  ; flush the write buffer
+
+.return:
+    pop edi
+    pop esi
+;   pop edx  ; need not be preserved
+;   pop ecx  ; need not be preserved
+    pop ebx
+    mov esp, ebp  ; esp <- aligned ebp
+    pop esp  ; esp <- original ebp
+    pop ebp
+    ret
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+;
+; GLOBAL(void)
+; jsimd_h2v2_merged_upsample_sse2(JDIMENSION output_width,
+;                                 JSAMPIMAGE input_buf,
+;                                 JDIMENSION in_row_group_ctr,
+;                                 JSAMPARRAY output_buf);
+; Thin wrapper: builds a temporary 3-entry input array and calls the h2v1 routine twice.
+
+%define output_width(b)     (b) + 8     ; JDIMENSION output_width
+%define input_buf(b)        (b) + 12    ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b) (b) + 16    ; JDIMENSION in_row_group_ctr
+%define output_buf(b)       (b) + 20    ; JSAMPARRAY output_buf
+
+    align 32
+    GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
+
+EXTN(jsimd_h2v2_merged_upsample_sse2):
+    push ebp
+    mov ebp, esp
+    push ebx
+;   push ecx  ; need not be preserved
+;   push edx  ; need not be preserved
+    push esi
+    push edi
+
+    mov eax, JDIMENSION [output_width(ebp)]  ; eax = output_width (a JDIMENSION, not a pointer)
+
+    mov edi, JSAMPIMAGE [input_buf(ebp)]  ; edi = input_buf
+    mov ecx, JDIMENSION [in_row_group_ctr(ebp)]  ; ecx = in_row_group_ctr
+    mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]  ; esi = input_buf[0]
+    mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]  ; ebx = input_buf[1]
+    mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]  ; edx = input_buf[2]
+    mov edi, JSAMPARRAY [output_buf(ebp)]  ; edi = output_buf
+    lea esi, [esi+ecx*SIZEOF_JSAMPROW]  ; pre-offset: h2v1 adds ecx again, reaching input_buf[0][2*ctr]
+
+    push edx  ; inptr2
+    push ebx  ; inptr1
+    push esi  ; inptr00
+    mov ebx, esp  ; ebx -> temporary {inptr00, inptr1, inptr2} array on the stack
+
+    push edi  ; output_buf (outptr0)
+    push ecx  ; in_row_group_ctr
+    push ebx  ; input_buf
+    push eax  ; output_width
+
+    call near EXTN(jsimd_h2v1_merged_upsample_sse2)  ; first output row
+
+    add esi, byte SIZEOF_JSAMPROW  ; inptr01
+    add edi, byte SIZEOF_JSAMPROW  ; outptr1
+    mov POINTER [ebx+0*SIZEOF_POINTER], esi  ; patch slot 0 of the temporary array
+    mov POINTER [ebx-1*SIZEOF_POINTER], edi  ; patch the output_buf argument pushed just below the array
+
+    call near EXTN(jsimd_h2v1_merged_upsample_sse2)  ; second output row, same chroma rows
+
+    add esp, byte 7*SIZEOF_DWORD  ; drop the 3 array slots and the 4 arguments
+
+    pop edi
+    pop esi
+;   pop edx  ; need not be preserved
+;   pop ecx  ; need not be preserved
+    pop ebx
+    pop ebp
+    ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+    align 32
diff --git a/simd/i386/jdsample-avx2.asm b/simd/i386/jdsample-avx2.asm
new file mode 100644
index 0000000..61ce511
--- /dev/null
+++ b/simd/i386/jdsample-avx2.asm
@@ -0,0 +1,762 @@
+;
+; jdsample.asm - upsampling (AVX2)
+;
+; Copyright 2009 Pierre Ossman for Cendio AB
+; Copyright (C) 2015, Intel Corporation.
+; Copyright (C) 2016, D. R. Commander.
+;
+; Based on the x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+    SECTION SEG_CONST
+
+    alignz 32
+    GLOBAL_DATA(jconst_fancy_upsample_avx2)
+
+EXTN(jconst_fancy_upsample_avx2):
+
+PW_ONE   times 16 dw 1
+PW_TWO   times 16 dw 2
+PW_THREE times 16 dw 3
+PW_SEVEN times 16 dw 7
+PW_EIGHT times 16 dw 8
+
+    alignz 32
+
+; --------------------------------------------------------------------------
+    SECTION SEG_TEXT
+    BITS 32
+;
+; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
+;
+; The upsampling algorithm is linear interpolation between pixel centers,
+; also known as a "triangle filter". This is a good compromise between
+; speed and visual quality. The centers of the output pixels are 1/4 and 3/4
+; of the way between input pixel centers.
+; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_avx2(int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define downsamp_width(b) (b) + 12 ; JDIMENSION downsampled_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2) + +EXTN(jsimd_h2v1_fancy_upsample_avx2): + push ebp + mov ebp, esp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr + test eax, eax + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx, ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push eax ; colctr + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + test eax, SIZEOF_YMMWORD-1 + jz short .skip + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + vpxor ymm0, ymm0, ymm0 ; ymm0=(all 0's) + vpcmpeqb xmm7, xmm7, xmm7 + vpsrldq xmm7, xmm7, (SIZEOF_XMMWORD-1) ; (ff -- -- -- ... -- --) LSB is ff + vpand ymm7, ymm7, YMMWORD [esi+0*SIZEOF_YMMWORD] + + add eax, byte SIZEOF_YMMWORD-1 + and eax, byte -SIZEOF_YMMWORD + cmp eax, byte SIZEOF_YMMWORD + ja short .columnloop + alignx 16, 7 + +.columnloop_last: + vpcmpeqb xmm6, xmm6, xmm6 + vpslldq xmm6, xmm6, (SIZEOF_XMMWORD-1) + vperm2i128 ymm6, ymm6, ymm6, 1 ; (---- ---- ... 
---- ---- ff) MSB is ff + vpand ymm6, ymm6, YMMWORD [esi+0*SIZEOF_YMMWORD] + jmp short .upsample + alignx 16, 7 + +.columnloop: + vmovdqu ymm6, YMMWORD [esi+1*SIZEOF_YMMWORD] + vperm2i128 ymm6, ymm0, ymm6, 0x20 + vpslldq ymm6, ymm6, 15 ; ymm6=(-- -- -- ... -- -- 32) + +.upsample: + vmovdqu ymm1, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm1=( 0 1 2 ... 29 30 31) + + vperm2i128 ymm2, ymm0, ymm1, 0x20 + vpalignr ymm2, ymm1, ymm2, 15 ; ymm2=(-- 0 1 ... 28 29 30) + vperm2i128 ymm4, ymm0, ymm1, 0x03 + vpalignr ymm3, ymm4, ymm1, 1 ; ymm3=( 1 2 3 ... 30 31 --) + + vpor ymm2, ymm2, ymm7 ; ymm2=(-1 0 1 ... 28 29 30) + vpor ymm3, ymm3, ymm6 ; ymm3=( 1 2 3 ... 30 31 32) + + vpsrldq ymm7, ymm4, (SIZEOF_XMMWORD-1) ; ymm7=(31 -- -- ... -- -- --) + + vpunpckhbw ymm4, ymm1, ymm0 ; ymm4=( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm5, ymm1, ymm0 ; ymm5=( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm1, ymm5, ymm4, 0x20 ; ymm1=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpunpckhbw ymm5, ymm2, ymm0 ; ymm5=( 7 8 9 10 11 12 13 14 23 24 25 26 27 28 29 30) + vpunpcklbw ymm6, ymm2, ymm0 ; ymm6=(-1 0 1 2 3 4 5 6 15 16 17 18 19 20 21 22) + vperm2i128 ymm2, ymm6, ymm5, 0x20 ; ymm2=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14) + vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) + + vpunpckhbw ymm6, ymm3, ymm0 ; ymm6=( 9 10 11 12 13 14 15 16 25 26 27 28 29 30 31 32) + vpunpcklbw ymm0, ymm3, ymm0 ; ymm0=( 1 2 3 4 5 6 7 8 17 18 19 20 21 22 23 24) + vperm2i128 ymm3, ymm0, ymm6, 0x20 ; ymm3=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16) + vperm2i128 ymm6, ymm0, ymm6, 0x31 ; ymm6=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32) + + vpxor ymm0, ymm0, ymm0 ; ymm0=(all 0's) + + vpmullw ymm1, ymm1, [GOTOFF(ebx,PW_THREE)] + vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)] + vpaddw ymm2, ymm2, [GOTOFF(ebx,PW_ONE)] + vpaddw ymm5, ymm5, [GOTOFF(ebx,PW_ONE)] + vpaddw ymm3, ymm3, [GOTOFF(ebx,PW_TWO)] 
+ vpaddw ymm6, ymm6, [GOTOFF(ebx,PW_TWO)] + + vpaddw ymm2, ymm2, ymm1 + vpaddw ymm5, ymm5, ymm4 + vpsrlw ymm2, ymm2, 2 ; ymm2=OutLE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30) + vpsrlw ymm5, ymm5, 2 ; ymm5=OutHE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62) + vpaddw ymm3, ymm3, ymm1 + vpaddw ymm6, ymm6, ymm4 + vpsrlw ymm3, ymm3, 2 ; ymm3=OutLO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31) + vpsrlw ymm6, ymm6, 2 ; ymm6=OutHO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63) + + vpsllw ymm3, ymm3, BYTE_BIT + vpsllw ymm6, ymm6, BYTE_BIT + vpor ymm2, ymm2, ymm3 ; ymm2=OutL=( 0 1 2 ... 29 30 31) + vpor ymm5, ymm5, ymm6 ; ymm5=OutH=(32 33 34 ... 61 62 63) + + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm2 + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm5 + + sub eax, byte SIZEOF_YMMWORD + add esi, byte 1*SIZEOF_YMMWORD ; inptr + add edi, byte 2*SIZEOF_YMMWORD ; outptr + cmp eax, byte SIZEOF_YMMWORD + ja near .columnloop + test eax, eax + jnz near .columnloop_last + + pop esi + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg near .rowloop + +.return: + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. 
+; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_avx2(int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define downsamp_width(b) (b) + 12 ; JDIMENSION downsampled_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_YMMWORD + ; ymmword wk[WK_NUM] +%define WK_NUM 4 +%define gotptr wk(0) - SIZEOF_POINTER ; void *gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2) + +EXTN(jsimd_h2v2_fancy_upsample_avx2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov edx, eax ; edx = original ebp + mov eax, JDIMENSION [downsamp_width(edx)] ; colctr + test eax, eax + jz near .return + + mov ecx, INT [max_v_samp(edx)] ; rowctr + test ecx, ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(edx)] ; input_data + mov edi, POINTER [output_data_ptr(edx)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push eax ; colctr + push ecx + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + test eax, SIZEOF_YMMWORD-1 + jz short .skip + push edx + mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE 
[ebx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop edx +.skip: + ; -- process the first column block + + vmovdqu ymm0, YMMWORD [ebx+0*SIZEOF_YMMWORD] ; ymm0=row[ 0][0] + vmovdqu ymm1, YMMWORD [ecx+0*SIZEOF_YMMWORD] ; ymm1=row[-1][0] + vmovdqu ymm2, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm2=row[+1][0] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's) + + vpunpckhbw ymm4, ymm0, ymm3 ; ymm4=row[ 0]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm5, ymm0, ymm3 ; ymm5=row[ 0]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm0, ymm5, ymm4, 0x20 ; ymm0=row[ 0]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=row[ 0](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpunpckhbw ymm5, ymm1, ymm3 ; ymm5=row[-1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm6, ymm1, ymm3 ; ymm6=row[-1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm1, ymm6, ymm5, 0x20 ; ymm1=row[-1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=row[-1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpunpckhbw ymm6, ymm2, ymm3 ; ymm6=row[+1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm3, ymm2, ymm3 ; ymm3=row[+1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm2, ymm3, ymm6, 0x20 ; ymm2=row[+1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm6, ymm3, ymm6, 0x31 ; ymm6=row[+1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpmullw ymm0, ymm0, [GOTOFF(ebx,PW_THREE)] + vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)] + + vpcmpeqb xmm7, xmm7, xmm7 + vpsrldq xmm7, xmm7, (SIZEOF_XMMWORD-2) ; (ffff ---- ---- ... 
---- ----) LSB is ffff + + vpaddw ymm1, ymm1, ymm0 ; ymm1=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vpaddw ymm5, ymm5, ymm4 ; ymm5=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + vpaddw ymm2, ymm2, ymm0 ; ymm2=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vpaddw ymm6, ymm6, ymm4 ; ymm6=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vmovdqu YMMWORD [edx+0*SIZEOF_YMMWORD], ymm1 ; temporarily save + vmovdqu YMMWORD [edx+1*SIZEOF_YMMWORD], ymm5 ; the intermediate data + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm2 + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm6 + + vpand ymm1, ymm1, ymm7 ; ymm1=( 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + vpand ymm2, ymm2, ymm7 ; ymm2=( 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + + vmovdqa YMMWORD [wk(0)], ymm1 + vmovdqa YMMWORD [wk(1)], ymm2 + + poppic ebx + + add eax, byte SIZEOF_YMMWORD-1 + and eax, byte -SIZEOF_YMMWORD + cmp eax, byte SIZEOF_YMMWORD + ja short .columnloop + alignx 16, 7 + +.columnloop_last: + ; -- process the last column block + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + vpcmpeqb xmm1, xmm1, xmm1 + vpslldq xmm1, xmm1, (SIZEOF_XMMWORD-2) + vperm2i128 ymm1, ymm1, ymm1, 1 ; (---- ---- ... 
---- ---- ffff) MSB is ffff + + vpand ymm2, ymm1, YMMWORD [edi+1*SIZEOF_YMMWORD] + vpand ymm1, ymm1, YMMWORD [edx+1*SIZEOF_YMMWORD] + + vmovdqa YMMWORD [wk(2)], ymm1 ; ymm1=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31) + vmovdqa YMMWORD [wk(3)], ymm2 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31) + + jmp near .upsample + alignx 16, 7 + +.columnloop: + ; -- process the next column block + + vmovdqu ymm0, YMMWORD [ebx+1*SIZEOF_YMMWORD] ; ymm0=row[ 0][1] + vmovdqu ymm1, YMMWORD [ecx+1*SIZEOF_YMMWORD] ; ymm1=row[-1][1] + vmovdqu ymm2, YMMWORD [esi+1*SIZEOF_YMMWORD] ; ymm2=row[+1][1] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's) + + vpunpckhbw ymm4, ymm0, ymm3 ; ymm4=row[ 0]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm5, ymm0, ymm3 ; ymm5=row[ 0]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm0, ymm5, ymm4, 0x20 ; ymm0=row[ 0]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=row[ 0](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpunpckhbw ymm5, ymm1, ymm3 ; ymm5=row[-1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm6, ymm1, ymm3 ; ymm6=row[-1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm1, ymm6, ymm5, 0x20 ; ymm1=row[-1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=row[-1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpunpckhbw ymm6, ymm2, ymm3 ; ymm6=row[+1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm7, ymm2, ymm3 ; ymm7=row[+1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm2, ymm7, ymm6, 0x20 ; ymm2=row[+1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm6, ymm7, ymm6, 0x31 ; ymm6=row[+1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpmullw ymm0, ymm0, [GOTOFF(ebx,PW_THREE)] + vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)] + + vpaddw ymm1, ymm1, ymm0 ; ymm1=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 
15) + vpaddw ymm5, ymm5, ymm4 ; ymm5=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + vpaddw ymm2, ymm2, ymm0 ; ymm2=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vpaddw ymm6, ymm6, ymm4 ; ymm6=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vmovdqu YMMWORD [edx+2*SIZEOF_YMMWORD], ymm1 ; temporarily save + vmovdqu YMMWORD [edx+3*SIZEOF_YMMWORD], ymm5 ; the intermediate data + vmovdqu YMMWORD [edi+2*SIZEOF_YMMWORD], ymm2 + vmovdqu YMMWORD [edi+3*SIZEOF_YMMWORD], ymm6 + + vperm2i128 ymm1, ymm3, ymm1, 0x20 + vpslldq ymm1, ymm1, 14 ; ymm1=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 0) + vperm2i128 ymm2, ymm3, ymm2, 0x20 + vpslldq ymm2, ymm2, 14 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 0) + + vmovdqa YMMWORD [wk(2)], ymm1 + vmovdqa YMMWORD [wk(3)], ymm2 + +.upsample: + ; -- process the upper row + + vmovdqu ymm7, YMMWORD [edx+0*SIZEOF_YMMWORD] ; ymm7=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vmovdqu ymm3, YMMWORD [edx+1*SIZEOF_YMMWORD] ; ymm3=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's) + + vperm2i128 ymm0, ymm1, ymm7, 0x03 + vpalignr ymm0, ymm0, ymm7, 2 ; ymm0=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 --) + vperm2i128 ymm4, ymm1, ymm3, 0x20 + vpslldq ymm4, ymm4, 14 ; ymm4=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 16) + + vperm2i128 ymm5, ymm1, ymm7, 0x03 + vpsrldq ymm5, ymm5, 14 ; ymm5=(15 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + vperm2i128 ymm6, ymm1, ymm3, 0x20 + vpalignr ymm6, ymm3, ymm6, 14 ; ymm6=(-- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) + + vpor ymm0, ymm0, ymm4 ; ymm0=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16) + vpor ymm5, ymm5, ymm6 ; ymm5=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) + + vperm2i128 ymm2, ymm1, ymm3, 0x03 + vpalignr ymm2, ymm2, ymm3, 2 ; ymm2=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 --) + vperm2i128 ymm4, ymm1, ymm3, 0x03 + vpsrldq ymm4, ymm4, 14 ; ymm4=(31 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + vperm2i128 ymm1, 
ymm1, ymm7, 0x20 + vpalignr ymm1, ymm7, ymm1, 14 ; ymm1=(-- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14) + + vpor ymm1, ymm1, YMMWORD [wk(0)] ; ymm1=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14) + vpor ymm2, ymm2, YMMWORD [wk(2)] ; ymm2=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32) + + vmovdqa YMMWORD [wk(0)], ymm4 ; sample 31 becomes the next block's left neighbor + + vpmullw ymm7, ymm7, [GOTOFF(ebx,PW_THREE)] + vpmullw ymm3, ymm3, [GOTOFF(ebx,PW_THREE)] + vpaddw ymm1, ymm1, [GOTOFF(ebx,PW_EIGHT)] ; rounding bias of 8 for the even outputs + vpaddw ymm5, ymm5, [GOTOFF(ebx,PW_EIGHT)] + vpaddw ymm0, ymm0, [GOTOFF(ebx,PW_SEVEN)] ; rounding bias of 7 for the odd outputs + vpaddw ymm2, ymm2, [GOTOFF(ebx,PW_SEVEN)] + + vpaddw ymm1, ymm1, ymm7 + vpaddw ymm5, ymm5, ymm3 + vpsrlw ymm1, ymm1, 4 ; ymm1=Out0LE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30) + vpsrlw ymm5, ymm5, 4 ; ymm5=Out0HE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62) + vpaddw ymm0, ymm0, ymm7 + vpaddw ymm2, ymm2, ymm3 + vpsrlw ymm0, ymm0, 4 ; ymm0=Out0LO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31) + vpsrlw ymm2, ymm2, 4 ; ymm2=Out0HO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63) + + vpsllw ymm0, ymm0, BYTE_BIT + vpsllw ymm2, ymm2, BYTE_BIT + vpor ymm1, ymm1, ymm0 ; ymm1=Out0L=( 0 1 2 ... 29 30 31) + vpor ymm5, ymm5, ymm2 ; ymm5=Out0H=(32 33 34 ... 
61 62 63) + + vmovdqu YMMWORD [edx+0*SIZEOF_YMMWORD], ymm1 + vmovdqu YMMWORD [edx+1*SIZEOF_YMMWORD], ymm5 + + ; -- process the lower row + + vmovdqu ymm6, YMMWORD [edi+0*SIZEOF_YMMWORD] ; ymm6=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vmovdqu ymm4, YMMWORD [edi+1*SIZEOF_YMMWORD] ; ymm4=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's) + + vperm2i128 ymm7, ymm1, ymm6, 0x03 + vpalignr ymm7, ymm7, ymm6, 2 ; ymm7=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 --) + vperm2i128 ymm3, ymm1, ymm4, 0x20 + vpslldq ymm3, ymm3, 14 ; ymm3=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 16) + + vperm2i128 ymm0, ymm1, ymm6, 0x03 + vpsrldq ymm0, ymm0, 14 ; ymm0=(15 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + vperm2i128 ymm2, ymm1, ymm4, 0x20 + vpalignr ymm2, ymm4, ymm2, 14 ; ymm2=(-- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) + + vpor ymm7, ymm7, ymm3 ; ymm7=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16) + vpor ymm0, ymm0, ymm2 ; ymm0=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) + + vperm2i128 ymm5, ymm1, ymm4, 0x03 + vpalignr ymm5, ymm5, ymm4, 2 ; ymm5=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 --) + vperm2i128 ymm3, ymm1, ymm4, 0x03 + vpsrldq ymm3, ymm3, 14 ; ymm3=(31 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + vperm2i128 ymm1, ymm1, ymm6, 0x20 + vpalignr ymm1, ymm6, ymm1, 14 ; ymm1=(-- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14) + + vpor ymm1, ymm1, YMMWORD [wk(1)] ; ymm1=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14) + vpor ymm5, ymm5, YMMWORD [wk(3)] ; ymm5=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32) + + vmovdqa YMMWORD [wk(1)], ymm3 + + vpmullw ymm6, ymm6, [GOTOFF(ebx,PW_THREE)] + vpmullw ymm4, ymm4, [GOTOFF(ebx,PW_THREE)] + vpaddw ymm1, ymm1, [GOTOFF(ebx,PW_EIGHT)] + vpaddw ymm0, ymm0, [GOTOFF(ebx,PW_EIGHT)] + vpaddw ymm7, ymm7, [GOTOFF(ebx,PW_SEVEN)] + vpaddw ymm5, ymm5, [GOTOFF(ebx,PW_SEVEN)] + + vpaddw ymm1, ymm1, ymm6 + vpaddw ymm0, ymm0, ymm4 + vpsrlw ymm1, ymm1, 4 ; ymm1=Out1LE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 
28 30) + vpsrlw ymm0, ymm0, 4 ; ymm0=Out1HE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62) + vpaddw ymm7, ymm7, ymm6 + vpaddw ymm5, ymm5, ymm4 + vpsrlw ymm7, ymm7, 4 ; ymm7=Out1LO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31) + vpsrlw ymm5, ymm5, 4 ; ymm5=Out1HO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63) + + vpsllw ymm7, ymm7, BYTE_BIT + vpsllw ymm5, ymm5, BYTE_BIT + vpor ymm1, ymm1, ymm7 ; ymm1=Out1L=( 0 1 2 ... 29 30 31) + vpor ymm0, ymm0, ymm5 ; ymm0=Out1H=(32 33 34 ... 61 62 63) + + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm1 + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm0 + + poppic ebx + + sub eax, byte SIZEOF_YMMWORD + add ecx, byte 1*SIZEOF_YMMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_YMMWORD ; inptr0 + add esi, byte 1*SIZEOF_YMMWORD ; inptr1(below) + add edx, byte 2*SIZEOF_YMMWORD ; outptr0 + add edi, byte 2*SIZEOF_YMMWORD ; outptr1 + cmp eax, byte SIZEOF_YMMWORD + ja near .columnloop + test eax, eax + jnz near .columnloop_last + + pop esi + pop edi + pop ecx + pop eax + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + +.return: + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter. 
+; +; GLOBAL(void) +; jsimd_h2v1_upsample_avx2(int max_v_samp_factor, JDIMENSION output_width, +; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define output_width(b) (b) + 12 ; JDIMENSION output_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2) + +EXTN(jsimd_h2v1_upsample_avx2): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (SIZEOF_YMMWORD-1) + and edx, -SIZEOF_YMMWORD ; round output_width up to a whole YMMWORD + jz short .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx, ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + mov eax, edx ; colctr + alignx 16, 7 +.columnloop: + + cmp eax, byte SIZEOF_YMMWORD + ja near .above_16 + + vmovdqu xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] ; last block of the row: one XMMWORD of input + vpunpckhbw xmm1, xmm0, xmm0 + vpunpcklbw xmm0, xmm0, xmm0 + + vmovdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + vmovdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1 + + jmp short .nextrow + +.above_16: ; reached when colctr > SIZEOF_YMMWORD + vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD] + + vpermq ymm0, ymm0, 0xd8 ; swap the middle qwords so the in-lane unpacks emit samples in order + vpunpckhbw ymm1, ymm0, ymm0 + vpunpcklbw ymm0, ymm0, ymm0 + + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm0 + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm1 + + sub eax, byte 2*SIZEOF_YMMWORD + jz short .nextrow + + add esi, byte SIZEOF_YMMWORD ; inptr + add edi, byte 2*SIZEOF_YMMWORD ; outptr + jmp short .columnloop + alignx 16, 7 + +.nextrow: + pop esi + pop edi + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg 
short .rowloop + +.return: + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter. +; +; GLOBAL(void) +; jsimd_h2v2_upsample_avx2(int max_v_samp_factor, JDIMENSION output_width, +; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define output_width(b) (b) + 12 ; JDIMENSION output_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2) + +EXTN(jsimd_h2v2_upsample_avx2): + push ebp + mov ebp, esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (SIZEOF_YMMWORD-1) + and edx, -SIZEOF_YMMWORD + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx, ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + mov eax, edx ; colctr + alignx 16, 7 +.columnloop: + + cmp eax, byte SIZEOF_YMMWORD + ja short .above_16 + + vmovdqu xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + vpunpckhbw xmm1, xmm0, xmm0 + vpunpcklbw xmm0, xmm0, xmm0 + + vmovdqu XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0 + vmovdqu XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1 + vmovdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + vmovdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1 + + jmp near .nextrow + +.above_16: + vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD] + + 
vpermq ymm0, ymm0, 0xd8 + vpunpckhbw ymm1, ymm0, ymm0 + vpunpcklbw ymm0, ymm0, ymm0 + + vmovdqu YMMWORD [ebx+0*SIZEOF_YMMWORD], ymm0 + vmovdqu YMMWORD [ebx+1*SIZEOF_YMMWORD], ymm1 + vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm0 + vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm1 + + sub eax, byte 2*SIZEOF_YMMWORD + jz short .nextrow + + add esi, byte SIZEOF_YMMWORD ; inptr + add ebx, 2*SIZEOF_YMMWORD ; outptr0 + add edi, 2*SIZEOF_YMMWORD ; outptr1 + jmp short .columnloop + alignx 16, 7 + +.nextrow: + pop esi + pop edi + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + +.return: + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jdsample-mmx.asm b/simd/i386/jdsample-mmx.asm new file mode 100644 index 0000000..1f810fa --- /dev/null +++ b/simd/i386/jdsample-mmx.asm @@ -0,0 +1,733 @@ +; +; jdsample.asm - upsampling (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fancy_upsample_mmx) + +EXTN(jconst_fancy_upsample_mmx): + +PW_ONE times 4 dw 1 +PW_TWO times 4 dw 2 +PW_THREE times 4 dw 3 +PW_SEVEN times 4 dw 7 +PW_EIGHT times 4 dw 8 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. +; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_mmx(int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define downsamp_width(b) (b) + 12 ; JDIMENSION downsampled_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_mmx) + +EXTN(jsimd_h2v1_fancy_upsample_mmx): + push ebp + mov ebp, esp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr + test eax, eax + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx, ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push eax ; colctr + push edi + push esi + 
+ mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + test eax, SIZEOF_MMWORD-1 + jz short .skip + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + pxor mm0, mm0 ; mm0=(all 0's) + pcmpeqb mm7, mm7 + psrlq mm7, (SIZEOF_MMWORD-1)*BYTE_BIT + pand mm7, MMWORD [esi+0*SIZEOF_MMWORD] + + add eax, byte SIZEOF_MMWORD-1 + and eax, byte -SIZEOF_MMWORD + cmp eax, byte SIZEOF_MMWORD + ja short .columnloop + alignx 16, 7 + +.columnloop_last: + pcmpeqb mm6, mm6 + psllq mm6, (SIZEOF_MMWORD-1)*BYTE_BIT + pand mm6, MMWORD [esi+0*SIZEOF_MMWORD] + jmp short .upsample + alignx 16, 7 + +.columnloop: + movq mm6, MMWORD [esi+1*SIZEOF_MMWORD] + psllq mm6, (SIZEOF_MMWORD-1)*BYTE_BIT + +.upsample: + movq mm1, MMWORD [esi+0*SIZEOF_MMWORD] + movq mm2, mm1 + movq mm3, mm1 ; mm1=( 0 1 2 3 4 5 6 7) + psllq mm2, BYTE_BIT ; mm2=( - 0 1 2 3 4 5 6) + psrlq mm3, BYTE_BIT ; mm3=( 1 2 3 4 5 6 7 -) + + por mm2, mm7 ; mm2=(-1 0 1 2 3 4 5 6) + por mm3, mm6 ; mm3=( 1 2 3 4 5 6 7 8) + + movq mm7, mm1 + psrlq mm7, (SIZEOF_MMWORD-1)*BYTE_BIT ; mm7=( 7 - - - - - - -) + + movq mm4, mm1 + punpcklbw mm1, mm0 ; mm1=( 0 1 2 3) + punpckhbw mm4, mm0 ; mm4=( 4 5 6 7) + movq mm5, mm2 + punpcklbw mm2, mm0 ; mm2=(-1 0 1 2) + punpckhbw mm5, mm0 ; mm5=( 3 4 5 6) + movq mm6, mm3 + punpcklbw mm3, mm0 ; mm3=( 1 2 3 4) + punpckhbw mm6, mm0 ; mm6=( 5 6 7 8) + + pmullw mm1, [GOTOFF(ebx,PW_THREE)] + pmullw mm4, [GOTOFF(ebx,PW_THREE)] + paddw mm2, [GOTOFF(ebx,PW_ONE)] + paddw mm5, [GOTOFF(ebx,PW_ONE)] + paddw mm3, [GOTOFF(ebx,PW_TWO)] + paddw mm6, [GOTOFF(ebx,PW_TWO)] + + paddw mm2, mm1 + paddw mm5, mm4 + psrlw mm2, 2 ; mm2=OutLE=( 0 2 4 6) + psrlw mm5, 2 ; mm5=OutHE=( 8 10 12 14) + paddw mm3, mm1 + paddw mm6, mm4 + psrlw mm3, 2 ; mm3=OutLO=( 1 3 5 7) + psrlw mm6, 2 ; mm6=OutHO=( 9 11 13 15) + + psllw mm3, BYTE_BIT + psllw mm6, BYTE_BIT + por mm2, mm3 ; mm2=OutL=( 0 1 2 3 4 5 6 7) + por mm5, mm6 ; mm5=OutH=( 8 9 10 11 12 13 14 15) + + movq 
MMWORD [edi+0*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm5 + + sub eax, byte SIZEOF_MMWORD + add esi, byte 1*SIZEOF_MMWORD ; inptr + add edi, byte 2*SIZEOF_MMWORD ; outptr + cmp eax, byte SIZEOF_MMWORD + ja near .columnloop + test eax, eax + jnz near .columnloop_last + + pop esi + pop edi + pop eax + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. +; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_mmx(int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define downsamp_width(b) (b) + 12 ; JDIMENSION downsampled_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 4 +%define gotptr wk(0) - SIZEOF_POINTER ; void *gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_mmx) + +EXTN(jsimd_h2v2_fancy_upsample_mmx): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic eax ; make a room for GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov edx, eax ; edx = original ebp + mov eax, 
JDIMENSION [downsamp_width(edx)] ; colctr + test eax, eax + jz near .return + + mov ecx, INT [max_v_samp(edx)] ; rowctr + test ecx, ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(edx)] ; input_data + mov edi, POINTER [output_data_ptr(edx)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push eax ; colctr + push ecx + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + test eax, SIZEOF_MMWORD-1 + jz short .skip + push edx + mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop edx +.skip: + ; -- process the first column block + + movq mm0, MMWORD [ebx+0*SIZEOF_MMWORD] ; mm0=row[ 0][0] + movq mm1, MMWORD [ecx+0*SIZEOF_MMWORD] ; mm1=row[-1][0] + movq mm2, MMWORD [esi+0*SIZEOF_MMWORD] ; mm2=row[+1][0] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor mm3, mm3 ; mm3=(all 0's) + movq mm4, mm0 + punpcklbw mm0, mm3 ; mm0=row[ 0][0]( 0 1 2 3) + punpckhbw mm4, mm3 ; mm4=row[ 0][0]( 4 5 6 7) + movq mm5, mm1 + punpcklbw mm1, mm3 ; mm1=row[-1][0]( 0 1 2 3) + punpckhbw mm5, mm3 ; mm5=row[-1][0]( 4 5 6 7) + movq mm6, mm2 + punpcklbw mm2, mm3 ; mm2=row[+1][0]( 0 1 2 3) + punpckhbw mm6, mm3 ; mm6=row[+1][0]( 4 5 6 7) + + pmullw mm0, [GOTOFF(ebx,PW_THREE)] + pmullw mm4, [GOTOFF(ebx,PW_THREE)] + + pcmpeqb mm7, mm7 + psrlq mm7, (SIZEOF_MMWORD-2)*BYTE_BIT + + paddw mm1, mm0 ; mm1=Int0L=( 0 1 2 3) + paddw mm5, mm4 ; mm5=Int0H=( 4 5 6 7) + paddw mm2, mm0 ; mm2=Int1L=( 0 1 2 3) + paddw mm6, mm4 ; mm6=Int1H=( 4 5 6 7) + + movq MMWORD 
[edx+0*SIZEOF_MMWORD], mm1 ; temporarily save + movq MMWORD [edx+1*SIZEOF_MMWORD], mm5 ; the intermediate data + movq MMWORD [edi+0*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm6 + + pand mm1, mm7 ; mm1=( 0 - - -) + pand mm2, mm7 ; mm2=( 0 - - -) + + movq MMWORD [wk(0)], mm1 + movq MMWORD [wk(1)], mm2 + + poppic ebx + + add eax, byte SIZEOF_MMWORD-1 + and eax, byte -SIZEOF_MMWORD + cmp eax, byte SIZEOF_MMWORD + ja short .columnloop + alignx 16, 7 + +.columnloop_last: + ; -- process the last column block + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pcmpeqb mm1, mm1 + psllq mm1, (SIZEOF_MMWORD-2)*BYTE_BIT + movq mm2, mm1 + + pand mm1, MMWORD [edx+1*SIZEOF_MMWORD] ; mm1=( - - - 7) + pand mm2, MMWORD [edi+1*SIZEOF_MMWORD] ; mm2=( - - - 7) + + movq MMWORD [wk(2)], mm1 + movq MMWORD [wk(3)], mm2 + + jmp short .upsample + alignx 16, 7 + +.columnloop: + ; -- process the next column block + + movq mm0, MMWORD [ebx+1*SIZEOF_MMWORD] ; mm0=row[ 0][1] + movq mm1, MMWORD [ecx+1*SIZEOF_MMWORD] ; mm1=row[-1][1] + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] ; mm2=row[+1][1] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor mm3, mm3 ; mm3=(all 0's) + movq mm4, mm0 + punpcklbw mm0, mm3 ; mm0=row[ 0][1]( 0 1 2 3) + punpckhbw mm4, mm3 ; mm4=row[ 0][1]( 4 5 6 7) + movq mm5, mm1 + punpcklbw mm1, mm3 ; mm1=row[-1][1]( 0 1 2 3) + punpckhbw mm5, mm3 ; mm5=row[-1][1]( 4 5 6 7) + movq mm6, mm2 + punpcklbw mm2, mm3 ; mm2=row[+1][1]( 0 1 2 3) + punpckhbw mm6, mm3 ; mm6=row[+1][1]( 4 5 6 7) + + pmullw mm0, [GOTOFF(ebx,PW_THREE)] + pmullw mm4, [GOTOFF(ebx,PW_THREE)] + + paddw mm1, mm0 ; mm1=Int0L=( 0 1 2 3) + paddw mm5, mm4 ; mm5=Int0H=( 4 5 6 7) + paddw mm2, mm0 ; mm2=Int1L=( 0 1 2 3) + paddw mm6, mm4 ; mm6=Int1H=( 4 5 6 7) + + movq MMWORD [edx+2*SIZEOF_MMWORD], mm1 ; temporarily save + movq MMWORD [edx+3*SIZEOF_MMWORD], mm5 ; the intermediate data + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm6 + + 
psllq mm1, (SIZEOF_MMWORD-2)*BYTE_BIT ; mm1=( - - - 0) + psllq mm2, (SIZEOF_MMWORD-2)*BYTE_BIT ; mm2=( - - - 0) + + movq MMWORD [wk(2)], mm1 + movq MMWORD [wk(3)], mm2 + +.upsample: + ; -- process the upper row + + movq mm7, MMWORD [edx+0*SIZEOF_MMWORD] ; mm7=Int0L=( 0 1 2 3) + movq mm3, MMWORD [edx+1*SIZEOF_MMWORD] ; mm3=Int0H=( 4 5 6 7) + + movq mm0, mm7 + movq mm4, mm3 + psrlq mm0, 2*BYTE_BIT ; mm0=( 1 2 3 -) + psllq mm4, (SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( - - - 4) + movq mm5, mm7 + movq mm6, mm3 + psrlq mm5, (SIZEOF_MMWORD-2)*BYTE_BIT ; mm5=( 3 - - -) + psllq mm6, 2*BYTE_BIT ; mm6=( - 4 5 6) + + por mm0, mm4 ; mm0=( 1 2 3 4) + por mm5, mm6 ; mm5=( 3 4 5 6) + + movq mm1, mm7 + movq mm2, mm3 + psllq mm1, 2*BYTE_BIT ; mm1=( - 0 1 2) + psrlq mm2, 2*BYTE_BIT ; mm2=( 5 6 7 -) + movq mm4, mm3 + psrlq mm4, (SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( 7 - - -) + + por mm1, MMWORD [wk(0)] ; mm1=(-1 0 1 2) + por mm2, MMWORD [wk(2)] ; mm2=( 5 6 7 8) + + movq MMWORD [wk(0)], mm4 + + pmullw mm7, [GOTOFF(ebx,PW_THREE)] + pmullw mm3, [GOTOFF(ebx,PW_THREE)] + paddw mm1, [GOTOFF(ebx,PW_EIGHT)] + paddw mm5, [GOTOFF(ebx,PW_EIGHT)] + paddw mm0, [GOTOFF(ebx,PW_SEVEN)] + paddw mm2, [GOTOFF(ebx,PW_SEVEN)] + + paddw mm1, mm7 + paddw mm5, mm3 + psrlw mm1, 4 ; mm1=Out0LE=( 0 2 4 6) + psrlw mm5, 4 ; mm5=Out0HE=( 8 10 12 14) + paddw mm0, mm7 + paddw mm2, mm3 + psrlw mm0, 4 ; mm0=Out0LO=( 1 3 5 7) + psrlw mm2, 4 ; mm2=Out0HO=( 9 11 13 15) + + psllw mm0, BYTE_BIT + psllw mm2, BYTE_BIT + por mm1, mm0 ; mm1=Out0L=( 0 1 2 3 4 5 6 7) + por mm5, mm2 ; mm5=Out0H=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edx+0*SIZEOF_MMWORD], mm1 + movq MMWORD [edx+1*SIZEOF_MMWORD], mm5 + + ; -- process the lower row + + movq mm6, MMWORD [edi+0*SIZEOF_MMWORD] ; mm6=Int1L=( 0 1 2 3) + movq mm4, MMWORD [edi+1*SIZEOF_MMWORD] ; mm4=Int1H=( 4 5 6 7) + + movq mm7, mm6 + movq mm3, mm4 + psrlq mm7, 2*BYTE_BIT ; mm7=( 1 2 3 -) + psllq mm3, (SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( - - - 4) + movq mm0, mm6 + movq mm2, mm4 + psrlq mm0, 
(SIZEOF_MMWORD-2)*BYTE_BIT ; mm0=( 3 - - -) + psllq mm2, 2*BYTE_BIT ; mm2=( - 4 5 6) + + por mm7, mm3 ; mm7=( 1 2 3 4) + por mm0, mm2 ; mm0=( 3 4 5 6) + + movq mm1, mm6 + movq mm5, mm4 + psllq mm1, 2*BYTE_BIT ; mm1=( - 0 1 2) + psrlq mm5, 2*BYTE_BIT ; mm5=( 5 6 7 -) + movq mm3, mm4 + psrlq mm3, (SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( 7 - - -) + + por mm1, MMWORD [wk(1)] ; mm1=(-1 0 1 2) + por mm5, MMWORD [wk(3)] ; mm5=( 5 6 7 8) + + movq MMWORD [wk(1)], mm3 + + pmullw mm6, [GOTOFF(ebx,PW_THREE)] + pmullw mm4, [GOTOFF(ebx,PW_THREE)] + paddw mm1, [GOTOFF(ebx,PW_EIGHT)] + paddw mm0, [GOTOFF(ebx,PW_EIGHT)] + paddw mm7, [GOTOFF(ebx,PW_SEVEN)] + paddw mm5, [GOTOFF(ebx,PW_SEVEN)] + + paddw mm1, mm6 + paddw mm0, mm4 + psrlw mm1, 4 ; mm1=Out1LE=( 0 2 4 6) + psrlw mm0, 4 ; mm0=Out1HE=( 8 10 12 14) + paddw mm7, mm6 + paddw mm5, mm4 + psrlw mm7, 4 ; mm7=Out1LO=( 1 3 5 7) + psrlw mm5, 4 ; mm5=Out1HO=( 9 11 13 15) + + psllw mm7, BYTE_BIT + psllw mm5, BYTE_BIT + por mm1, mm7 ; mm1=Out1L=( 0 1 2 3 4 5 6 7) + por mm0, mm5 ; mm0=Out1H=( 8 9 10 11 12 13 14 15) + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm1 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm0 + + poppic ebx + + sub eax, byte SIZEOF_MMWORD + add ecx, byte 1*SIZEOF_MMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_MMWORD ; inptr0 + add esi, byte 1*SIZEOF_MMWORD ; inptr1(below) + add edx, byte 2*SIZEOF_MMWORD ; outptr0 + add edi, byte 2*SIZEOF_MMWORD ; outptr1 + cmp eax, byte SIZEOF_MMWORD + ja near .columnloop + test eax, eax + jnz near .columnloop_last + + pop esi + pop edi + pop ecx + pop eax + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast 
processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter: every input sample is written twice per row. +; +; GLOBAL(void) +; jsimd_h2v1_upsample_mmx(int max_v_samp_factor, JDIMENSION output_width, +; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define output_width(b) (b) + 12 ; JDIMENSION output_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_upsample_mmx) + +EXTN(jsimd_h2v1_upsample_mmx): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_MMWORD)-1 ; round output_width up to + and edx, byte -(2*SIZEOF_MMWORD) ; a multiple of 2*SIZEOF_MMWORD + jz short .return ; rounded width == 0 + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx, ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + mov eax, edx ; colctr + alignx 16, 7 +.columnloop: + + movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] + + movq mm1, mm0 + punpcklbw mm0, mm0 ; mm0=( 0 0 1 1 2 2 3 3) + punpckhbw mm1, mm1 ; mm1=( 4 4 5 5 6 6 7 7) + + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 + movq MMWORD [edi+1*SIZEOF_MMWORD], mm1 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] + + movq mm3, mm2 + punpcklbw mm2, mm2 ; mm2=( 8 8 9 9 10 10 11 11) + punpckhbw mm3, mm3 ; mm3=(12 12 13 13 14 14 15 15) + + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 + movq MMWORD [edi+3*SIZEOF_MMWORD], mm3 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_MMWORD ; inptr + add edi, byte 4*SIZEOF_MMWORD ; outptr + jmp short .columnloop + alignx 16, 7 + +.nextrow: + pop esi + pop edi + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg
short .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter: each input sample fills a 2x2 output block. +; +; GLOBAL(void) +; jsimd_h2v2_upsample_mmx(int max_v_samp_factor, JDIMENSION output_width, +; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define output_width(b) (b) + 12 ; JDIMENSION output_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_upsample_mmx) + +EXTN(jsimd_h2v2_upsample_mmx): + push ebp + mov ebp, esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_MMWORD)-1 ; round output_width up to + and edx, byte -(2*SIZEOF_MMWORD) ; a multiple of 2*SIZEOF_MMWORD + jz near .return ; rounded width == 0 + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx, ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + mov eax, edx ; colctr + alignx 16, 7 +.columnloop: + + movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] + + movq mm1, mm0 + punpcklbw mm0, mm0 ; mm0=( 0 0 1 1 2 2 3 3) + punpckhbw mm1, mm1 ; mm1=( 4 4 5 5 6 6 7 7) + + movq MMWORD [ebx+0*SIZEOF_MMWORD], mm0 + movq MMWORD [ebx+1*SIZEOF_MMWORD], mm1 + movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 ; outptr1 gets the same samples + movq MMWORD [edi+1*SIZEOF_MMWORD], mm1 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] + + movq mm3, mm2 + punpcklbw mm2, mm2 ; mm2=( 8 8 9 9 10 10 11 11) + punpckhbw mm3,
mm3 ; mm3=(12 12 13 13 14 14 15 15) + + movq MMWORD [ebx+2*SIZEOF_MMWORD], mm2 + movq MMWORD [ebx+3*SIZEOF_MMWORD], mm3 + movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 ; outptr1 gets the same samples + movq MMWORD [edi+3*SIZEOF_MMWORD], mm3 + + sub eax, byte 2*SIZEOF_MMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_MMWORD ; inptr + add ebx, byte 4*SIZEOF_MMWORD ; outptr0 + add edi, byte 4*SIZEOF_MMWORD ; outptr1 + jmp short .columnloop + alignx 16, 7 + +.nextrow: + pop esi + pop edi + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg short .rowloop + + emms ; empty MMX state + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jdsample-sse2.asm b/simd/i386/jdsample-sse2.asm new file mode 100644 index 0000000..f0da626 --- /dev/null +++ b/simd/i386/jdsample-sse2.asm @@ -0,0 +1,726 @@ +; +; jdsample.asm - upsampling (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fancy_upsample_sse2) + +EXTN(jconst_fancy_upsample_sse2): + +PW_ONE times 8 dw 1 ; h2v1: added to even outputs before >>2 +PW_TWO times 8 dw 2 ; h2v1: added to odd outputs before >>2 +PW_THREE times 8 dw 3 ; x3 multiplier for the current sample/row +PW_SEVEN times 8 dw 7 ; h2v2: added to odd outputs before >>4 +PW_EIGHT times 8 dw 8 ; h2v2: added to even outputs before >>4 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. +; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_sse2(int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define downsamp_width(b) (b) + 12 ; JDIMENSION downsampled_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2) + +EXTN(jsimd_h2v1_fancy_upsample_sse2): + push ebp + mov ebp, esp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr + test eax, eax + jz near .return + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx, ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push eax ; colctr + push edi + push
esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + + test eax, SIZEOF_XMMWORD-1 ; colctr a multiple of SIZEOF_XMMWORD? + jz short .skip + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] ; dl=last sample of the row + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample (copy of the last one) +.skip: + pxor xmm0, xmm0 ; xmm0=(all 0's) + pcmpeqb xmm7, xmm7 ; xmm7=(all 1's) + psrldq xmm7, (SIZEOF_XMMWORD-1) ; xmm7=mask for byte 0 + pand xmm7, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm7=( 0 -- -- ... -- -- --) + + add eax, byte SIZEOF_XMMWORD-1 ; round colctr up to + and eax, byte -SIZEOF_XMMWORD ; a multiple of SIZEOF_XMMWORD + cmp eax, byte SIZEOF_XMMWORD + ja short .columnloop ; more than one block left + alignx 16, 7 + +.columnloop_last: + pcmpeqb xmm6, xmm6 ; xmm6=(all 1's) + pslldq xmm6, (SIZEOF_XMMWORD-1) ; xmm6=mask for byte 15 + pand xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm6=(-- -- -- ... -- -- 15) + jmp short .upsample + alignx 16, 7 + +.columnloop: + movdqa xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD] + pslldq xmm6, (SIZEOF_XMMWORD-1) ; xmm6=(-- -- -- ... -- -- 16) + +.upsample: + movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] + movdqa xmm2, xmm1 + movdqa xmm3, xmm1 ; xmm1=( 0 1 2 ... 13 14 15) + pslldq xmm2, 1 ; xmm2=(-- 0 1 ... 12 13 14) + psrldq xmm3, 1 ; xmm3=( 1 2 3 ... 14 15 --) + + por xmm2, xmm7 ; xmm2=(-1 0 1 ... 12 13 14) + por xmm3, xmm6 ; xmm3=( 1 2 3 ... 14 15 16) + + movdqa xmm7, xmm1 + psrldq xmm7, (SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ...
-- -- --) + + movdqa xmm4, xmm1 + punpcklbw xmm1, xmm0 ; xmm1=( 0 1 2 3 4 5 6 7) + punpckhbw xmm4, xmm0 ; xmm4=( 8 9 10 11 12 13 14 15) + movdqa xmm5, xmm2 + punpcklbw xmm2, xmm0 ; xmm2=(-1 0 1 2 3 4 5 6) + punpckhbw xmm5, xmm0 ; xmm5=( 7 8 9 10 11 12 13 14) + movdqa xmm6, xmm3 + punpcklbw xmm3, xmm0 ; xmm3=( 1 2 3 4 5 6 7 8) + punpckhbw xmm6, xmm0 ; xmm6=( 9 10 11 12 13 14 15 16) + + pmullw xmm1, [GOTOFF(ebx,PW_THREE)] ; 3 * current sample (low half) + pmullw xmm4, [GOTOFF(ebx,PW_THREE)] ; 3 * current sample (high half) + paddw xmm2, [GOTOFF(ebx,PW_ONE)] + paddw xmm5, [GOTOFF(ebx,PW_ONE)] + paddw xmm3, [GOTOFF(ebx,PW_TWO)] + paddw xmm6, [GOTOFF(ebx,PW_TWO)] + + paddw xmm2, xmm1 ; 3*cur + previous sample + 1 + paddw xmm5, xmm4 + psrlw xmm2, 2 ; xmm2=OutLE=( 0 2 4 6 8 10 12 14) + psrlw xmm5, 2 ; xmm5=OutHE=(16 18 20 22 24 26 28 30) + paddw xmm3, xmm1 ; 3*cur + next sample + 2 + paddw xmm6, xmm4 + psrlw xmm3, 2 ; xmm3=OutLO=( 1 3 5 7 9 11 13 15) + psrlw xmm6, 2 ; xmm6=OutHO=(17 19 21 23 25 27 29 31) + + psllw xmm3, BYTE_BIT ; odd outputs go to the high byte of each word + psllw xmm6, BYTE_BIT + por xmm2, xmm3 ; xmm2=OutL=( 0 1 2 ... 13 14 15) + por xmm5, xmm6 ; xmm5=OutH=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm5 + + sub eax, byte SIZEOF_XMMWORD + add esi, byte 1*SIZEOF_XMMWORD ; inptr + add edi, byte 2*SIZEOF_XMMWORD ; outptr + cmp eax, byte SIZEOF_XMMWORD + ja near .columnloop ; more than one block left + test eax, eax + jnz near .columnloop_last ; exactly one block left + + pop esi + pop edi + pop eax ; colctr + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above.
+; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_sse2(int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define downsamp_width(b) (b) + 12 ; JDIMENSION downsampled_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 4 +%define gotptr wk(0) - SIZEOF_POINTER ; void *gotptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2) + +EXTN(jsimd_h2v2_fancy_upsample_sse2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax ; [original_ebp] = eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] ; reserve wk[WK_NUM] below ebp + pushpic eax ; make room for the GOT address + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + movpic POINTER [gotptr], ebx ; save GOT address + + mov edx, eax ; edx = original ebp + mov eax, JDIMENSION [downsamp_width(edx)] ; colctr + test eax, eax + jz near .return + + mov ecx, INT [max_v_samp(edx)] ; rowctr + test ecx, ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(edx)] ; input_data + mov edi, POINTER [output_data_ptr(edx)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push eax ; colctr + push ecx ; rowctr + push edi + push esi + + mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 + mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + + test eax, SIZEOF_XMMWORD-1 ; colctr a multiple of SIZEOF_XMMWORD? + jz short .skip + push edx ; save outptr0 (dl is used as scratch) + mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE
[ebx+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample (copy of the last one) + pop edx +.skip: + ; -- process the first column block + + movdqa xmm0, XMMWORD [ebx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0] + movdqa xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0] + movdqa xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor xmm3, xmm3 ; xmm3=(all 0's) + movdqa xmm4, xmm0 + punpcklbw xmm0, xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4, xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5, xmm1 + punpcklbw xmm1, xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5, xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6, xmm2 + punpcklbw xmm2, xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6, xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0, [GOTOFF(ebx,PW_THREE)] ; xmm0=3*row[ 0] (low half) + pmullw xmm4, [GOTOFF(ebx,PW_THREE)] ; xmm4=3*row[ 0] (high half) + + pcmpeqb xmm7, xmm7 ; xmm7=(all 1's) + psrldq xmm7, (SIZEOF_XMMWORD-2) ; xmm7=mask for word 0 + + paddw xmm1, xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5, xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2, xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6, xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm6 + + pand xmm1, xmm7 ; xmm1=( 0 -- -- -- -- -- -- --) + pand xmm2, xmm7 ; xmm2=( 0 -- -- -- -- -- -- --) + + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=Int0 value used as column -1 + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=Int1 value used as column -1 + + poppic ebx + + add eax, byte SIZEOF_XMMWORD-1 ; round colctr up to + and eax, byte -SIZEOF_XMMWORD ; a multiple of SIZEOF_XMMWORD + cmp eax, byte SIZEOF_XMMWORD + ja short .columnloop ; more than one block left + alignx 16, 7 + +.columnloop_last: + ; -- process the last column block + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pcmpeqb xmm1,
xmm1 ; xmm1=(all 1's) + pslldq xmm1, (SIZEOF_XMMWORD-2) ; xmm1=mask for word 7 + movdqa xmm2, xmm1 ; xmm2=same mask + + pand xmm1, XMMWORD [edx+1*SIZEOF_XMMWORD] + pand xmm2, XMMWORD [edi+1*SIZEOF_XMMWORD] + + movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15) + movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15) + + jmp near .upsample + alignx 16, 7 + +.columnloop: + ; -- process the next column block + + movdqa xmm0, XMMWORD [ebx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1] + movdqa xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1] + movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1] + + pushpic ebx + movpic ebx, POINTER [gotptr] ; load GOT address + + pxor xmm3, xmm3 ; xmm3=(all 0's) + movdqa xmm4, xmm0 + punpcklbw xmm0, xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4, xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5, xmm1 + punpcklbw xmm1, xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5, xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6, xmm2 + punpcklbw xmm2, xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6, xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0, [GOTOFF(ebx,PW_THREE)] ; xmm0=3*row[ 0] (low half) + pmullw xmm4, [GOTOFF(ebx,PW_THREE)] ; xmm4=3*row[ 0] (high half) + + paddw xmm1, xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5, xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2, xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6, xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [edx+2*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [edx+3*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm6 + + pslldq xmm1, (SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- 0) + pslldq xmm2, (SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- -- 0) + + movdqa XMMWORD [wk(2)], xmm1 ; wk(2)=Int0 value used as column 16 + movdqa XMMWORD [wk(3)], xmm2 ; wk(3)=Int1 value used as column 16 + +.upsample: + ; -- process the upper row + + movdqa xmm7, XMMWORD [edx+0*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [edx+1*SIZEOF_XMMWORD] + + movdqa xmm0, xmm7 ; xmm7=Int0L=( 0 1 2 3 4 5 6 7) +
movdqa xmm4, xmm3 ; xmm3=Int0H=( 8 9 10 11 12 13 14 15) + psrldq xmm0, 2 ; xmm0=( 1 2 3 4 5 6 7 --) + pslldq xmm4, (SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- -- 8) + movdqa xmm5, xmm7 + movdqa xmm6, xmm3 + psrldq xmm5, (SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --) + pslldq xmm6, 2 ; xmm6=(-- 8 9 10 11 12 13 14) + + por xmm0, xmm4 ; xmm0=( 1 2 3 4 5 6 7 8) + por xmm5, xmm6 ; xmm5=( 7 8 9 10 11 12 13 14) + + movdqa xmm1, xmm7 + movdqa xmm2, xmm3 + pslldq xmm1, 2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm2, 2 ; xmm2=( 9 10 11 12 13 14 15 --) + movdqa xmm4, xmm3 + psrldq xmm4, (SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(0)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm2, XMMWORD [wk(2)] ; xmm2=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=column -1 for the next block + + pmullw xmm7, [GOTOFF(ebx,PW_THREE)] + pmullw xmm3, [GOTOFF(ebx,PW_THREE)] + paddw xmm1, [GOTOFF(ebx,PW_EIGHT)] + paddw xmm5, [GOTOFF(ebx,PW_EIGHT)] + paddw xmm0, [GOTOFF(ebx,PW_SEVEN)] + paddw xmm2, [GOTOFF(ebx,PW_SEVEN)] + + paddw xmm1, xmm7 + paddw xmm5, xmm3 + psrlw xmm1, 4 ; xmm1=Out0LE=( 0 2 4 6 8 10 12 14) + psrlw xmm5, 4 ; xmm5=Out0HE=(16 18 20 22 24 26 28 30) + paddw xmm0, xmm7 + paddw xmm2, xmm3 + psrlw xmm0, 4 ; xmm0=Out0LO=( 1 3 5 7 9 11 13 15) + psrlw xmm2, 4 ; xmm2=Out0HO=(17 19 21 23 25 27 29 31) + + psllw xmm0, BYTE_BIT ; odd outputs go to the high byte of each word + psllw xmm2, BYTE_BIT + por xmm1, xmm0 ; xmm1=Out0L=( 0 1 2 ... 13 14 15) + por xmm5, xmm2 ; xmm5=Out0H=(16 17 18 ...
29 30 31) + + movdqa XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1 ; overwrite the saved Int0 data + movdqa XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5 + + ; -- process the lower row + + movdqa xmm6, XMMWORD [edi+0*SIZEOF_XMMWORD] + movdqa xmm4, XMMWORD [edi+1*SIZEOF_XMMWORD] + + movdqa xmm7, xmm6 ; xmm6=Int1L=( 0 1 2 3 4 5 6 7) + movdqa xmm3, xmm4 ; xmm4=Int1H=( 8 9 10 11 12 13 14 15) + psrldq xmm7, 2 ; xmm7=( 1 2 3 4 5 6 7 --) + pslldq xmm3, (SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- -- 8) + movdqa xmm0, xmm6 + movdqa xmm2, xmm4 + psrldq xmm0, (SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --) + pslldq xmm2, 2 ; xmm2=(-- 8 9 10 11 12 13 14) + + por xmm7, xmm3 ; xmm7=( 1 2 3 4 5 6 7 8) + por xmm0, xmm2 ; xmm0=( 7 8 9 10 11 12 13 14) + + movdqa xmm1, xmm6 + movdqa xmm5, xmm4 + pslldq xmm1, 2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm5, 2 ; xmm5=( 9 10 11 12 13 14 15 --) + movdqa xmm3, xmm4 + psrldq xmm3, (SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(1)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm5, XMMWORD [wk(3)] ; xmm5=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=column -1 for the next block + + pmullw xmm6, [GOTOFF(ebx,PW_THREE)] + pmullw xmm4, [GOTOFF(ebx,PW_THREE)] + paddw xmm1, [GOTOFF(ebx,PW_EIGHT)] + paddw xmm0, [GOTOFF(ebx,PW_EIGHT)] + paddw xmm7, [GOTOFF(ebx,PW_SEVEN)] + paddw xmm5, [GOTOFF(ebx,PW_SEVEN)] + + paddw xmm1, xmm6 + paddw xmm0, xmm4 + psrlw xmm1, 4 ; xmm1=Out1LE=( 0 2 4 6 8 10 12 14) + psrlw xmm0, 4 ; xmm0=Out1HE=(16 18 20 22 24 26 28 30) + paddw xmm7, xmm6 + paddw xmm5, xmm4 + psrlw xmm7, 4 ; xmm7=Out1LO=( 1 3 5 7 9 11 13 15) + psrlw xmm5, 4 ; xmm5=Out1HO=(17 19 21 23 25 27 29 31) + + psllw xmm7, BYTE_BIT ; odd outputs go to the high byte of each word + psllw xmm5, BYTE_BIT + por xmm1, xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15) + por xmm0, xmm5 ; xmm0=Out1H=(16 17 18 ...
29 30 31) + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1 ; overwrite the saved Int1 data + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0 + + poppic ebx + + sub eax, byte SIZEOF_XMMWORD + add ecx, byte 1*SIZEOF_XMMWORD ; inptr1(above) + add ebx, byte 1*SIZEOF_XMMWORD ; inptr0 + add esi, byte 1*SIZEOF_XMMWORD ; inptr1(below) + add edx, byte 2*SIZEOF_XMMWORD ; outptr0 + add edi, byte 2*SIZEOF_XMMWORD ; outptr1 + cmp eax, byte SIZEOF_XMMWORD + ja near .columnloop ; more than one block left + test eax, eax + jnz near .columnloop_last ; exactly one block left + + pop esi + pop edi + pop ecx ; rowctr + pop eax ; colctr + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter.
+; +; GLOBAL(void) +; jsimd_h2v1_upsample_sse2(int max_v_samp_factor, JDIMENSION output_width, +; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define output_width(b) (b) + 12 ; JDIMENSION output_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2) + +EXTN(jsimd_h2v1_upsample_sse2): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_XMMWORD)-1 ; round output_width up to + and edx, byte -(2*SIZEOF_XMMWORD) ; a multiple of 2*SIZEOF_XMMWORD + jz short .return ; rounded width == 0 + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx, ecx + jz short .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov edi, JSAMPROW [edi] ; outptr + mov eax, edx ; colctr + alignx 16, 7 +.columnloop: + + movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm0 ; xmm0=( 0 0 1 1 ... 6 6 7 7) + punpckhbw xmm1, xmm1 ; xmm1=( 8 8 9 9 ... 14 14 15 15) + + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] + + movdqa xmm3, xmm2 + punpcklbw xmm2, xmm2 ; xmm2=(16 16 17 17 ... 22 22 23 23) + punpckhbw xmm3, xmm3 ; xmm3=(24 24 25 25 ... 30 30 31 31) + + movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_XMMWORD ; inptr + add edi, byte 4*SIZEOF_XMMWORD ; outptr + jmp short .columnloop + alignx 16, 7 + +.nextrow: + pop esi + pop edi + + add esi, byte SIZEOF_JSAMPROW ; input_data + add edi, byte SIZEOF_JSAMPROW ; output_data + dec ecx ; rowctr + jg short .rowloop + +.return: + pop edi + pop
esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved +; pop ebx ; unused + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter: each input sample fills a 2x2 output block. +; +; GLOBAL(void) +; jsimd_h2v2_upsample_sse2(int max_v_samp_factor, JDIMENSION output_width, +; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +; + +%define max_v_samp(b) (b) + 8 ; int max_v_samp_factor +%define output_width(b) (b) + 12 ; JDIMENSION output_width +%define input_data(b) (b) + 16 ; JSAMPARRAY input_data +%define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2) + +EXTN(jsimd_h2v2_upsample_sse2): + push ebp + mov ebp, esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov edx, JDIMENSION [output_width(ebp)] + add edx, byte (2*SIZEOF_XMMWORD)-1 ; round output_width up to + and edx, byte -(2*SIZEOF_XMMWORD) ; a multiple of 2*SIZEOF_XMMWORD + jz near .return ; rounded width == 0 + + mov ecx, INT [max_v_samp(ebp)] ; rowctr + test ecx, ecx + jz near .return + + mov esi, JSAMPARRAY [input_data(ebp)] ; input_data + mov edi, POINTER [output_data_ptr(ebp)] + mov edi, JSAMPARRAY [edi] ; output_data + alignx 16, 7 +.rowloop: + push edi + push esi + + mov esi, JSAMPROW [esi] ; inptr + mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 + mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 + mov eax, edx ; colctr + alignx 16, 7 +.columnloop: + + movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] + + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm0 ; xmm0=( 0 0 1 1 ... 6 6 7 7) + punpckhbw xmm1, xmm1 ; xmm1=( 8 8 9 9 ... 14 14 15 15) + + movdqa XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 ; outptr1 gets the same samples + movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] + + movdqa xmm3, xmm2 + punpcklbw xmm2, xmm2 ; xmm2=(16 16 17 17 ... 22 22 23 23) + punpckhbw xmm3, xmm3 ; xmm3=(24 24 25 25 ... 30 30 31 31) + + movdqa
XMMWORD [ebx+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [ebx+3*SIZEOF_XMMWORD], xmm3 + movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 ; outptr1 gets the same samples + movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3 + + sub eax, byte 2*SIZEOF_XMMWORD + jz short .nextrow + + add esi, byte 2*SIZEOF_XMMWORD ; inptr + add ebx, byte 4*SIZEOF_XMMWORD ; outptr0 + add edi, byte 4*SIZEOF_XMMWORD ; outptr1 + jmp short .columnloop + alignx 16, 7 + +.nextrow: + pop esi + pop edi + + add esi, byte 1*SIZEOF_JSAMPROW ; input_data + add edi, byte 2*SIZEOF_JSAMPROW ; output_data + sub ecx, byte 2 ; rowctr + jg short .rowloop + +.return: + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jfdctflt-3dn.asm b/simd/i386/jfdctflt-3dn.asm new file mode 100644 index 0000000..1d45865 --- /dev/null +++ b/simd/i386/jfdctflt-3dn.asm @@ -0,0 +1,320 @@ +; +; jfdctflt.asm - floating-point FDCT (3DNow!) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the forward DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jfdctflt.c; see jfdctflt.c for more details.
+; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fdct_float_3dnow) + +EXTN(jconst_fdct_float_3dnow): + +PD_0_382 times 2 dd 0.382683432365089771728460 +PD_0_707 times 2 dd 0.707106781186547524400844 +PD_0_541 times 2 dd 0.541196100146196984399723 +PD_1_306 times 2 dd 1.306562964876376527856643 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_float_3dnow(FAST_FLOAT *data) +; + +%define data(b) (b) + 8 ; FAST_FLOAT *data + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_float_3dnow) + +EXTN(jsimd_fdct_float_3dnow): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. 
+ + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/2 + alignx 16, 7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] + + ; mm0=(00 01), mm1=(10 11), mm2=(06 07), mm3=(16 17) + + movq mm4, mm0 ; transpose coefficients + punpckldq mm0, mm1 ; mm0=(00 10)=data0 + punpckhdq mm4, mm1 ; mm4=(01 11)=data1 + movq mm5, mm2 ; transpose coefficients + punpckldq mm2, mm3 ; mm2=(06 16)=data6 + punpckhdq mm5, mm3 ; mm5=(07 17)=data7 + + movq mm6, mm4 + movq mm7, mm0 + pfsub mm4, mm2 ; mm4=data1-data6=tmp6 + pfsub mm0, mm5 ; mm0=data0-data7=tmp7 + pfadd mm6, mm2 ; mm6=data1+data6=tmp1 + pfadd mm7, mm5 ; mm7=data0+data7=tmp0 + + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] + movq mm5, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] + + ; mm1=(02 03), mm3=(12 13), mm2=(04 05), mm5=(14 15) + + movq MMWORD [wk(0)], mm4 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm0 ; wk(1)=tmp7 + + movq mm4, mm1 ; transpose coefficients + punpckldq mm1, mm3 ; mm1=(02 12)=data2 + punpckhdq mm4, mm3 ; mm4=(03 13)=data3 + movq mm0, mm2 ; transpose coefficients + punpckldq mm2, mm5 ; mm2=(04 14)=data4 + punpckhdq mm0, mm5 ; mm0=(05 15)=data5 + + movq mm3, mm4 + movq mm5, mm1 + pfadd mm4, mm2 ; mm4=data3+data4=tmp3 + pfadd mm1, mm0 ; mm1=data2+data5=tmp2 + pfsub mm3, mm2 ; mm3=data3-data4=tmp4 + pfsub mm5, mm0 ; mm5=data2-data5=tmp5 + + ; -- Even part + + movq mm2, mm7 + movq mm0, mm6 + pfsub mm7, mm4 ; mm7=tmp13 + pfsub mm6, mm1 ; mm6=tmp12 + pfadd mm2, mm4 ; mm2=tmp10 + pfadd mm0, mm1 ; mm0=tmp11 + + pfadd mm6, mm7 + pfmul mm6, [GOTOFF(ebx,PD_0_707)] ; mm6=z1 + + movq mm4, mm2 + movq mm1, mm7 + pfsub mm2, mm0 ; mm2=data4 + pfsub mm7, mm6 ; mm7=data6 + pfadd mm4, mm0 ; mm4=data0 + pfadd mm1, mm6 
; mm1=data2 + + movq MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)], mm2 + movq MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], mm1 + + ; -- Odd part + + movq mm0, MMWORD [wk(0)] ; mm0=tmp6 + movq mm6, MMWORD [wk(1)] ; mm6=tmp7 + + pfadd mm3, mm5 ; mm3=tmp10 + pfadd mm5, mm0 ; mm5=tmp11 + pfadd mm0, mm6 ; mm0=tmp12, mm6=tmp7 + + pfmul mm5, [GOTOFF(ebx,PD_0_707)] ; mm5=z3 + + movq mm2, mm3 ; mm2=tmp10 + pfsub mm3, mm0 + pfmul mm3, [GOTOFF(ebx,PD_0_382)] ; mm3=z5 + pfmul mm2, [GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610) + pfmul mm0, [GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296) + pfadd mm2, mm3 ; mm2=z2 + pfadd mm0, mm3 ; mm0=z4 + + movq mm7, mm6 + pfsub mm6, mm5 ; mm6=z13 + pfadd mm7, mm5 ; mm7=z11 + + movq mm4, mm6 + movq mm1, mm7 + pfsub mm6, mm2 ; mm6=data3 + pfsub mm7, mm0 ; mm7=data7 + pfadd mm4, mm2 ; mm4=data5 + pfadd mm1, mm0 ; mm1=data1 + + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1 + + add edx, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. 
+ + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/2 + alignx 16, 7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)] + + ; mm0=(00 10), mm1=(01 11), mm2=(60 70), mm3=(61 71) + + movq mm4, mm0 ; transpose coefficients + punpckldq mm0, mm1 ; mm0=(00 01)=data0 + punpckhdq mm4, mm1 ; mm4=(10 11)=data1 + movq mm5, mm2 ; transpose coefficients + punpckldq mm2, mm3 ; mm2=(60 61)=data6 + punpckhdq mm5, mm3 ; mm5=(70 71)=data7 + + movq mm6, mm4 + movq mm7, mm0 + pfsub mm4, mm2 ; mm4=data1-data6=tmp6 + pfsub mm0, mm5 ; mm0=data0-data7=tmp7 + pfadd mm6, mm2 ; mm6=data1+data6=tmp1 + pfadd mm7, mm5 ; mm7=data0+data7=tmp0 + + movq mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)] + movq mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)] + + ; mm1=(20 30), mm3=(21 31), mm2=(40 50), mm5=(41 51) + + movq MMWORD [wk(0)], mm4 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm0 ; wk(1)=tmp7 + + movq mm4, mm1 ; transpose coefficients + punpckldq mm1, mm3 ; mm1=(20 21)=data2 + punpckhdq mm4, mm3 ; mm4=(30 31)=data3 + movq mm0, mm2 ; transpose coefficients + punpckldq mm2, mm5 ; mm2=(40 41)=data4 + punpckhdq mm0, mm5 ; mm0=(50 51)=data5 + + movq mm3, mm4 + movq mm5, mm1 + pfadd mm4, mm2 ; mm4=data3+data4=tmp3 + pfadd mm1, mm0 ; mm1=data2+data5=tmp2 + pfsub mm3, mm2 ; mm3=data3-data4=tmp4 + pfsub mm5, mm0 ; mm5=data2-data5=tmp5 + + ; -- Even part + + movq mm2, mm7 + movq mm0, mm6 + pfsub mm7, mm4 ; mm7=tmp13 + pfsub mm6, mm1 ; mm6=tmp12 + pfadd mm2, mm4 ; mm2=tmp10 + pfadd mm0, mm1 ; mm0=tmp11 + + pfadd mm6, mm7 + pfmul mm6, [GOTOFF(ebx,PD_0_707)] ; mm6=z1 + + movq mm4, mm2 + movq mm1, mm7 + pfsub mm2, mm0 ; mm2=data4 + pfsub mm7, mm6 ; mm7=data6 + pfadd mm4, mm0 ; mm4=data0 + pfadd mm1, 
mm6 ; mm1=data2 + + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], mm2 + movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], mm1 + + ; -- Odd part + + movq mm0, MMWORD [wk(0)] ; mm0=tmp6 + movq mm6, MMWORD [wk(1)] ; mm6=tmp7 + + pfadd mm3, mm5 ; mm3=tmp10 + pfadd mm5, mm0 ; mm5=tmp11 + pfadd mm0, mm6 ; mm0=tmp12, mm6=tmp7 + + pfmul mm5, [GOTOFF(ebx,PD_0_707)] ; mm5=z3 + + movq mm2, mm3 ; mm2=tmp10 + pfsub mm3, mm0 + pfmul mm3, [GOTOFF(ebx,PD_0_382)] ; mm3=z5 + pfmul mm2, [GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610) + pfmul mm0, [GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296) + pfadd mm2, mm3 ; mm2=z2 + pfadd mm0, mm3 ; mm0=z4 + + movq mm7, mm6 + pfsub mm6, mm5 ; mm6=z13 + pfadd mm7, mm5 ; mm7=z11 + + movq mm4, mm6 + movq mm1, mm7 + pfsub mm6, mm2 ; mm6=data3 + pfsub mm7, mm0 ; mm7=data7 + pfadd mm4, mm2 ; mm4=data5 + pfadd mm1, mm0 ; mm1=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], mm7 + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1 + + add edx, byte 2*SIZEOF_FAST_FLOAT + dec ecx + jnz near .columnloop + + femms ; empty MMX/3DNow! state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jfdctflt-sse.asm b/simd/i386/jfdctflt-sse.asm new file mode 100644 index 0000000..1faf835 --- /dev/null +++ b/simd/i386/jfdctflt-sse.asm @@ -0,0 +1,371 @@ +; +; jfdctflt.asm - floating-point FDCT (SSE) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. 
+; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the forward DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1, %2, 0x44 ; imm8 0x44: low two dwords of %1, then low two dwords of %2 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1, %2, 0xEE ; imm8 0xEE: high two dwords of %1, then high two dwords of %2 +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fdct_float_sse) + +EXTN(jconst_fdct_float_sse): + +PD_0_382 times 4 dd 0.382683432365089771728460 ; each constant is replicated 4 times (one per packed-single lane) +PD_0_707 times 4 dd 0.707106781186547524400844 +PD_0_541 times 4 dd 0.541196100146196984399723 +PD_1_306 times 4 dd 1.306562964876376527856643 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples.
+; +; GLOBAL(void) +; jsimd_fdct_float_sse(FAST_FLOAT *data) +; The transform is done in place: every load and store goes through edx, which is loaded from data. + +%define data(b) (b) + 8 ; FAST_FLOAT *data (b points at the saved ebp pushed on entry) + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_float_sse) + +EXTN(jsimd_fdct_float_sse): + push ebp + mov eax, esp ; eax = esp = address of the saved (original) ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax ; keep that address at the base of the aligned frame + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] ; esp = &wk[0]: reserve the WK_NUM scratch slots below ebp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/4 ; loop count: each iteration handles 4 rows + alignx 16, 7 +.rowloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(20 21 22 23), xmm2=(24 25 26 27) + ; xmm1=(30 31 32 33), xmm3=(34 35 36 37) + + movaps xmm4, xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0, xmm1 ; xmm0=(20 30 21 31) + unpckhps xmm4, xmm1 ; xmm4=(22 32 23 33) + movaps xmm5, xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2, xmm3 ; xmm2=(24 34 25 35) + unpckhps xmm5, xmm3 ; xmm5=(26 36 27 37) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 01 02 03), xmm1=(04 05 06 07) + ; xmm7=(10 11 12 13), xmm3=(14 15 16 17) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 32 23 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(24 34 25 35) + + movaps xmm4, xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6, xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm4, xmm7 ; xmm4=(02 12 03
13) + movaps xmm2, xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1, xmm3 ; xmm1=(04 14 05 15) + unpckhps xmm2, xmm3 ; xmm2=(06 16 07 17) + + movaps xmm7, xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6, xmm0 ; xmm6=(00 10 20 30)=data0 + unpckhps2 xmm7, xmm0 ; xmm7=(01 11 21 31)=data1 + movaps xmm3, xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2, xmm5 ; xmm2=(06 16 26 36)=data6 + unpckhps2 xmm3, xmm5 ; xmm3=(07 17 27 37)=data7 + + movaps xmm0, xmm7 + movaps xmm5, xmm6 + subps xmm7, xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6, xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0, xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5, xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 32 23 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(24 34 25 35) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7, xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4, xmm2 ; xmm4=(02 12 22 32)=data2 + unpckhps2 xmm7, xmm2 ; xmm7=(03 13 23 33)=data3 + movaps xmm6, xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1, xmm3 ; xmm1=(04 14 24 34)=data4 + unpckhps2 xmm6, xmm3 ; xmm6=(05 15 25 35)=data5 + + movaps xmm2, xmm7 + movaps xmm3, xmm4 + addps xmm7, xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4, xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2, xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3, xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1, xmm5 + movaps xmm6, xmm0 + subps xmm5, xmm7 ; xmm5=tmp13 + subps xmm0, xmm4 ; xmm0=tmp12 + addps xmm1, xmm7 ; xmm1=tmp10 + addps xmm6, xmm4 ; xmm6=tmp11 + + addps xmm0, xmm5 + mulps xmm0, [GOTOFF(ebx,PD_0_707)] ; xmm0=z1 + + movaps xmm7, xmm1 + movaps xmm4, xmm5 + subps xmm1, xmm6 ; xmm1=data4 + subps xmm5, xmm0 ; xmm5=data6 + addps xmm7, xmm6 ; xmm7=data0 + addps xmm4, xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 +
movaps XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2, xmm3 ; xmm2=tmp10 + addps xmm3, xmm6 ; xmm3=tmp11 + addps xmm6, xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3, [GOTOFF(ebx,PD_0_707)] ; xmm3=z3 + + movaps xmm1, xmm2 ; xmm1=tmp10 + subps xmm2, xmm6 + mulps xmm2, [GOTOFF(ebx,PD_0_382)] ; xmm2=z5 + mulps xmm1, [GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6, [GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1, xmm2 ; xmm1=z2 + addps xmm6, xmm2 ; xmm6=z4 + + movaps xmm5, xmm0 + subps xmm0, xmm3 ; xmm0=z13 + addps xmm5, xmm3 ; xmm5=z11 + + movaps xmm7, xmm0 + movaps xmm4, xmm5 + subps xmm0, xmm1 ; xmm0=data3 + subps xmm5, xmm6 ; xmm5=data7 + addps xmm7, xmm1 ; xmm7=data5 + addps xmm4, xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + add edx, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; advance edx by 4 rows of the block + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns.
+ + mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/4 ; loop count: each iteration handles a strip of 4 columns + alignx 16, 7 +.columnloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(02 12 22 32), xmm2=(42 52 62 72) + ; xmm1=(03 13 23 33), xmm3=(43 53 63 73) + + movaps xmm4, xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0, xmm1 ; xmm0=(02 03 12 13) + unpckhps xmm4, xmm1 ; xmm4=(22 23 32 33) + movaps xmm5, xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2, xmm3 ; xmm2=(42 43 52 53) + unpckhps xmm5, xmm3 ; xmm5=(62 63 72 73) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 10 20 30), xmm1=(40 50 60 70) + ; xmm7=(01 11 21 31), xmm3=(41 51 61 71) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 23 32 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(42 43 52 53) + + movaps xmm4, xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6, xmm7 ; xmm6=(00 01 10 11) + unpckhps xmm4, xmm7 ; xmm4=(20 21 30 31) + movaps xmm2, xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1, xmm3 ; xmm1=(40 41 50 51) + unpckhps xmm2, xmm3 ; xmm2=(60 61 70 71) + + movaps xmm7, xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6, xmm0 ; xmm6=(00 01 02 03)=data0 + unpckhps2 xmm7, xmm0 ; xmm7=(10 11 12 13)=data1 + movaps xmm3, xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2, xmm5 ; xmm2=(60 61 62 63)=data6 + unpckhps2 xmm3, xmm5 ; xmm3=(70 71 72 73)=data7 + + movaps xmm0, xmm7 + movaps xmm5, xmm6 + subps xmm7, xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6, xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0, xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5, xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ;
xmm2=(22 23 32 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7, xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4, xmm2 ; xmm4=(20 21 22 23)=data2 + unpckhps2 xmm7, xmm2 ; xmm7=(30 31 32 33)=data3 + movaps xmm6, xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1, xmm3 ; xmm1=(40 41 42 43)=data4 + unpckhps2 xmm6, xmm3 ; xmm6=(50 51 52 53)=data5 + + movaps xmm2, xmm7 + movaps xmm3, xmm4 + addps xmm7, xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4, xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2, xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3, xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1, xmm5 + movaps xmm6, xmm0 + subps xmm5, xmm7 ; xmm5=tmp13 + subps xmm0, xmm4 ; xmm0=tmp12 + addps xmm1, xmm7 ; xmm1=tmp10 + addps xmm6, xmm4 ; xmm6=tmp11 + + addps xmm0, xmm5 + mulps xmm0, [GOTOFF(ebx,PD_0_707)] ; xmm0=z1 + + movaps xmm7, xmm1 + movaps xmm4, xmm5 + subps xmm1, xmm6 ; xmm1=data4 + subps xmm5, xmm0 ; xmm5=data6 + addps xmm7, xmm6 ; xmm7=data0 + addps xmm4, xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2, xmm3 ; xmm2=tmp10 + addps xmm3, xmm6 ; xmm3=tmp11 + addps xmm6, xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3, [GOTOFF(ebx,PD_0_707)] ; xmm3=z3 + + movaps xmm1, xmm2 ; xmm1=tmp10 + subps xmm2, xmm6 + mulps xmm2, [GOTOFF(ebx,PD_0_382)] ; xmm2=z5 + mulps xmm1, [GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6, [GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1, xmm2 ; xmm1=z2 + addps xmm6, xmm2 ; xmm6=z4 + + movaps xmm5, xmm0 + subps xmm0, xmm3 ; xmm0=z13 + addps xmm5, xmm3 ;
xmm5=z11 + + movaps xmm7, xmm0 + movaps xmm4, xmm5 + subps xmm0, xmm1 ; xmm0=data3 + subps xmm5, xmm6 ; xmm5=data7 + addps xmm7, xmm1 ; xmm7=data5 + addps xmm4, xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 + + add edx, byte 4*SIZEOF_FAST_FLOAT ; advance edx by 4 floats (next 4-column strip) + dec ecx + jnz near .columnloop + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- address of the saved original ebp (stored at [ebp]) + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jfdctfst-mmx.asm b/simd/i386/jfdctfst-mmx.asm new file mode 100644 index 0000000..0271901 --- /dev/null +++ b/simd/i386/jfdctfst-mmx.asm @@ -0,0 +1,397 @@ +; +; jfdctfst.asm - fast integer FDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details.
+; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_382 equ DESCALE( 410903207, 30 - CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124, 30 - CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) ; = 6 when CONST_BITS == 8 + + alignz 32 + GLOBAL_DATA(jconst_fdct_ifast_mmx) + +EXTN(jconst_fdct_ifast_mmx): + +PW_F0707 times 4 dw F_0_707 << CONST_SHIFT ; each constant is replicated into 4 words (one per packed-word lane) +PW_F0382 times 4 dw F_0_382 << CONST_SHIFT +PW_F0541 times 4 dw F_0_541 << CONST_SHIFT +PW_F1306 times 4 dw F_1_306 << CONST_SHIFT + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples.
+; +; GLOBAL(void) +; jsimd_fdct_ifast_mmx(DCTELEM *data) +; The transform is done in place: every load and store goes through edx, which is loaded from data. + +%define data(b) (b) + 8 ; DCTELEM *data (b points at the saved ebp pushed on entry) + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_ifast_mmx) + +EXTN(jsimd_fdct_ifast_mmx): + push ebp + mov eax, esp ; eax = esp = address of the saved (original) ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp], eax ; keep that address at the base of the aligned frame + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] ; esp = &wk[0]: reserve the WK_NUM scratch slots below ebp + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 ; loop count: each iteration handles 4 rows + alignx 16, 7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)] + + ; mm0=(20 21 22 23), mm2=(24 25 26 27) + ; mm1=(30 31 32 33), mm3=(34 35 36 37) + + movq mm4, mm0 ; transpose coefficients(phase 1) + punpcklwd mm0, mm1 ; mm0=(20 30 21 31) + punpckhwd mm4, mm1 ; mm4=(22 32 23 33) + movq mm5, mm2 ; transpose coefficients(phase 1) + punpcklwd mm2, mm3 ; mm2=(24 34 25 35) + punpckhwd mm5, mm3 ; mm5=(26 36 27 37) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 01 02 03), mm1=(04 05 06 07) + ; mm7=(10 11 12 13), mm3=(14 15 16 17) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 32 23 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(24 34 25 35) + + movq mm4, mm6 ; transpose coefficients(phase 1) + punpcklwd mm6, mm7 ; mm6=(00 10 01 11) + punpckhwd mm4, mm7 ; mm4=(02 12 03 13) + movq mm2, mm1 ; transpose coefficients(phase 1) + punpcklwd mm1, mm3 ; mm1=(04 14 05 15) + punpckhwd mm2, mm3 ;
mm2=(06 16 07 17) + + movq mm7, mm6 ; transpose coefficients(phase 2) + punpckldq mm6, mm0 ; mm6=(00 10 20 30)=data0 + punpckhdq mm7, mm0 ; mm7=(01 11 21 31)=data1 + movq mm3, mm2 ; transpose coefficients(phase 2) + punpckldq mm2, mm5 ; mm2=(06 16 26 36)=data6 + punpckhdq mm3, mm5 ; mm3=(07 17 27 37)=data7 + + movq mm0, mm7 + movq mm5, mm6 + psubw mm7, mm2 ; mm7=data1-data6=tmp6 + psubw mm6, mm3 ; mm6=data0-data7=tmp7 + paddw mm0, mm2 ; mm0=data1+data6=tmp1 + paddw mm5, mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 32 23 33) + movq mm3, MMWORD [wk(1)] ; mm3=(24 34 25 35) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7, mm4 ; transpose coefficients(phase 2) + punpckldq mm4, mm2 ; mm4=(02 12 22 32)=data2 + punpckhdq mm7, mm2 ; mm7=(03 13 23 33)=data3 + movq mm6, mm1 ; transpose coefficients(phase 2) + punpckldq mm1, mm3 ; mm1=(04 14 24 34)=data4 + punpckhdq mm6, mm3 ; mm6=(05 15 25 35)=data5 + + movq mm2, mm7 + movq mm3, mm4 + paddw mm7, mm1 ; mm7=data3+data4=tmp3 + paddw mm4, mm6 ; mm4=data2+data5=tmp2 + psubw mm2, mm1 ; mm2=data3-data4=tmp4 + psubw mm3, mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1, mm5 + movq mm6, mm0 + psubw mm5, mm7 ; mm5=tmp13 + psubw mm0, mm4 ; mm0=tmp12 + paddw mm1, mm7 ; mm1=tmp10 + paddw mm6, mm4 ; mm6=tmp11 + + paddw mm0, mm5 + psllw mm0, PRE_MULTIPLY_SCALE_BITS ; pre-scale: pmulhw keeps only the high 16 bits of each product + pmulhw mm0, [GOTOFF(ebx,PW_F0707)] ; mm0=z1 + + movq mm7, mm1 + movq mm4, mm5 + psubw mm1, mm6 ; mm1=data4 + psubw mm5, mm0 ; mm5=data6 + paddw mm7, mm6 ; mm7=data0 + paddw mm4, mm0 ; mm4=data2 + + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + + ; -- Odd part + + movq mm6, MMWORD [wk(0)] ; mm6=tmp6 + movq mm0, MMWORD [wk(1)] ; mm0=tmp7 + + paddw mm2, mm3 ; mm2=tmp10 + paddw mm3, mm6 ; mm3=tmp11 + paddw mm6, mm0 ; mm6=tmp12, mm0=tmp7 + +
psllw mm2, PRE_MULTIPLY_SCALE_BITS + psllw mm6, PRE_MULTIPLY_SCALE_BITS + + psllw mm3, PRE_MULTIPLY_SCALE_BITS + pmulhw mm3, [GOTOFF(ebx,PW_F0707)] ; mm3=z3 + + movq mm1, mm2 ; mm1=tmp10 + psubw mm2, mm6 + pmulhw mm2, [GOTOFF(ebx,PW_F0382)] ; mm2=z5 + pmulhw mm1, [GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610) + pmulhw mm6, [GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296) + paddw mm1, mm2 ; mm1=z2 + paddw mm6, mm2 ; mm6=z4 + + movq mm5, mm0 + psubw mm0, mm3 ; mm0=z13 + paddw mm5, mm3 ; mm5=z11 + + movq mm7, mm0 + movq mm4, mm5 + psubw mm0, mm1 ; mm0=data3 + psubw mm5, mm6 ; mm5=data7 + paddw mm7, mm1 ; mm7=data5 + paddw mm4, mm6 ; mm4=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4 + + add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM ; advance edx by 4 rows of the block + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 ; loop count: each iteration handles a strip of 4 columns + alignx 16, 7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; mm0=(02 12 22 32), mm2=(42 52 62 72) + ; mm1=(03 13 23 33), mm3=(43 53 63 73) + + movq mm4, mm0 ; transpose coefficients(phase 1) + punpcklwd mm0, mm1 ; mm0=(02 03 12 13) + punpckhwd mm4, mm1 ; mm4=(22 23 32 33) + movq mm5, mm2 ; transpose coefficients(phase 1) + punpcklwd mm2, mm3 ; mm2=(42 43 52 53) + punpckhwd mm5, mm3 ; mm5=(62 63 72 73) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 10 20 30), mm1=(40 50 60 70) + ; mm7=(01 11 21 31), mm3=(41 51 61 71) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 23
32 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(42 43 52 53) + + movq mm4, mm6 ; transpose coefficients(phase 1) + punpcklwd mm6, mm7 ; mm6=(00 01 10 11) + punpckhwd mm4, mm7 ; mm4=(20 21 30 31) + movq mm2, mm1 ; transpose coefficients(phase 1) + punpcklwd mm1, mm3 ; mm1=(40 41 50 51) + punpckhwd mm2, mm3 ; mm2=(60 61 70 71) + + movq mm7, mm6 ; transpose coefficients(phase 2) + punpckldq mm6, mm0 ; mm6=(00 01 02 03)=data0 + punpckhdq mm7, mm0 ; mm7=(10 11 12 13)=data1 + movq mm3, mm2 ; transpose coefficients(phase 2) + punpckldq mm2, mm5 ; mm2=(60 61 62 63)=data6 + punpckhdq mm3, mm5 ; mm3=(70 71 72 73)=data7 + + movq mm0, mm7 + movq mm5, mm6 + psubw mm7, mm2 ; mm7=data1-data6=tmp6 + psubw mm6, mm3 ; mm6=data0-data7=tmp7 + paddw mm0, mm2 ; mm0=data1+data6=tmp1 + paddw mm5, mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 23 32 33) + movq mm3, MMWORD [wk(1)] ; mm3=(42 43 52 53) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7, mm4 ; transpose coefficients(phase 2) + punpckldq mm4, mm2 ; mm4=(20 21 22 23)=data2 + punpckhdq mm7, mm2 ; mm7=(30 31 32 33)=data3 + movq mm6, mm1 ; transpose coefficients(phase 2) + punpckldq mm1, mm3 ; mm1=(40 41 42 43)=data4 + punpckhdq mm6, mm3 ; mm6=(50 51 52 53)=data5 + + movq mm2, mm7 + movq mm3, mm4 + paddw mm7, mm1 ; mm7=data3+data4=tmp3 + paddw mm4, mm6 ; mm4=data2+data5=tmp2 + psubw mm2, mm1 ; mm2=data3-data4=tmp4 + psubw mm3, mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1, mm5 + movq mm6, mm0 + psubw mm5, mm7 ; mm5=tmp13 + psubw mm0, mm4 ; mm0=tmp12 + paddw mm1, mm7 ; mm1=tmp10 + paddw mm6, mm4 ; mm6=tmp11 + + paddw mm0, mm5 + psllw mm0, PRE_MULTIPLY_SCALE_BITS + pmulhw mm0, [GOTOFF(ebx,PW_F0707)] ; mm0=z1 + + movq mm7, mm1 + movq mm4, mm5 + psubw mm1, mm6 ; mm1=data4 + psubw mm5, mm0 ; mm5=data6 + paddw mm7, mm6 ; mm7=data0 + paddw mm4, mm0 ; mm4=data2 + + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm5 +
movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + + ; -- Odd part + + movq mm6, MMWORD [wk(0)] ; mm6=tmp6 + movq mm0, MMWORD [wk(1)] ; mm0=tmp7 + + paddw mm2, mm3 ; mm2=tmp10 + paddw mm3, mm6 ; mm3=tmp11 + paddw mm6, mm0 ; mm6=tmp12, mm0=tmp7 + + psllw mm2, PRE_MULTIPLY_SCALE_BITS + psllw mm6, PRE_MULTIPLY_SCALE_BITS + + psllw mm3, PRE_MULTIPLY_SCALE_BITS + pmulhw mm3, [GOTOFF(ebx,PW_F0707)] ; mm3=z3 + + movq mm1, mm2 ; mm1=tmp10 + psubw mm2, mm6 + pmulhw mm2, [GOTOFF(ebx,PW_F0382)] ; mm2=z5 + pmulhw mm1, [GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610) + pmulhw mm6, [GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296) + paddw mm1, mm2 ; mm1=z2 + paddw mm6, mm2 ; mm6=z4 + + movq mm5, mm0 + psubw mm0, mm3 ; mm0=z13 + paddw mm5, mm3 ; mm5=z11 + + movq mm7, mm0 + movq mm4, mm5 + psubw mm0, mm1 ; mm0=data3 + psubw mm5, mm6 ; mm5=data7 + paddw mm7, mm1 ; mm7=data5 + paddw mm4, mm6 ; mm4=data1 + + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm7 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4 + + add edx, byte 4*SIZEOF_DCTELEM ; advance edx by 4 elements (next 4-column strip) + dec ecx + jnz near .columnloop + + emms ; empty MMX state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- address of the saved original ebp (stored at [ebp]) + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jfdctfst-sse2.asm b/simd/i386/jfdctfst-sse2.asm new file mode 100644 index 0000000..f09dadd --- /dev/null +++ b/simd/i386/jfdctfst-sse2.asm @@ -0,0 +1,405 @@ +; +; jfdctfst.asm - fast integer FDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander.
+; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_382 equ DESCALE( 410903207, 30 - CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124, 30 - CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 32 + GLOBAL_DATA(jconst_fdct_ifast_sse2) + +EXTN(jconst_fdct_ifast_sse2): + +PW_F0707 times 8 dw F_0_707 << CONST_SHIFT +PW_F0382 times 8 dw F_0_382 << CONST_SHIFT +PW_F0541 times 8 dw F_0_541 << CONST_SHIFT +PW_F1306 times 8 dw F_1_306 << CONST_SHIFT + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_ifast_sse2(DCTELEM *data) +; + +%define data(b) (b) + 8 ; DCTELEM *data + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2) + +EXTN(jsimd_fdct_ifast_sse2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. 
+ + mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4, xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0, xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4, xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5, xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2, xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5, xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67) + ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2, xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6, xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2, xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5, xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1, xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5, xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7, xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3, xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2, xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3, xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa
xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=(44 54 64 74 45 55 65 75) + + movdqa xmm7, xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0, xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7, xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2, xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4, xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2, xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1, xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0, xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1, xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5, xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2, xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5, xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6, xmm1 + movdqa xmm3, xmm0 + psubw xmm1, xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0, xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6, xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3, xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(0)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1, xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7, xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1, xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0, xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4, xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0, xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2, xmm1 + movdqa xmm5, xmm7 + paddw xmm1, xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7, xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2, xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5, xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4, xmm3 + movdqa xmm0, xmm6 + psubw xmm3, xmm1 ; xmm3=tmp13 + psubw xmm6, 
xmm7 ; xmm6=tmp12 + paddw xmm4, xmm1 ; xmm4=tmp10 + paddw xmm0, xmm7 ; xmm0=tmp11 + + paddw xmm6, xmm3 + psllw xmm6, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm6, [GOTOFF(ebx,PW_F0707)] ; xmm6=z1 + + movdqa xmm1, xmm4 + movdqa xmm7, xmm3 + psubw xmm4, xmm0 ; xmm4=data4 + psubw xmm3, xmm6 ; xmm3=data6 + paddw xmm1, xmm0 ; xmm1=data0 + paddw xmm7, xmm6 ; xmm7=data2 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=tmp6 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp7 + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=data4 + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=data6 + + ; -- Odd part + + paddw xmm2, xmm5 ; xmm2=tmp10 + paddw xmm5, xmm0 ; xmm5=tmp11 + paddw xmm0, xmm6 ; xmm0=tmp12, xmm6=tmp7 + + psllw xmm2, PRE_MULTIPLY_SCALE_BITS + psllw xmm0, PRE_MULTIPLY_SCALE_BITS + + psllw xmm5, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5, [GOTOFF(ebx,PW_F0707)] ; xmm5=z3 + + movdqa xmm4, xmm2 ; xmm4=tmp10 + psubw xmm2, xmm0 + pmulhw xmm2, [GOTOFF(ebx,PW_F0382)] ; xmm2=z5 + pmulhw xmm4, [GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm0, [GOTOFF(ebx,PW_F1306)] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4, xmm2 ; xmm4=z2 + paddw xmm0, xmm2 ; xmm0=z4 + + movdqa xmm3, xmm6 + psubw xmm6, xmm5 ; xmm6=z13 + paddw xmm3, xmm5 ; xmm3=z11 + + movdqa xmm2, xmm6 + movdqa xmm5, xmm3 + psubw xmm6, xmm4 ; xmm6=data3 + psubw xmm3, xmm0 ; xmm3=data7 + paddw xmm2, xmm4 ; xmm2=data5 + paddw xmm5, xmm0 ; xmm5=data1 + + ; ---- Pass 2: process columns. 
+ +; mov edx, POINTER [data(eax)] ; (DCTELEM *) + + ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72) + ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73) + + movdqa xmm4, xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1, xmm5 ; xmm1=(00 01 10 11 20 21 30 31) + punpckhwd xmm4, xmm5 ; xmm4=(40 41 50 51 60 61 70 71) + movdqa xmm0, xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7, xmm6 ; xmm7=(02 03 12 13 22 23 32 33) + punpckhwd xmm0, xmm6 ; xmm0=(42 43 52 53 62 63 72 73) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=col4 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=col6 + + ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76) + ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm7, xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5, xmm2 ; xmm5=(04 05 14 15 24 25 34 35) + punpckhwd xmm7, xmm2 ; xmm7=(44 45 54 55 64 65 74 75) + movdqa xmm0, xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6, xmm3 ; xmm6=(06 07 16 17 26 27 36 37) + punpckhwd xmm0, xmm3 ; xmm0=(46 47 56 57 66 67 76 77) + + movdqa xmm2, xmm5 ; transpose coefficients(phase 2) + punpckldq xmm5, xmm6 ; xmm5=(04 05 06 07 14 15 16 17) + punpckhdq xmm2, xmm6 ; xmm2=(24 25 26 27 34 35 36 37) + movdqa xmm3, xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7, xmm0 ; xmm7=(44 45 46 47 54 55 56 57) + punpckhdq xmm3, xmm0 ; xmm3=(64 65 66 67 74 75 76 77) + + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=(02 03 12 13 22 23 32 33) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(44 45 46 47 54 55 56 57) + + movdqa xmm2, xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1, xmm6 ; xmm1=(00 01 02 03 10 11 12 13) + punpckhdq xmm2, xmm6 ; xmm2=(20 21 22 23 30 31 32 33) + movdqa xmm7, xmm4 ; 
transpose coefficients(phase 2) + punpckldq xmm4, xmm0 ; xmm4=(40 41 42 43 50 51 52 53) + punpckhdq xmm7, xmm0 ; xmm7=(60 61 62 63 70 71 72 73) + + movdqa xmm6, xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1, xmm5 ; xmm1=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm6, xmm5 ; xmm6=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm0, xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7, xmm3 ; xmm7=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm0, xmm3 ; xmm0=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm5, xmm6 + movdqa xmm3, xmm1 + psubw xmm6, xmm7 ; xmm6=data1-data6=tmp6 + psubw xmm1, xmm0 ; xmm1=data0-data7=tmp7 + paddw xmm5, xmm7 ; xmm5=data1+data6=tmp1 + paddw xmm3, xmm0 ; xmm3=data0+data7=tmp0 + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(24 25 26 27 34 35 36 37) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=tmp7 + + movdqa xmm6, xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2, xmm7 ; xmm2=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm6, xmm7 ; xmm6=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm1, xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4, xmm0 ; xmm4=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm1, xmm0 ; xmm1=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm7, xmm6 + movdqa xmm0, xmm2 + paddw xmm6, xmm4 ; xmm6=data3+data4=tmp3 + paddw xmm2, xmm1 ; xmm2=data2+data5=tmp2 + psubw xmm7, xmm4 ; xmm7=data3-data4=tmp4 + psubw xmm0, xmm1 ; xmm0=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4, xmm3 + movdqa xmm1, xmm5 + psubw xmm3, xmm6 ; xmm3=tmp13 + psubw xmm5, xmm2 ; xmm5=tmp12 + paddw xmm4, xmm6 ; xmm4=tmp10 + paddw xmm1, xmm2 ; xmm1=tmp11 + + paddw xmm5, xmm3 + psllw xmm5, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5, [GOTOFF(ebx,PW_F0707)] ; xmm5=z1 + + movdqa xmm6, xmm4 + movdqa xmm2, xmm3 + psubw xmm4, xmm1 ; xmm4=data4 + psubw xmm3, xmm5 ; xmm3=data6 + paddw xmm6, xmm1 ; xmm6=data0 + paddw xmm2, xmm5 ; xmm2=data2 + + movdqa XMMWORD 
[XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm2 + + ; -- Odd part + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + paddw xmm7, xmm0 ; xmm7=tmp10 + paddw xmm0, xmm1 ; xmm0=tmp11 + paddw xmm1, xmm5 ; xmm1=tmp12, xmm5=tmp7 + + psllw xmm7, PRE_MULTIPLY_SCALE_BITS + psllw xmm1, PRE_MULTIPLY_SCALE_BITS + + psllw xmm0, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm0, [GOTOFF(ebx,PW_F0707)] ; xmm0=z3 + + movdqa xmm4, xmm7 ; xmm4=tmp10 + psubw xmm7, xmm1 + pmulhw xmm7, [GOTOFF(ebx,PW_F0382)] ; xmm7=z5 + pmulhw xmm4, [GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm1, [GOTOFF(ebx,PW_F1306)] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4, xmm7 ; xmm4=z2 + paddw xmm1, xmm7 ; xmm1=z4 + + movdqa xmm3, xmm5 + psubw xmm5, xmm0 ; xmm5=z13 + paddw xmm3, xmm0 ; xmm3=z11 + + movdqa xmm6, xmm5 + movdqa xmm2, xmm3 + psubw xmm5, xmm4 ; xmm5=data3 + psubw xmm3, xmm1 ; xmm3=data7 + paddw xmm6, xmm4 ; xmm6=data5 + paddw xmm2, xmm1 ; xmm2=data1 + + movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm5 + movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm2 + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. 
+ align 32 diff --git a/simd/i386/jfdctint-avx2.asm b/simd/i386/jfdctint-avx2.asm new file mode 100644 index 0000000..ae258ee --- /dev/null +++ b/simd/i386/jfdctint-avx2.asm @@ -0,0 +1,333 @@ +; +; jfdctint.asm - accurate integer FDCT (AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, 2018, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS - PASS1_BITS) +%define DESCALE_P2 (CONST_BITS + PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- +; In-place 8x8x16-bit matrix transpose using AVX2 instructions +; %1-%4: Input/output registers +; %5-%8: Temp registers + +%macro dotranspose 8 + ; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47) + ; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57) + ; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67) + ; %4=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77) + + vpunpcklwd %5, %1, %2 + vpunpckhwd %6, %1, %2 + vpunpcklwd %7, %3, %4 + vpunpckhwd %8, %3, %4 + ; transpose coefficients(phase 1) + ; %5=(00 10 01 11 02 12 03 13 40 50 41 51 42 52 43 53) + ; %6=(04 14 05 15 06 16 07 17 44 54 45 55 46 56 47 57) + ; %7=(20 30 21 31 22 32 23 33 60 70 61 71 62 72 63 73) + ; %8=(24 34 25 35 26 36 27 37 64 74 65 75 66 76 67 77) + + vpunpckldq %1, %5, %7 + vpunpckhdq %2, %5, %7 + vpunpckldq %3, %6, %8 + vpunpckhdq %4, %6, %8 + ; transpose coefficients(phase 2) + ; %1=(00 10 20 30 01 11 21 31 40 50 60 70 41 51 61 71) + ; %2=(02 12 22 32 03 13 23 33 42 52 62 72 43 53 63 73) + ; %3=(04 14 24 34 05 15 25 35 44 54 64 
74 45 55 65 75) + ; %4=(06 16 26 36 07 17 27 37 46 56 66 76 47 57 67 77) + + vpermq %1, %1, 0x8D + vpermq %2, %2, 0x8D + vpermq %3, %3, 0xD8 + vpermq %4, %4, 0xD8 + ; transpose coefficients(phase 3) + ; %1=(01 11 21 31 41 51 61 71 00 10 20 30 40 50 60 70) + ; %2=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72) + ; %3=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75) + ; %4=(06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77) +%endmacro + +; -------------------------------------------------------------------------- +; In-place 8x8x16-bit slow integer forward DCT using AVX2 instructions +; %1-%4: Input/output registers +; %5-%8: Temp registers +; %9: Pass (1 or 2) + +%macro dodct 9 + vpsubw %5, %1, %4 ; %5=data1_0-data6_7=tmp6_7 + vpaddw %6, %1, %4 ; %6=data1_0+data6_7=tmp1_0 + vpaddw %7, %2, %3 ; %7=data3_2+data4_5=tmp3_2 + vpsubw %8, %2, %3 ; %8=data3_2-data4_5=tmp4_5 + + ; -- Even part + + vperm2i128 %6, %6, %6, 0x01 ; %6=tmp0_1 + vpaddw %1, %6, %7 ; %1=tmp0_1+tmp3_2=tmp10_11 + vpsubw %6, %6, %7 ; %6=tmp0_1-tmp3_2=tmp13_12 + + vperm2i128 %7, %1, %1, 0x01 ; %7=tmp11_10 + vpsignw %1, %1, [GOTOFF(ebx, PW_1_NEG1)] ; %1=tmp10_neg11 + vpaddw %7, %7, %1 ; %7=(tmp10+tmp11)_(tmp10-tmp11) +%if %9 == 1 + vpsllw %1, %7, PASS1_BITS ; %1=data0_4 +%else + vpaddw %7, %7, [GOTOFF(ebx, PW_DESCALE_P2X)] + vpsraw %1, %7, PASS1_BITS ; %1=data0_4 +%endif + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + vperm2i128 %7, %6, %6, 0x01 ; %7=tmp12_13 + vpunpcklwd %2, %6, %7 + vpunpckhwd %6, %6, %7 + vpmaddwd %2, %2, [GOTOFF(ebx, PW_F130_F054_MF130_F054)] ; %2=data2_6L + vpmaddwd %6, %6, [GOTOFF(ebx, PW_F130_F054_MF130_F054)] ; %6=data2_6H + + vpaddd %2, %2, [GOTOFF(ebx, PD_DESCALE_P %+ %9)] + vpaddd %6, %6, [GOTOFF(ebx, PD_DESCALE_P %+ 
%9)] + vpsrad %2, %2, DESCALE_P %+ %9 + vpsrad %6, %6, DESCALE_P %+ %9 + + vpackssdw %3, %2, %6 ; %3=data2_6 + + ; -- Odd part + + vpaddw %7, %8, %5 ; %7=tmp4_5+tmp6_7=z3_4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + vperm2i128 %2, %7, %7, 0x01 ; %2=z4_3 + vpunpcklwd %6, %7, %2 + vpunpckhwd %7, %7, %2 + vpmaddwd %6, %6, [GOTOFF(ebx, PW_MF078_F117_F078_F117)] ; %6=z3_4L + vpmaddwd %7, %7, [GOTOFF(ebx, PW_MF078_F117_F078_F117)] ; %7=z3_4H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + vperm2i128 %4, %5, %5, 0x01 ; %4=tmp7_6 + vpunpcklwd %2, %8, %4 + vpunpckhwd %4, %8, %4 + vpmaddwd %2, %2, [GOTOFF(ebx, PW_MF060_MF089_MF050_MF256)] ; %2=tmp4_5L + vpmaddwd %4, %4, [GOTOFF(ebx, PW_MF060_MF089_MF050_MF256)] ; %4=tmp4_5H + + vpaddd %2, %2, %6 ; %2=data7_5L + vpaddd %4, %4, %7 ; %4=data7_5H + + vpaddd %2, %2, [GOTOFF(ebx, PD_DESCALE_P %+ %9)] + vpaddd %4, %4, [GOTOFF(ebx, PD_DESCALE_P %+ %9)] + vpsrad %2, %2, DESCALE_P %+ %9 + vpsrad %4, %4, DESCALE_P %+ %9 + + vpackssdw %4, %2, %4 ; %4=data7_5 + + vperm2i128 %2, %8, %8, 0x01 ; %2=tmp5_4 + vpunpcklwd %8, %5, %2 + vpunpckhwd
%5, %5, %2 + vpmaddwd %8, %8, [GOTOFF(ebx, PW_F050_MF256_F060_MF089)] ; %8=tmp6_7L + vpmaddwd %5, %5, [GOTOFF(ebx, PW_F050_MF256_F060_MF089)] ; %5=tmp6_7H + + vpaddd %8, %8, %6 ; %8=data3_1L + vpaddd %5, %5, %7 ; %5=data3_1H + + vpaddd %8, %8, [GOTOFF(ebx, PD_DESCALE_P %+ %9)] + vpaddd %5, %5, [GOTOFF(ebx, PD_DESCALE_P %+ %9)] + vpsrad %8, %8, DESCALE_P %+ %9 + vpsrad %5, %5, DESCALE_P %+ %9 + + vpackssdw %2, %8, %5 ; %2=data3_1 +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fdct_islow_avx2) + +EXTN(jconst_fdct_islow_avx2): + +PW_F130_F054_MF130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541 + times 4 dw (F_0_541 - F_1_847), F_0_541 +PW_MF078_F117_F078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175 + times 4 dw (F_1_175 - F_0_390), F_1_175 +PW_MF060_MF089_MF050_MF256 times 4 dw (F_0_298 - F_0_899), -F_0_899 + times 4 dw (F_2_053 - F_2_562), -F_2_562 +PW_F050_MF256_F060_MF089 times 4 dw (F_3_072 - F_2_562), -F_2_562 + times 4 dw (F_1_501 - F_0_899), -F_0_899 +PD_DESCALE_P1 times 8 dd 1 << (DESCALE_P1 - 1) +PD_DESCALE_P2 times 8 dd 1 << (DESCALE_P2 - 1) +PW_DESCALE_P2X times 16 dw 1 << (PASS1_BITS - 1) +PW_1_NEG1 times 8 dw 1 + times 8 dw -1 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_islow_avx2(DCTELEM *data) +; + +%define data(b) (b) + 8 ; DCTELEM *data + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_islow_avx2) + +EXTN(jsimd_fdct_islow_avx2): + push ebp + mov ebp, esp + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. 
+ + mov edx, POINTER [data(ebp)] ; (DCTELEM *) + + vmovdqu ymm4, YMMWORD [YMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + vmovdqu ymm5, YMMWORD [YMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + vmovdqu ymm6, YMMWORD [YMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + vmovdqu ymm7, YMMWORD [YMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + ; ymm4=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + ; ymm5=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + ; ymm6=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + ; ymm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + vperm2i128 ymm0, ymm4, ymm6, 0x20 + vperm2i128 ymm1, ymm4, ymm6, 0x31 + vperm2i128 ymm2, ymm5, ymm7, 0x20 + vperm2i128 ymm3, ymm5, ymm7, 0x31 + ; ymm0=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47) + ; ymm1=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57) + ; ymm2=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67) + ; ymm3=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77) + + dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7 + + dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1 + ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5 + + ; ---- Pass 2: process columns. 
+ + vperm2i128 ymm4, ymm1, ymm3, 0x20 ; ymm4=data3_7 + vperm2i128 ymm1, ymm1, ymm3, 0x31 ; ymm1=data1_5 + + dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7 + + dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2 + ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5 + + vperm2i128 ymm3, ymm0, ymm1, 0x30 ; ymm3=data0_1 + vperm2i128 ymm5, ymm2, ymm1, 0x20 ; ymm5=data2_3 + vperm2i128 ymm6, ymm0, ymm4, 0x31 ; ymm6=data4_5 + vperm2i128 ymm7, ymm2, ymm4, 0x21 ; ymm7=data6_7 + + vmovdqu YMMWORD [YMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], ymm3 + vmovdqu YMMWORD [YMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], ymm5 + vmovdqu YMMWORD [YMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], ymm6 + vmovdqu YMMWORD [YMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], ymm7 + + vzeroupper +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jfdctint-mmx.asm b/simd/i386/jfdctint-mmx.asm new file mode 100644 index 0000000..c6bd959 --- /dev/null +++ b/simd/i386/jfdctint-mmx.asm @@ -0,0 +1,622 @@ +; +; jfdctint.asm - accurate integer FDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). 
The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS - PASS1_BITS) +%define DESCALE_P2 (CONST_BITS + PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fdct_islow_mmx) + 
+EXTN(jconst_fdct_islow_mmx): + +PW_F130_F054 times 2 dw (F_0_541 + F_0_765), F_0_541 +PW_F054_MF130 times 2 dw F_0_541, (F_0_541 - F_1_847) +PW_MF078_F117 times 2 dw (F_1_175 - F_1_961), F_1_175 +PW_F117_F078 times 2 dw F_1_175, (F_1_175 - F_0_390) +PW_MF060_MF089 times 2 dw (F_0_298 - F_0_899), -F_0_899 +PW_MF089_F060 times 2 dw -F_0_899, (F_1_501 - F_0_899) +PW_MF050_MF256 times 2 dw (F_2_053 - F_2_562), -F_2_562 +PW_MF256_F050 times 2 dw -F_2_562, (F_3_072 - F_2_562) +PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1 - 1) +PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2 - 1) +PW_DESCALE_P2X times 4 dw 1 << (PASS1_BITS - 1) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_islow_mmx(DCTELEM *data) +; + +%define data(b) (b) + 8 ; DCTELEM *data + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD ; mmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_islow_mmx) + +EXTN(jsimd_fdct_islow_mmx): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. 
+ + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16, 7 +.rowloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)] + + ; mm0=(20 21 22 23), mm2=(24 25 26 27) + ; mm1=(30 31 32 33), mm3=(34 35 36 37) + + movq mm4, mm0 ; transpose coefficients(phase 1) + punpcklwd mm0, mm1 ; mm0=(20 30 21 31) + punpckhwd mm4, mm1 ; mm4=(22 32 23 33) + movq mm5, mm2 ; transpose coefficients(phase 1) + punpcklwd mm2, mm3 ; mm2=(24 34 25 35) + punpckhwd mm5, mm3 ; mm5=(26 36 27 37) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 01 02 03), mm1=(04 05 06 07) + ; mm7=(10 11 12 13), mm3=(14 15 16 17) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 32 23 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(24 34 25 35) + + movq mm4, mm6 ; transpose coefficients(phase 1) + punpcklwd mm6, mm7 ; mm6=(00 10 01 11) + punpckhwd mm4, mm7 ; mm4=(02 12 03 13) + movq mm2, mm1 ; transpose coefficients(phase 1) + punpcklwd mm1, mm3 ; mm1=(04 14 05 15) + punpckhwd mm2, mm3 ; mm2=(06 16 07 17) + + movq mm7, mm6 ; transpose coefficients(phase 2) + punpckldq mm6, mm0 ; mm6=(00 10 20 30)=data0 + punpckhdq mm7, mm0 ; mm7=(01 11 21 31)=data1 + movq mm3, mm2 ; transpose coefficients(phase 2) + punpckldq mm2, mm5 ; mm2=(06 16 26 36)=data6 + punpckhdq mm3, mm5 ; mm3=(07 17 27 37)=data7 + + movq mm0, mm7 + movq mm5, mm6 + psubw mm7, mm2 ; mm7=data1-data6=tmp6 + psubw mm6, mm3 ; mm6=data0-data7=tmp7 + paddw mm0, mm2 ; mm0=data1+data6=tmp1 + paddw mm5, mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 32 23 33) + movq mm3, MMWORD [wk(1)] ; mm3=(24 34 25 35) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7, mm4 
; transpose coefficients(phase 2) + punpckldq mm4, mm2 ; mm4=(02 12 22 32)=data2 + punpckhdq mm7, mm2 ; mm7=(03 13 23 33)=data3 + movq mm6, mm1 ; transpose coefficients(phase 2) + punpckldq mm1, mm3 ; mm1=(04 14 24 34)=data4 + punpckhdq mm6, mm3 ; mm6=(05 15 25 35)=data5 + + movq mm2, mm7 + movq mm3, mm4 + paddw mm7, mm1 ; mm7=data3+data4=tmp3 + paddw mm4, mm6 ; mm4=data2+data5=tmp2 + psubw mm2, mm1 ; mm2=data3-data4=tmp4 + psubw mm3, mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1, mm5 + movq mm6, mm0 + paddw mm5, mm7 ; mm5=tmp10 + paddw mm0, mm4 ; mm0=tmp11 + psubw mm1, mm7 ; mm1=tmp13 + psubw mm6, mm4 ; mm6=tmp12 + + movq mm7, mm5 + paddw mm5, mm0 ; mm5=tmp10+tmp11 + psubw mm7, mm0 ; mm7=tmp10-tmp11 + + psllw mm5, PASS1_BITS ; mm5=data0 + psllw mm7, PASS1_BITS ; mm7=data4 + + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm7 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movq mm4, mm1 ; mm1=tmp13 + movq mm0, mm1 + punpcklwd mm4, mm6 ; mm6=tmp12 + punpckhwd mm0, mm6 + movq mm1, mm4 + movq mm6, mm0 + pmaddwd mm4, [GOTOFF(ebx,PW_F130_F054)] ; mm4=data2L + pmaddwd mm0, [GOTOFF(ebx,PW_F130_F054)] ; mm0=data2H + pmaddwd mm1, [GOTOFF(ebx,PW_F054_MF130)] ; mm1=data6L + pmaddwd mm6, [GOTOFF(ebx,PW_F054_MF130)] ; mm6=data6H + + paddd mm4, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm0, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm4, DESCALE_P1 + psrad mm0, DESCALE_P1 + paddd mm1, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm6, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm1, DESCALE_P1 + psrad mm6, DESCALE_P1 + + packssdw mm4, mm0 ; mm4=data2 + packssdw mm1, mm6 ; mm1=data6 + + movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], 
mm1 + + ; -- Odd part + + movq mm5, MMWORD [wk(0)] ; mm5=tmp6 + movq mm7, MMWORD [wk(1)] ; mm7=tmp7 + + movq mm0, mm2 ; mm2=tmp4 + movq mm6, mm3 ; mm3=tmp5 + paddw mm0, mm5 ; mm0=z3 + paddw mm6, mm7 ; mm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm4, mm0 + movq mm1, mm0 + punpcklwd mm4, mm6 + punpckhwd mm1, mm6 + movq mm0, mm4 + movq mm6, mm1 + pmaddwd mm4, [GOTOFF(ebx,PW_MF078_F117)] ; mm4=z3L + pmaddwd mm1, [GOTOFF(ebx,PW_MF078_F117)] ; mm1=z3H + pmaddwd mm0, [GOTOFF(ebx,PW_F117_F078)] ; mm0=z4L + pmaddwd mm6, [GOTOFF(ebx,PW_F117_F078)] ; mm6=z4H + + movq MMWORD [wk(0)], mm4 ; wk(0)=z3L + movq MMWORD [wk(1)], mm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movq mm4, mm2 + movq mm1, mm2 + punpcklwd mm4, mm7 + punpckhwd mm1, mm7 + movq mm2, mm4 + movq mm7, mm1 + pmaddwd mm4, [GOTOFF(ebx,PW_MF060_MF089)] ; mm4=tmp4L + pmaddwd mm1, [GOTOFF(ebx,PW_MF060_MF089)] ; mm1=tmp4H + pmaddwd mm2, [GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L + pmaddwd mm7, [GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H + + paddd mm4, MMWORD [wk(0)] ; mm4=data7L + paddd mm1, MMWORD 
[wk(1)] ; mm1=data7H + paddd mm2, mm0 ; mm2=data1L + paddd mm7, mm6 ; mm7=data1H + + paddd mm4, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm1, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm4, DESCALE_P1 + psrad mm1, DESCALE_P1 + paddd mm2, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm7, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm2, DESCALE_P1 + psrad mm7, DESCALE_P1 + + packssdw mm4, mm1 ; mm4=data7 + packssdw mm2, mm7 ; mm2=data1 + + movq MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2 + + movq mm1, mm3 + movq mm7, mm3 + punpcklwd mm1, mm5 + punpckhwd mm7, mm5 + movq mm3, mm1 + movq mm5, mm7 + pmaddwd mm1, [GOTOFF(ebx,PW_MF050_MF256)] ; mm1=tmp5L + pmaddwd mm7, [GOTOFF(ebx,PW_MF050_MF256)] ; mm7=tmp5H + pmaddwd mm3, [GOTOFF(ebx,PW_MF256_F050)] ; mm3=tmp6L + pmaddwd mm5, [GOTOFF(ebx,PW_MF256_F050)] ; mm5=tmp6H + + paddd mm1, mm0 ; mm1=data5L + paddd mm7, mm6 ; mm7=data5H + paddd mm3, MMWORD [wk(0)] ; mm3=data3L + paddd mm5, MMWORD [wk(1)] ; mm5=data3H + + paddd mm1, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm7, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm1, DESCALE_P1 + psrad mm7, DESCALE_P1 + paddd mm3, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd mm5, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad mm3, DESCALE_P1 + psrad mm5, DESCALE_P1 + + packssdw mm1, mm7 ; mm1=data5 + packssdw mm3, mm5 ; mm3=data3 + + movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3 + + add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz near .rowloop + + ; ---- Pass 2: process columns. 
+ + mov edx, POINTER [data(eax)] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16, 7 +.columnloop: + + movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; mm0=(02 12 22 32), mm2=(42 52 62 72) + ; mm1=(03 13 23 33), mm3=(43 53 63 73) + + movq mm4, mm0 ; transpose coefficients(phase 1) + punpcklwd mm0, mm1 ; mm0=(02 03 12 13) + punpckhwd mm4, mm1 ; mm4=(22 23 32 33) + movq mm5, mm2 ; transpose coefficients(phase 1) + punpcklwd mm2, mm3 ; mm2=(42 43 52 53) + punpckhwd mm5, mm3 ; mm5=(62 63 72 73) + + movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movq mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + + ; mm6=(00 10 20 30), mm1=(40 50 60 70) + ; mm7=(01 11 21 31), mm3=(41 51 61 71) + + movq MMWORD [wk(0)], mm4 ; wk(0)=(22 23 32 33) + movq MMWORD [wk(1)], mm2 ; wk(1)=(42 43 52 53) + + movq mm4, mm6 ; transpose coefficients(phase 1) + punpcklwd mm6, mm7 ; mm6=(00 01 10 11) + punpckhwd mm4, mm7 ; mm4=(20 21 30 31) + movq mm2, mm1 ; transpose coefficients(phase 1) + punpcklwd mm1, mm3 ; mm1=(40 41 50 51) + punpckhwd mm2, mm3 ; mm2=(60 61 70 71) + + movq mm7, mm6 ; transpose coefficients(phase 2) + punpckldq mm6, mm0 ; mm6=(00 01 02 03)=data0 + punpckhdq mm7, mm0 ; mm7=(10 11 12 13)=data1 + movq mm3, mm2 ; transpose coefficients(phase 2) + punpckldq mm2, mm5 ; mm2=(60 61 62 63)=data6 + punpckhdq mm3, mm5 ; mm3=(70 71 72 73)=data7 + + movq mm0, mm7 + movq mm5, mm6 + psubw mm7, mm2 ; mm7=data1-data6=tmp6 + psubw mm6, mm3 ; mm6=data0-data7=tmp7 + paddw mm0, mm2 ; mm0=data1+data6=tmp1 + paddw mm5, mm3 ; mm5=data0+data7=tmp0 + + movq mm2, MMWORD [wk(0)] ; mm2=(22 23 32 33) + movq mm3, MMWORD [wk(1)] ; mm3=(42 43 52 53) + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 + movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 + + movq mm7, 
mm4 ; transpose coefficients(phase 2) + punpckldq mm4, mm2 ; mm4=(20 21 22 23)=data2 + punpckhdq mm7, mm2 ; mm7=(30 31 32 33)=data3 + movq mm6, mm1 ; transpose coefficients(phase 2) + punpckldq mm1, mm3 ; mm1=(40 41 42 43)=data4 + punpckhdq mm6, mm3 ; mm6=(50 51 52 53)=data5 + + movq mm2, mm7 + movq mm3, mm4 + paddw mm7, mm1 ; mm7=data3+data4=tmp3 + paddw mm4, mm6 ; mm4=data2+data5=tmp2 + psubw mm2, mm1 ; mm2=data3-data4=tmp4 + psubw mm3, mm6 ; mm3=data2-data5=tmp5 + + ; -- Even part + + movq mm1, mm5 + movq mm6, mm0 + paddw mm5, mm7 ; mm5=tmp10 + paddw mm0, mm4 ; mm0=tmp11 + psubw mm1, mm7 ; mm1=tmp13 + psubw mm6, mm4 ; mm6=tmp12 + + movq mm7, mm5 + paddw mm5, mm0 ; mm5=tmp10+tmp11 + psubw mm7, mm0 ; mm7=tmp10-tmp11 + + paddw mm5, [GOTOFF(ebx,PW_DESCALE_P2X)] + paddw mm7, [GOTOFF(ebx,PW_DESCALE_P2X)] + psraw mm5, PASS1_BITS ; mm5=data0 + psraw mm7, PASS1_BITS ; mm7=data4 + + movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5 + movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm7 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movq mm4, mm1 ; mm1=tmp13 + movq mm0, mm1 + punpcklwd mm4, mm6 ; mm6=tmp12 + punpckhwd mm0, mm6 + movq mm1, mm4 + movq mm6, mm0 + pmaddwd mm4, [GOTOFF(ebx,PW_F130_F054)] ; mm4=data2L + pmaddwd mm0, [GOTOFF(ebx,PW_F130_F054)] ; mm0=data2H + pmaddwd mm1, [GOTOFF(ebx,PW_F054_MF130)] ; mm1=data6L + pmaddwd mm6, [GOTOFF(ebx,PW_F054_MF130)] ; mm6=data6H + + paddd mm4, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm0, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm4, DESCALE_P2 + psrad mm0, DESCALE_P2 + paddd mm1, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm6, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm1, DESCALE_P2 + psrad mm6, DESCALE_P2 + + packssdw mm4, mm0 ; mm4=data2 + packssdw mm1, mm6 ; mm1=data6 + + movq MMWORD 
[MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm1 + + ; -- Odd part + + movq mm5, MMWORD [wk(0)] ; mm5=tmp6 + movq mm7, MMWORD [wk(1)] ; mm7=tmp7 + + movq mm0, mm2 ; mm2=tmp4 + movq mm6, mm3 ; mm3=tmp5 + paddw mm0, mm5 ; mm0=z3 + paddw mm6, mm7 ; mm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm4, mm0 + movq mm1, mm0 + punpcklwd mm4, mm6 + punpckhwd mm1, mm6 + movq mm0, mm4 + movq mm6, mm1 + pmaddwd mm4, [GOTOFF(ebx,PW_MF078_F117)] ; mm4=z3L + pmaddwd mm1, [GOTOFF(ebx,PW_MF078_F117)] ; mm1=z3H + pmaddwd mm0, [GOTOFF(ebx,PW_F117_F078)] ; mm0=z4L + pmaddwd mm6, [GOTOFF(ebx,PW_F117_F078)] ; mm6=z4H + + movq MMWORD [wk(0)], mm4 ; wk(0)=z3L + movq MMWORD [wk(1)], mm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movq mm4, mm2 + movq mm1, mm2 + punpcklwd mm4, mm7 + punpckhwd mm1, mm7 + movq mm2, mm4 + movq mm7, mm1 + pmaddwd mm4, [GOTOFF(ebx,PW_MF060_MF089)] ; mm4=tmp4L + pmaddwd mm1, [GOTOFF(ebx,PW_MF060_MF089)] ; mm1=tmp4H + pmaddwd mm2, [GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L + pmaddwd mm7, 
[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H + + paddd mm4, MMWORD [wk(0)] ; mm4=data7L + paddd mm1, MMWORD [wk(1)] ; mm1=data7H + paddd mm2, mm0 ; mm2=data1L + paddd mm7, mm6 ; mm7=data1H + + paddd mm4, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm1, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm4, DESCALE_P2 + psrad mm1, DESCALE_P2 + paddd mm2, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm7, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm2, DESCALE_P2 + psrad mm7, DESCALE_P2 + + packssdw mm4, mm1 ; mm4=data7 + packssdw mm2, mm7 ; mm2=data1 + + movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2 + + movq mm1, mm3 + movq mm7, mm3 + punpcklwd mm1, mm5 + punpckhwd mm7, mm5 + movq mm3, mm1 + movq mm5, mm7 + pmaddwd mm1, [GOTOFF(ebx,PW_MF050_MF256)] ; mm1=tmp5L + pmaddwd mm7, [GOTOFF(ebx,PW_MF050_MF256)] ; mm7=tmp5H + pmaddwd mm3, [GOTOFF(ebx,PW_MF256_F050)] ; mm3=tmp6L + pmaddwd mm5, [GOTOFF(ebx,PW_MF256_F050)] ; mm5=tmp6H + + paddd mm1, mm0 ; mm1=data5L + paddd mm7, mm6 ; mm7=data5H + paddd mm3, MMWORD [wk(0)] ; mm3=data3L + paddd mm5, MMWORD [wk(1)] ; mm5=data3H + + paddd mm1, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm7, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm1, DESCALE_P2 + psrad mm7, DESCALE_P2 + paddd mm3, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd mm5, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad mm3, DESCALE_P2 + psrad mm5, DESCALE_P2 + + packssdw mm1, mm7 ; mm1=data5 + packssdw mm3, mm5 ; mm3=data3 + + movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3 + + add edx, byte 4*SIZEOF_DCTELEM + dec ecx + jnz near .columnloop + + emms ; empty MMX state + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + poppic ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. 
+ align 32 diff --git a/simd/i386/jfdctint-sse2.asm b/simd/i386/jfdctint-sse2.asm new file mode 100644 index 0000000..d67dcc1 --- /dev/null +++ b/simd/i386/jfdctint-sse2.asm @@ -0,0 +1,635 @@ +; +; jfdctint-sse2.asm - accurate integer FDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler); it +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 ; fraction bits of the FIX() constants +%define PASS1_BITS 2 ; extra scaling kept between pass 1 and pass 2 + +%define DESCALE_P1 (CONST_BITS - PASS1_BITS) ; pass 1 right-shift count +%define DESCALE_P2 (CONST_BITS + PASS1_BITS) ; pass 2 right-shift count + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants.
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) ; x >> n after adding half, i.e. with rounding +F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fdct_islow_sse2) + +EXTN(jconst_fdct_islow_sse2): ; each PW_* row is a word pair repeated 4 times, consumed by pmaddwd + +PW_F130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541 - F_1_847) +PW_MF078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175 - F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298 - F_0_899), -F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501 - F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053 - F_2_562), -F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072 - F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1) ; rounding bias added before psrad DESCALE_P1 +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1) ; rounding bias added before psrad DESCALE_P2 +PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1) ; rounding bias added before psraw PASS1_BITS + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform the forward DCT on one block of samples.
+; +; GLOBAL(void) +; jsimd_fdct_islow_sse2(DCTELEM *data) +; The 8x8 block at data is loaded in pass 1 and overwritten with the result in pass 2. + +%define data(b) (b) + 8 ; DCTELEM *data + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 6 + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_islow_sse2) + +EXTN(jsimd_fdct_islow_sse2): + push ebp + mov eax, esp ; eax = original ebp (esp after push ebp) + sub esp, byte 4 ; make room for one dword below the saved ebp + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax ; save original ebp at [aligned ebp] + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] ; reserve WK_NUM xmmword scratch slots below ebp + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved +; push esi ; unused +; push edi ; unused + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process rows. + + mov edx, POINTER [data(eax)] ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4, xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0, xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4, xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5, xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2, xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5, xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)] + + ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67) + ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2, xmm6 ; transpose coefficients(phase 1) +
punpcklwd xmm6, xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2, xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5, xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1, xmm3 ; xmm1=(60 70 61 71 62 72 63 73) + punpckhwd xmm5, xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7, xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3, xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2, xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3, xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(3)], xmm2 ; wk(3)=(44 54 64 74 45 55 65 75) + + movdqa xmm7, xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0, xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7, xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2, xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4, xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2, xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1, xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0, xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1, xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5, xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2, xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5, xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6, xmm1 + movdqa xmm3, xmm0 + psubw xmm1, xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0, xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6, xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3, xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(3)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 +
+ movdqa xmm1, xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7, xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1, xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0, xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4, xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0, xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2, xmm1 + movdqa xmm5, xmm7 + paddw xmm1, xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7, xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2, xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5, xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4, xmm3 + movdqa xmm0, xmm6 + paddw xmm3, xmm1 ; xmm3=tmp10 + paddw xmm6, xmm7 ; xmm6=tmp11 + psubw xmm4, xmm1 ; xmm4=tmp13 + psubw xmm0, xmm7 ; xmm0=tmp12 + + movdqa xmm1, xmm3 + paddw xmm3, xmm6 ; xmm3=tmp10+tmp11 + psubw xmm1, xmm6 ; xmm1=tmp10-tmp11 + + psllw xmm3, PASS1_BITS ; xmm3=data0 + psllw xmm1, PASS1_BITS ; xmm1=data4 + + movdqa XMMWORD [wk(2)], xmm3 ; wk(2)=data0 + movdqa XMMWORD [wk(3)], xmm1 ; wk(3)=data4 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm7, xmm4 ; xmm4=tmp13 + movdqa xmm6, xmm4 + punpcklwd xmm7, xmm0 ; xmm0=tmp12 + punpckhwd xmm6, xmm0 + movdqa xmm4, xmm7 + movdqa xmm0, xmm6 + pmaddwd xmm7, [GOTOFF(ebx,PW_F130_F054)] ; xmm7=data2L + pmaddwd xmm6, [GOTOFF(ebx,PW_F130_F054)] ; xmm6=data2H + pmaddwd xmm4, [GOTOFF(ebx,PW_F054_MF130)] ; xmm4=data6L + pmaddwd xmm0, [GOTOFF(ebx,PW_F054_MF130)] ; xmm0=data6H + + paddd xmm7, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm6, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm7, DESCALE_P1 + psrad xmm6, DESCALE_P1 + paddd xmm4, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm0, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm4, DESCALE_P1 + psrad xmm0, DESCALE_P1 + +
packssdw xmm7, xmm6 ; xmm7=data2 + packssdw xmm4, xmm0 ; xmm4=data6 + + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=data2 + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=data6 + + ; -- Odd part + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=tmp6 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp7 + + movdqa xmm6, xmm2 ; xmm2=tmp4 + movdqa xmm0, xmm5 ; xmm5=tmp5 + paddw xmm6, xmm3 ; xmm6=z3 + paddw xmm0, xmm1 ; xmm0=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm7, xmm6 + movdqa xmm4, xmm6 + punpcklwd xmm7, xmm0 + punpckhwd xmm4, xmm0 + movdqa xmm6, xmm7 + movdqa xmm0, xmm4 + pmaddwd xmm7, [GOTOFF(ebx,PW_MF078_F117)] ; xmm7=z3L + pmaddwd xmm4, [GOTOFF(ebx,PW_MF078_F117)] ; xmm4=z3H + pmaddwd xmm6, [GOTOFF(ebx,PW_F117_F078)] ; xmm6=z4L + pmaddwd xmm0, [GOTOFF(ebx,PW_F117_F078)] ; xmm0=z4H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm7, xmm2 + movdqa xmm4, xmm2 + punpcklwd xmm7, xmm1 + punpckhwd xmm4, xmm1 + movdqa xmm2, xmm7 + movdqa xmm1, xmm4 + pmaddwd xmm7, [GOTOFF(ebx,PW_MF060_MF089)] ;
xmm7=tmp4L + pmaddwd xmm4, [GOTOFF(ebx,PW_MF060_MF089)] ; xmm4=tmp4H + pmaddwd xmm2, [GOTOFF(ebx,PW_MF089_F060)] ; xmm2=tmp7L + pmaddwd xmm1, [GOTOFF(ebx,PW_MF089_F060)] ; xmm1=tmp7H + + paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L + paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H + paddd xmm2, xmm6 ; xmm2=data1L + paddd xmm1, xmm0 ; xmm1=data1H + + paddd xmm7, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm4, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm7, DESCALE_P1 + psrad xmm4, DESCALE_P1 + paddd xmm2, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm1, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm2, DESCALE_P1 + psrad xmm1, DESCALE_P1 + + packssdw xmm7, xmm4 ; xmm7=data7 + packssdw xmm2, xmm1 ; xmm2=data1 + + movdqa xmm4, xmm5 + movdqa xmm1, xmm5 + punpcklwd xmm4, xmm3 + punpckhwd xmm1, xmm3 + movdqa xmm5, xmm4 + movdqa xmm3, xmm1 + pmaddwd xmm4, [GOTOFF(ebx,PW_MF050_MF256)] ; xmm4=tmp5L + pmaddwd xmm1, [GOTOFF(ebx,PW_MF050_MF256)] ; xmm1=tmp5H + pmaddwd xmm5, [GOTOFF(ebx,PW_MF256_F050)] ; xmm5=tmp6L + pmaddwd xmm3, [GOTOFF(ebx,PW_MF256_F050)] ; xmm3=tmp6H + + paddd xmm4, xmm6 ; xmm4=data5L + paddd xmm1, xmm0 ; xmm1=data5H + paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L + paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H + + paddd xmm4, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm1, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm4, DESCALE_P1 + psrad xmm1, DESCALE_P1 + paddd xmm5, [GOTOFF(ebx,PD_DESCALE_P1)] + paddd xmm3, [GOTOFF(ebx,PD_DESCALE_P1)] + psrad xmm5, DESCALE_P1 + psrad xmm3, DESCALE_P1 + + packssdw xmm4, xmm1 ; xmm4=data5 + packssdw xmm5, xmm3 ; xmm5=data3 + + ; ---- Pass 2: process columns.
+ +; mov edx, POINTER [data(eax)] ; (DCTELEM *) edx still holds data from pass 1 + + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=col0 + movdqa xmm0, XMMWORD [wk(4)] ; xmm0=col2 + + ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72) + ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73) + + movdqa xmm1, xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6, xmm2 ; xmm6=(00 01 10 11 20 21 30 31) + punpckhwd xmm1, xmm2 ; xmm1=(40 41 50 51 60 61 70 71) + movdqa xmm3, xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0, xmm5 ; xmm0=(02 03 12 13 22 23 32 33) + punpckhwd xmm3, xmm5 ; xmm3=(42 43 52 53 62 63 72 73) + + movdqa xmm2, XMMWORD [wk(3)] ; xmm2=col4 + movdqa xmm5, XMMWORD [wk(5)] ; xmm5=col6 + + ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76) + ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm0, xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2, xmm4 ; xmm2=(04 05 14 15 24 25 34 35) + punpckhwd xmm0, xmm4 ; xmm0=(44 45 54 55 64 65 74 75) + movdqa xmm3, xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5, xmm7 ; xmm5=(06 07 16 17 26 27 36 37) + punpckhwd xmm3, xmm7 ; xmm3=(46 47 56 57 66 67 76 77) + + movdqa xmm4, xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2, xmm5 ; xmm2=(04 05 06 07 14 15 16 17) + punpckhdq xmm4, xmm5 ; xmm4=(24 25 26 27 34 35 36 37) + movdqa xmm7, xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0, xmm3 ; xmm0=(44 45 46 47 54 55 56 57) + punpckhdq xmm7, xmm3 ; xmm7=(64 65 66 67 74 75 76 77) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=(02 03 12 13 22 23 32 33) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(3)], xmm0 ; wk(3)=(44 45 46 47 54 55 56 57) + + movdqa xmm4, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm5 ; xmm6=(00 01 02 03 10 11 12
13) + punpckhdq xmm4, xmm5 ; xmm4=(20 21 22 23 30 31 32 33) + movdqa xmm0, xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1, xmm3 ; xmm1=(40 41 42 43 50 51 52 53) + punpckhdq xmm0, xmm3 ; xmm0=(60 61 62 63 70 71 72 73) + + movdqa xmm5, xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6, xmm2 ; xmm6=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm5, xmm2 ; xmm5=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm3, xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0, xmm7 ; xmm0=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm3, xmm7 ; xmm3=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm2, xmm5 + movdqa xmm7, xmm6 + psubw xmm5, xmm0 ; xmm5=data1-data6=tmp6 + psubw xmm6, xmm3 ; xmm6=data0-data7=tmp7 + paddw xmm2, xmm0 ; xmm2=data1+data6=tmp1 + paddw xmm7, xmm3 ; xmm7=data0+data7=tmp0 + + movdqa xmm0, XMMWORD [wk(2)] ; xmm0=(24 25 26 27 34 35 36 37) + movdqa xmm3, XMMWORD [wk(3)] ; xmm3=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movdqa xmm5, xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4, xmm0 ; xmm4=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm5, xmm0 ; xmm5=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm6, xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1, xmm3 ; xmm1=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm6, xmm3 ; xmm6=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm0, xmm5 + movdqa xmm3, xmm4 + paddw xmm5, xmm1 ; xmm5=data3+data4=tmp3 + paddw xmm4, xmm6 ; xmm4=data2+data5=tmp2 + psubw xmm0, xmm1 ; xmm0=data3-data4=tmp4 + psubw xmm3, xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm1, xmm7 + movdqa xmm6, xmm2 + paddw xmm7, xmm5 ; xmm7=tmp10 + paddw xmm2, xmm4 ; xmm2=tmp11 + psubw xmm1, xmm5 ; xmm1=tmp13 + psubw xmm6, xmm4 ; xmm6=tmp12 + + movdqa xmm5, xmm7 + paddw xmm7, xmm2 ; xmm7=tmp10+tmp11 + psubw xmm5, xmm2 ; xmm5=tmp10-tmp11 + + paddw xmm7, [GOTOFF(ebx,PW_DESCALE_P2X)] + paddw xmm5, [GOTOFF(ebx,PW_DESCALE_P2X)] + psraw xmm7, PASS1_BITS ;
xmm7=data0 + psraw xmm5, PASS1_BITS ; xmm5=data4 + + movdqa XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm7 + movdqa XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm5 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm4, xmm1 ; xmm1=tmp13 + movdqa xmm2, xmm1 + punpcklwd xmm4, xmm6 ; xmm6=tmp12 + punpckhwd xmm2, xmm6 + movdqa xmm1, xmm4 + movdqa xmm6, xmm2 + pmaddwd xmm4, [GOTOFF(ebx,PW_F130_F054)] ; xmm4=data2L + pmaddwd xmm2, [GOTOFF(ebx,PW_F130_F054)] ; xmm2=data2H + pmaddwd xmm1, [GOTOFF(ebx,PW_F054_MF130)] ; xmm1=data6L + pmaddwd xmm6, [GOTOFF(ebx,PW_F054_MF130)] ; xmm6=data6H + + paddd xmm4, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm2, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm4, DESCALE_P2 + psrad xmm2, DESCALE_P2 + paddd xmm1, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm6, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm1, DESCALE_P2 + psrad xmm6, DESCALE_P2 + + packssdw xmm4, xmm2 ; xmm4=data2 + packssdw xmm1, xmm6 ; xmm1=data6 + + movdqa XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm1 + + ; -- Odd part + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + movdqa xmm2, xmm0 ; xmm0=tmp4 + movdqa xmm6, xmm3 ; xmm3=tmp5 + paddw xmm2, xmm7 ; xmm2=z3 + paddw xmm6, xmm5 ; xmm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm4, xmm2 + movdqa xmm1, xmm2 + punpcklwd xmm4, xmm6 + punpckhwd xmm1, xmm6 + movdqa xmm2, xmm4 + movdqa xmm6, xmm1 + pmaddwd xmm4, [GOTOFF(ebx,PW_MF078_F117)]
; xmm4=z3L + pmaddwd xmm1, [GOTOFF(ebx,PW_MF078_F117)] ; xmm1=z3H + pmaddwd xmm2, [GOTOFF(ebx,PW_F117_F078)] ; xmm2=z4L + pmaddwd xmm6, [GOTOFF(ebx,PW_F117_F078)] ; xmm6=z4H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm4, xmm0 + movdqa xmm1, xmm0 + punpcklwd xmm4, xmm5 + punpckhwd xmm1, xmm5 + movdqa xmm0, xmm4 + movdqa xmm5, xmm1 + pmaddwd xmm4, [GOTOFF(ebx,PW_MF060_MF089)] ; xmm4=tmp4L + pmaddwd xmm1, [GOTOFF(ebx,PW_MF060_MF089)] ; xmm1=tmp4H + pmaddwd xmm0, [GOTOFF(ebx,PW_MF089_F060)] ; xmm0=tmp7L + pmaddwd xmm5, [GOTOFF(ebx,PW_MF089_F060)] ; xmm5=tmp7H + + paddd xmm4, XMMWORD [wk(0)] ; xmm4=data7L + paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H + paddd xmm0, xmm2 ; xmm0=data1L + paddd xmm5, xmm6 ; xmm5=data1H + + paddd xmm4, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm1, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm4, DESCALE_P2 + psrad xmm1, DESCALE_P2 + paddd xmm0, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm5, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm0, DESCALE_P2 + psrad xmm5, DESCALE_P2 + + packssdw xmm4, xmm1 ; xmm4=data7 + packssdw xmm0, xmm5 ; xmm0=data1 + + movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm0 + + movdqa xmm1, xmm3 + movdqa xmm5, xmm3 +
punpcklwd xmm1, xmm7 + punpckhwd xmm5, xmm7 + movdqa xmm3, xmm1 + movdqa xmm7, xmm5 + pmaddwd xmm1, [GOTOFF(ebx,PW_MF050_MF256)] ; xmm1=tmp5L + pmaddwd xmm5, [GOTOFF(ebx,PW_MF050_MF256)] ; xmm5=tmp5H + pmaddwd xmm3, [GOTOFF(ebx,PW_MF256_F050)] ; xmm3=tmp6L + pmaddwd xmm7, [GOTOFF(ebx,PW_MF256_F050)] ; xmm7=tmp6H + + paddd xmm1, xmm2 ; xmm1=data5L + paddd xmm5, xmm6 ; xmm5=data5H + paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L + paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H + + paddd xmm1, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm5, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm1, DESCALE_P2 + psrad xmm5, DESCALE_P2 + paddd xmm3, [GOTOFF(ebx,PD_DESCALE_P2)] + paddd xmm7, [GOTOFF(ebx,PD_DESCALE_P2)] + psrad xmm3, DESCALE_P2 + psrad xmm7, DESCALE_P2 + + packssdw xmm1, xmm5 ; xmm1=data5 + packssdw xmm3, xmm7 ; xmm3=data3 + + movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm3 + +; pop edi ; unused +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jidctflt-3dn.asm b/simd/i386/jidctflt-3dn.asm new file mode 100644 index 0000000..73aa18d --- /dev/null +++ b/simd/i386/jidctflt-3dn.asm @@ -0,0 +1,453 @@ +; +; jidctflt.asm - floating-point IDCT (3DNow! & MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see the jidctflt.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+    SECTION SEG_CONST
+
+    alignz 32
+    GLOBAL_DATA(jconst_idct_float_3dnow)
+
+EXTN(jconst_idct_float_3dnow):  ; constants below are addressed GOT-relative via GOTOFF(ebx,...)
+
+PD_1_414 times 2 dd 1.414213562373095048801689
+PD_1_847 times 2 dd 1.847759065022573512256366
+PD_1_082 times 2 dd 1.082392200292393968799446
+PD_2_613 times 2 dd 2.613125929752753055713286
+PD_RNDINT_MAGIC times 2 dd 100663296.0  ; (float)(0x00C00000 << 3) = 1.5 * 2^26; a float's ulp is 8 at this magnitude, so adding it leaves round(x/8) in the low mantissa bits
+PB_CENTERJSAMP times 8 db CENTERJSAMPLE  ; per-byte bias added after packing (CENTERJSAMPLE is defined outside this file)
+
+    alignz 32
+
+; --------------------------------------------------------------------------
+    SECTION SEG_TEXT
+    BITS 32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+; Pass 1 handles two columns per iteration and stores floats, transposed,
+; into workspace[]; pass 2 handles two rows per iteration and stores bytes.
+; GLOBAL(void)
+; jsimd_idct_float_3dnow(void *dct_table, JCOEFPTR coef_block,
+;                        JSAMPARRAY output_buf, JDIMENSION output_col)
+; The four arguments are read from the stack through the offsets below.
+
+%define dct_table(b) (b) + 8  ; void *dct_table
+%define coef_block(b) (b) + 12  ; JCOEFPTR coef_block
+%define output_buf(b) (b) + 16  ; JSAMPARRAY output_buf
+%define output_col(b) (b) + 20  ; JDIMENSION output_col
+
+%define original_ebp ebp + 0  ; slot at the aligned frame base holding the pre-alignment stack pointer
+%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD
+                    ; mmword wk[WK_NUM]
+%define WK_NUM 2
+%define workspace wk(0) - DCTSIZE2 * SIZEOF_FAST_FLOAT
+                    ; FAST_FLOAT workspace[DCTSIZE2]
+
+    align 32
+    GLOBAL_FUNCTION(jsimd_idct_float_3dnow)
+
+EXTN(jsimd_idct_float_3dnow):
+    push ebp
+    mov eax, esp  ; eax = original ebp
+    sub esp, byte 4  ; make room so the aligned slot lies below the saved ebp
+    and esp, byte (-SIZEOF_MMWORD)  ; align to 64 bits
+    mov [esp], eax  ; remember the unaligned frame pointer at [original_ebp]
+    mov ebp, esp  ; ebp = aligned ebp
+    lea esp, [workspace]  ; reserve wk[] and workspace[] below the aligned ebp
+    push ebx
+;   push ecx  ; need not be preserved
+;   push edx  ; need not be preserved
+    push esi
+    push edi
+
+    get_GOT ebx  ; get GOT address
+
+    ; ---- Pass 1: process columns from input, store into work array.
+
+;   mov eax, [original_ebp]  ; (not needed: eax still holds it from the prologue)
+    mov edx, POINTER [dct_table(eax)]  ; quantptr
+    mov esi, JCOEFPTR [coef_block(eax)]  ; inptr
+    lea edi, [workspace]  ; FAST_FLOAT *wsptr
+    mov ecx, DCTSIZE/2  ; ctr
+    alignx 16, 7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
+    mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]  ; each dword covers the two columns of this iteration
+    or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+    jnz short .columnDCT  ; rows 1-2 already nonzero: take the full path
+
+    pushpic ebx  ; save GOT address
+    mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]  ; ebx is borrowed as a second accumulator
+    mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
+    or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
+    or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
+    or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
+    or eax, ebx  ; ZF set only if rows 3-7 are zero in both columns
+    poppic ebx  ; restore GOT address (the jnz below still relies on the flags of the OR)
+    jnz short .columnDCT
+
+    ; -- AC terms all zero
+
+    movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]  ; DC terms of the two columns
+
+    punpcklwd mm0, mm0  ; duplicate each 16-bit coefficient into both halves of a dword
+    psrad mm0, (DWORD_BIT-WORD_BIT)  ; arithmetic shift brings the upper copy down, sign-extended
+    pi2fd mm0, mm0  ; packed int32 -> float
+
+    pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize
+
+    movq mm1, mm0
+    punpckldq mm0, mm0  ; mm0 = first column's DC value in both lanes
+    punpckhdq mm1, mm1  ; mm1 = second column's DC value in both lanes
+
+    movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm0  ; fill all 8 workspace entries of the first column
+    movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm0
+    movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm0
+    movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+    movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1  ; and of the second column
+    movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm1
+    movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
+    movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
+    jmp near .nextcolumn
+    alignx 16, 7
+%endif
+.columnDCT:
+
+    ; -- Even part
+
+    movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
+    movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+    movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
+    movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+    punpcklwd mm0, mm0  ; same word -> dword -> float widening as in the all-zero path
+    punpcklwd mm1, mm1
+    psrad mm0, (DWORD_BIT-WORD_BIT)
+    psrad mm1, (DWORD_BIT-WORD_BIT)
+    pi2fd mm0, mm0
+    pi2fd mm1, mm1
+
+    pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize rows 0 and 2
+    pfmul mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+    punpcklwd mm2, mm2
+    punpcklwd mm3, mm3
+    psrad mm2, (DWORD_BIT-WORD_BIT)
+    psrad mm3, (DWORD_BIT-WORD_BIT)
+    pi2fd mm2, mm2
+    pi2fd mm3, mm3
+
+    pfmul mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize rows 4 and 6
+    pfmul mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+    movq mm4, mm0
+    movq mm5, mm1
+    pfsub mm0, mm2  ; mm0=tmp11
+    pfsub mm1, mm3
+    pfadd mm4, mm2  ; mm4=tmp10
+    pfadd mm5, mm3  ; mm5=tmp13
+
+    pfmul mm1, [GOTOFF(ebx,PD_1_414)]
+    pfsub mm1, mm5  ; mm1=tmp12
+
+    movq mm6, mm4
+    movq mm7, mm0
+    pfsub mm4, mm5  ; mm4=tmp3
+    pfsub mm0, mm1  ; mm0=tmp2
+    pfadd mm6, mm5  ; mm6=tmp0
+    pfadd mm7, mm1  ; mm7=tmp1
+
+    movq MMWORD [wk(1)], mm4  ; tmp3
+    movq MMWORD [wk(0)], mm0  ; tmp2
+
+    ; -- Odd part
+
+    movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+    movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
+    movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
+    movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+    punpcklwd mm2, mm2
+    punpcklwd mm3, mm3
+    psrad mm2, (DWORD_BIT-WORD_BIT)
+    psrad mm3, (DWORD_BIT-WORD_BIT)
+    pi2fd mm2, mm2
+    pi2fd mm3, mm3
+
+    pfmul mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize rows 1 and 3
+    pfmul mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+    punpcklwd mm5, mm5
+    punpcklwd mm1, mm1
+    psrad mm5, (DWORD_BIT-WORD_BIT)
+    psrad mm1, (DWORD_BIT-WORD_BIT)
+    pi2fd mm5, mm5
+    pi2fd mm1, mm1
+
+    pfmul mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize rows 5 and 7
+    pfmul mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+    movq mm4, mm2
+    movq mm0, mm5
+    pfadd mm2, mm1  ; mm2=z11
+    pfadd mm5, mm3  ; mm5=z13
+    pfsub mm4, mm1  ; mm4=z12
+    pfsub mm0, mm3  ; mm0=z10
+
+    movq mm1, mm2
+    pfsub mm2, mm5
+    pfadd mm1, mm5  ; mm1=tmp7
+
+    pfmul mm2, [GOTOFF(ebx,PD_1_414)]  ; mm2=tmp11
+
+    movq mm3, mm0
+    pfadd mm0, mm4
+    pfmul mm0, [GOTOFF(ebx,PD_1_847)]  ; mm0=z5
+    pfmul mm3, [GOTOFF(ebx,PD_2_613)]  ; mm3=(z10 * 2.613125930)
+    pfmul mm4, [GOTOFF(ebx,PD_1_082)]  ; mm4=(z12 * 1.082392200)
+    pfsubr mm3, mm0  ; mm3=tmp12 (reversed subtract: z5 - z10 * 2.613125930)
+    pfsub mm4, mm0  ; mm4=tmp10
+
+    ; -- Final output stage
+
+    pfsub mm3, mm1  ; mm3=tmp6
+    movq mm5, mm6
+    movq mm0, mm7
+    pfadd mm6, mm1  ; mm6=data0=(00 01)
+    pfadd mm7, mm3  ; mm7=data1=(10 11)
+    pfsub mm5, mm1  ; mm5=data7=(70 71)
+    pfsub mm0, mm3  ; mm0=data6=(60 61)
+    pfsub mm2, mm3  ; mm2=tmp5
+
+    movq mm1, mm6  ; transpose coefficients
+    punpckldq mm6, mm7  ; mm6=(00 10)
+    punpckhdq mm1, mm7  ; mm1=(01 11)
+    movq mm3, mm0  ; transpose coefficients
+    punpckldq mm0, mm5  ; mm0=(60 70)
+    punpckhdq mm3, mm5  ; mm3=(61 71)
+
+    movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm6
+    movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1
+    movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+    movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm3
+
+    movq mm7, MMWORD [wk(0)]  ; mm7=tmp2
+    movq mm5, MMWORD [wk(1)]  ; mm5=tmp3
+
+    pfadd mm4, mm2  ; mm4=tmp4
+    movq mm6, mm7
+    movq mm1, mm5
+    pfadd mm7, mm2  ; mm7=data2=(20 21)
+    pfadd mm5, mm4  ; mm5=data4=(40 41)
+    pfsub mm6, mm2  ; mm6=data5=(50 51)
+    pfsub mm1, mm4  ; mm1=data3=(30 31)
+
+    movq mm0, mm7  ; transpose coefficients
+    punpckldq mm7, mm1  ; mm7=(20 30)
+    punpckhdq mm0, mm1  ; mm0=(21 31)
+    movq mm3, mm5  ; transpose coefficients
+    punpckldq mm5, mm6  ; mm5=(40 50)
+    punpckhdq mm3, mm6  ; mm3=(41 51)
+
+    movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm7
+    movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm0
+    movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5
+    movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm3
+
+.nextcolumn:
+    add esi, byte 2*SIZEOF_JCOEF  ; coef_block
+    add edx, byte 2*SIZEOF_FLOAT_MULT_TYPE  ; quantptr
+    add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT  ; wsptr
+    dec ecx  ; ctr
+    jnz near .columnloop
+
+    ; -- Prefetch the next coefficient block
+
+    prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]  ; esi has advanced 8 coefficients, hence the (DCTSIZE2-8) offset
+    prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+    prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+    prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+
+    ; ---- Pass 2: process rows from work array, store into output array.
+
+    mov eax, [original_ebp]  ; reload the frame pointer (eax was clobbered by the zero-column test)
+    lea esi, [workspace]  ; FAST_FLOAT *wsptr
+    mov edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
+    mov eax, JDIMENSION [output_col(eax)]  ; eax = output_col from here on
+    mov ecx, DCTSIZE/2  ; ctr
+    alignx 16, 7
+.rowloop:
+
+    ; -- Even part
+
+    movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+    movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+    movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+    movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+
+    movq mm4, mm0
+    movq mm5, mm1
+    pfsub mm0, mm2  ; mm0=tmp11
+    pfsub mm1, mm3
+    pfadd mm4, mm2  ; mm4=tmp10
+    pfadd mm5, mm3  ; mm5=tmp13
+
+    pfmul mm1, [GOTOFF(ebx,PD_1_414)]
+    pfsub mm1, mm5  ; mm1=tmp12
+
+    movq mm6, mm4
+    movq mm7, mm0
+    pfsub mm4, mm5  ; mm4=tmp3
+    pfsub mm0, mm1  ; mm0=tmp2
+    pfadd mm6, mm5  ; mm6=tmp0
+    pfadd mm7, mm1  ; mm7=tmp1
+
+    movq MMWORD [wk(1)], mm4  ; tmp3
+    movq MMWORD [wk(0)], mm0  ; tmp2
+
+    ; -- Odd part
+
+    movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+    movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+    movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+    movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+
+    movq mm4, mm2
+    movq mm0, mm5
+    pfadd mm2, mm1  ; mm2=z11
+    pfadd mm5, mm3  ; mm5=z13
+    pfsub mm4, mm1  ; mm4=z12
+    pfsub mm0, mm3  ; mm0=z10
+
+    movq mm1, mm2
+    pfsub mm2, mm5
+    pfadd mm1, mm5  ; mm1=tmp7
+
+    pfmul mm2, [GOTOFF(ebx,PD_1_414)]  ; mm2=tmp11
+
+    movq mm3, mm0
+    pfadd mm0, mm4
+    pfmul mm0, [GOTOFF(ebx,PD_1_847)]  ; mm0=z5
+    pfmul mm3, [GOTOFF(ebx,PD_2_613)]  ; mm3=(z10 * 2.613125930)
+    pfmul mm4, [GOTOFF(ebx,PD_1_082)]  ; mm4=(z12 * 1.082392200)
+    pfsubr mm3, mm0  ; mm3=tmp12 (reversed subtract: z5 - z10 * 2.613125930)
+    pfsub mm4, mm0  ; mm4=tmp10
+
+    ; -- Final output stage
+
+    pfsub mm3, mm1  ; mm3=tmp6
+    movq mm5, mm6
+    movq mm0, mm7
+    pfadd mm6, mm1  ; mm6=data0=(00 10)
+    pfadd mm7, mm3  ; mm7=data1=(01 11)
+    pfsub mm5, mm1  ; mm5=data7=(07 17)
+    pfsub mm0, mm3  ; mm0=data6=(06 16)
+    pfsub mm2, mm3  ; mm2=tmp5
+
+    movq mm1, [GOTOFF(ebx,PD_RNDINT_MAGIC)]  ; mm1=[PD_RNDINT_MAGIC]
+    pcmpeqd mm3, mm3  ; all ones
+    psrld mm3, WORD_BIT  ; mm3={0xFFFF 0x0000 0xFFFF 0x0000}
+
+    pfadd mm6, mm1  ; mm6=roundint(data0/8)=(00 ** 10 **)
+    pfadd mm7, mm1  ; mm7=roundint(data1/8)=(01 ** 11 **)
+    pfadd mm0, mm1  ; mm0=roundint(data6/8)=(06 ** 16 **)
+    pfadd mm5, mm1  ; mm5=roundint(data7/8)=(07 ** 17 **)
+
+    pand mm6, mm3  ; mm6=(00 -- 10 --)
+    pslld mm7, WORD_BIT  ; mm7=(-- 01 -- 11)
+    pand mm0, mm3  ; mm0=(06 -- 16 --)
+    pslld mm5, WORD_BIT  ; mm5=(-- 07 -- 17)
+    por mm6, mm7  ; mm6=(00 01 10 11)
+    por mm0, mm5  ; mm0=(06 07 16 17)
+
+    movq mm1, MMWORD [wk(0)]  ; mm1=tmp2
+    movq mm3, MMWORD [wk(1)]  ; mm3=tmp3
+
+    pfadd mm4, mm2  ; mm4=tmp4
+    movq mm7, mm1
+    movq mm5, mm3
+    pfadd mm1, mm2  ; mm1=data2=(02 12)
+    pfadd mm3, mm4  ; mm3=data4=(04 14)
+    pfsub mm7, mm2  ; mm7=data5=(05 15)
+    pfsub mm5, mm4  ; mm5=data3=(03 13)
+
+    movq mm2, [GOTOFF(ebx,PD_RNDINT_MAGIC)]  ; mm2=[PD_RNDINT_MAGIC]
+    pcmpeqd mm4, mm4  ; all ones
+    psrld mm4, WORD_BIT  ; mm4={0xFFFF 0x0000 0xFFFF 0x0000}
+
+    pfadd mm3, mm2  ; mm3=roundint(data4/8)=(04 ** 14 **)
+    pfadd mm7, mm2  ; mm7=roundint(data5/8)=(05 ** 15 **)
+    pfadd mm1, mm2  ; mm1=roundint(data2/8)=(02 ** 12 **)
+    pfadd mm5, mm2  ; mm5=roundint(data3/8)=(03 ** 13 **)
+
+    pand mm3, mm4  ; mm3=(04 -- 14 --)
+    pslld mm7, WORD_BIT  ; mm7=(-- 05 -- 15)
+    pand mm1, mm4  ; mm1=(02 -- 12 --)
+    pslld mm5, WORD_BIT  ; mm5=(-- 03 -- 13)
+    por mm3, mm7  ; mm3=(04 05 14 15)
+    por mm1, mm5  ; mm1=(02 03 12 13)
+
+    movq mm2, [GOTOFF(ebx,PB_CENTERJSAMP)]  ; mm2=[PB_CENTERJSAMP]
+
+    packsswb mm6, mm3  ; mm6=(00 01 10 11 04 05 14 15)
+    packsswb mm1, mm0  ; mm1=(02 03 12 13 06 07 16 17)
+    paddb mm6, mm2  ; add the sample bias to the signed-saturated bytes
+    paddb mm1, mm2
+
+    movq mm4, mm6  ; transpose coefficients(phase 2)
+    punpcklwd mm6, mm1  ; mm6=(00 01 02 03 10 11 12 13)
+    punpckhwd mm4, mm1  ; mm4=(04 05 06 07 14 15 16 17)
+
+    movq mm7, mm6  ; transpose coefficients(phase 3)
+    punpckldq mm6, mm4  ; mm6=(00 01 02 03 04 05 06 07)
+    punpckhdq mm7, mm4  ; mm7=(10 11 12 13 14 15 16 17)
+
+    pushpic ebx  ; save GOT address
+
+    mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]  ; the two output row pointers for this iteration
+    mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+    movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6  ; eight samples at column output_col
+    movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
+
+    poppic ebx  ; restore GOT address
+
+    add esi, byte 2*SIZEOF_FAST_FLOAT  ; wsptr
+    add edi, byte 2*SIZEOF_JSAMPROW
+    dec ecx  ; ctr
+    jnz near .rowloop
+
+    femms  ; empty MMX/3DNow! state
+
+    pop edi
+    pop esi
+;   pop edx  ; need not be preserved
+;   pop ecx  ; need not be preserved
+    pop ebx
+    mov esp, ebp  ; esp <- aligned ebp
+    pop esp  ; esp <- original ebp
+    pop ebp
+    ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+    align 32
diff --git a/simd/i386/jidctflt-sse.asm b/simd/i386/jidctflt-sse.asm
new file mode 100644
index 0000000..386650f
--- /dev/null
+++ b/simd/i386/jidctflt-sse.asm
@@ -0,0 +1,573 @@
+;
+; jidctflt.asm - floating-point IDCT (SSE & MMX)
+;
+; Copyright 2009 Pierre Ossman for Cendio AB
+; Copyright (C) 2016, D. R. Commander.
+;
+; Based on the x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see the jidctflt.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%macro unpcklps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+    shufps %1, %2, 0x44
+%endmacro
+
+%macro unpckhps2 2  ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+    shufps %1, %2, 0xEE
+%endmacro
+
+; --------------------------------------------------------------------------
+    SECTION SEG_CONST
+
+    alignz 32
+    GLOBAL_DATA(jconst_idct_float_sse)
+
+EXTN(jconst_idct_float_sse):  ; constants below are addressed GOT-relative via GOTOFF(ebx,...)
+
+PD_1_414 times 4 dd 1.414213562373095048801689
+PD_1_847 times 4 dd 1.847759065022573512256366
+PD_1_082 times 4 dd 1.082392200292393968799446
+PD_M2_613 times 4 dd -2.613125929752753055713286
+PD_0_125 times 4 dd 0.125  ; 1/8
+PB_CENTERJSAMP times 8 db CENTERJSAMPLE  ; per-byte bias added after packing (CENTERJSAMPLE is defined outside this file)
+
+    alignz 32
+
+; --------------------------------------------------------------------------
+    SECTION SEG_TEXT
+    BITS 32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+; Pass 1 handles four columns per iteration and stores floats, transposed,
+; into workspace[]; pass 2 handles four rows per iteration and stores bytes.
+; GLOBAL(void)
+; jsimd_idct_float_sse(void *dct_table, JCOEFPTR coef_block,
+;                      JSAMPARRAY output_buf, JDIMENSION output_col)
+; The four arguments are read from the stack through the offsets below.
+
+%define dct_table(b) (b) + 8  ; void *dct_table
+%define coef_block(b) (b) + 12  ; JCOEFPTR coef_block
+%define output_buf(b) (b) + 16  ; JSAMPARRAY output_buf
+%define output_col(b) (b) + 20  ; JDIMENSION output_col
+
+%define original_ebp ebp + 0  ; slot at the aligned frame base holding the pre-alignment stack pointer
+%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+                    ; xmmword wk[WK_NUM]
+%define WK_NUM 2
+%define workspace wk(0) - DCTSIZE2 * SIZEOF_FAST_FLOAT
+                    ; FAST_FLOAT workspace[DCTSIZE2]
+
+    align 32
+    GLOBAL_FUNCTION(jsimd_idct_float_sse)
+
+EXTN(jsimd_idct_float_sse):
+    push ebp
+    mov eax, esp  ; eax = original ebp
+    sub esp, byte 4  ; make room so the aligned slot lies below the saved ebp
+    and esp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
+    mov [esp], eax  ; remember the unaligned frame pointer at [original_ebp]
+    mov ebp, esp  ; ebp = aligned ebp
+    lea esp, [workspace]  ; reserve wk[] and workspace[] below the aligned ebp
+    push ebx
+;   push ecx  ; need not be preserved
+;   push edx  ; need not be preserved
+    push esi
+    push edi
+
+    get_GOT ebx  ; get GOT address
+
+    ; ---- Pass 1: process columns from input, store into work array.
+
+;   mov eax, [original_ebp]  ; (not needed: eax still holds it from the prologue)
+    mov edx, POINTER [dct_table(eax)]  ; quantptr
+    mov esi, JCOEFPTR [coef_block(eax)]  ; inptr
+    lea edi, [workspace]  ; FAST_FLOAT *wsptr
+    mov ecx, DCTSIZE/4  ; ctr
+    alignx 16, 7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
+    mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]  ; quick pre-check on the first dword of rows 1-2
+    or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+    jnz near .columnDCT
+
+    movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]  ; OR together rows 1-7 of all four columns
+    movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+    por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+    por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+    por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+    por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+    por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+    por mm1, mm0
+    packsswb mm1, mm1  ; squeeze the four words into four bytes; saturation keeps nonzero words nonzero
+    movd eax, mm1
+    test eax, eax
+    jnz short .columnDCT
+
+    ; -- AC terms all zero
+
+    movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]  ; DC terms of the four columns
+
+    punpckhwd mm1, mm0  ; mm1=(** 02 ** 03)
+    punpcklwd mm0, mm0  ; mm0=(00 00 01 01)
+    psrad mm1, (DWORD_BIT-WORD_BIT)  ; mm1=in0H=(02 03)
+    psrad mm0, (DWORD_BIT-WORD_BIT)  ; mm0=in0L=(00 01)
+    cvtpi2ps xmm3, mm1  ; xmm3=(02 03 ** **)
+    cvtpi2ps xmm0, mm0  ; xmm0=(00 01 ** **)
+    movlhps xmm0, xmm3  ; xmm0=in0=(00 01 02 03)
+
+    mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize
+
+    movaps xmm1, xmm0
+    movaps xmm2, xmm0
+    movaps xmm3, xmm0
+
+    shufps xmm0, xmm0, 0x00  ; xmm0=(00 00 00 00)
+    shufps xmm1, xmm1, 0x55  ; xmm1=(01 01 01 01)
+    shufps xmm2, xmm2, 0xAA  ; xmm2=(02 02 02 02)
+    shufps xmm3, xmm3, 0xFF  ; xmm3=(03 03 03 03)
+
+    movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0  ; each column's DC value fills its 8 workspace entries
+    movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
+    movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+    movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
+    movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
+    movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
+    movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+    movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+    jmp near .nextcolumn
+    alignx 16, 7
+%endif
+.columnDCT:
+
+    ; -- Even part
+
+    movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+    movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+    movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+    movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+    punpckhwd mm4, mm0  ; mm4=(** 02 ** 03)
+    punpcklwd mm0, mm0  ; mm0=(00 00 01 01)
+    punpckhwd mm5, mm1  ; mm5=(** 22 ** 23)
+    punpcklwd mm1, mm1  ; mm1=(20 20 21 21)
+
+    psrad mm4, (DWORD_BIT-WORD_BIT)  ; mm4=in0H=(02 03)
+    psrad mm0, (DWORD_BIT-WORD_BIT)  ; mm0=in0L=(00 01)
+    cvtpi2ps xmm4, mm4  ; xmm4=(02 03 ** **)
+    cvtpi2ps xmm0, mm0  ; xmm0=(00 01 ** **)
+    psrad mm5, (DWORD_BIT-WORD_BIT)  ; mm5=in2H=(22 23)
+    psrad mm1, (DWORD_BIT-WORD_BIT)  ; mm1=in2L=(20 21)
+    cvtpi2ps xmm5, mm5  ; xmm5=(22 23 ** **)
+    cvtpi2ps xmm1, mm1  ; xmm1=(20 21 ** **)
+
+    punpckhwd mm6, mm2  ; mm6=(** 42 ** 43)
+    punpcklwd mm2, mm2  ; mm2=(40 40 41 41)
+    punpckhwd mm7, mm3  ; mm7=(** 62 ** 63)
+    punpcklwd mm3, mm3  ; mm3=(60 60 61 61)
+
+    psrad mm6, (DWORD_BIT-WORD_BIT)  ; mm6=in4H=(42 43)
+    psrad mm2, (DWORD_BIT-WORD_BIT)  ; mm2=in4L=(40 41)
+    cvtpi2ps xmm6, mm6  ; xmm6=(42 43 ** **)
+    cvtpi2ps xmm2, mm2  ; xmm2=(40 41 ** **)
+    psrad mm7, (DWORD_BIT-WORD_BIT)  ; mm7=in6H=(62 63)
+    psrad mm3, (DWORD_BIT-WORD_BIT)  ; mm3=in6L=(60 61)
+    cvtpi2ps xmm7, mm7  ; xmm7=(62 63 ** **)
+    cvtpi2ps xmm3, mm3  ; xmm3=(60 61 ** **)
+
+    movlhps xmm0, xmm4  ; xmm0=in0=(00 01 02 03)
+    movlhps xmm1, xmm5  ; xmm1=in2=(20 21 22 23)
+    mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize rows 0 and 2
+    mulps xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+    movlhps xmm2, xmm6  ; xmm2=in4=(40 41 42 43)
+    movlhps xmm3, xmm7  ; xmm3=in6=(60 61 62 63)
+    mulps xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize rows 4 and 6
+    mulps xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+    movaps xmm4, xmm0
+    movaps xmm5, xmm1
+    subps xmm0, xmm2  ; xmm0=tmp11
+    subps xmm1, xmm3
+    addps xmm4, xmm2  ; xmm4=tmp10
+    addps xmm5, xmm3  ; xmm5=tmp13
+
+    mulps xmm1, [GOTOFF(ebx,PD_1_414)]
+    subps xmm1, xmm5  ; xmm1=tmp12
+
+    movaps xmm6, xmm4
+    movaps xmm7, xmm0
+    subps xmm4, xmm5  ; xmm4=tmp3
+    subps xmm0, xmm1  ; xmm0=tmp2
+    addps xmm6, xmm5  ; xmm6=tmp0
+    addps xmm7, xmm1  ; xmm7=tmp1
+
+    movaps XMMWORD [wk(1)], xmm4  ; tmp3
+    movaps XMMWORD [wk(0)], xmm0  ; tmp2
+
+    ; -- Odd part
+
+    movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+    movq mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+    movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+    movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+    punpckhwd mm6, mm4  ; mm6=(** 12 ** 13)
+    punpcklwd mm4, mm4  ; mm4=(10 10 11 11)
+    punpckhwd mm2, mm0  ; mm2=(** 32 ** 33)
+    punpcklwd mm0, mm0  ; mm0=(30 30 31 31)
+
+    psrad mm6, (DWORD_BIT-WORD_BIT)  ; mm6=in1H=(12 13)
+    psrad mm4, (DWORD_BIT-WORD_BIT)  ; mm4=in1L=(10 11)
+    cvtpi2ps xmm4, mm6  ; xmm4=(12 13 ** **)
+    cvtpi2ps xmm2, mm4  ; xmm2=(10 11 ** **)
+    psrad mm2, (DWORD_BIT-WORD_BIT)  ; mm2=in3H=(32 33)
+    psrad mm0, (DWORD_BIT-WORD_BIT)  ; mm0=in3L=(30 31)
+    cvtpi2ps xmm0, mm2  ; xmm0=(32 33 ** **)
+    cvtpi2ps xmm3, mm0  ; xmm3=(30 31 ** **)
+
+    punpckhwd mm7, mm5  ; mm7=(** 52 ** 53)
+    punpcklwd mm5, mm5  ; mm5=(50 50 51 51)
+    punpckhwd mm3, mm1  ; mm3=(** 72 ** 73)
+    punpcklwd mm1, mm1  ; mm1=(70 70 71 71)
+
+    movlhps xmm2, xmm4  ; xmm2=in1=(10 11 12 13)
+    movlhps xmm3, xmm0  ; xmm3=in3=(30 31 32 33)
+
+    psrad mm7, (DWORD_BIT-WORD_BIT)  ; mm7=in5H=(52 53)
+    psrad mm5, (DWORD_BIT-WORD_BIT)  ; mm5=in5L=(50 51)
+    cvtpi2ps xmm4, mm7  ; xmm4=(52 53 ** **)
+    cvtpi2ps xmm5, mm5  ; xmm5=(50 51 ** **)
+    psrad mm3, (DWORD_BIT-WORD_BIT)  ; mm3=in7H=(72 73)
+    psrad mm1, (DWORD_BIT-WORD_BIT)  ; mm1=in7L=(70 71)
+    cvtpi2ps xmm0, mm3  ; xmm0=(72 73 ** **)
+    cvtpi2ps xmm1, mm1  ; xmm1=(70 71 ** **)
+
+    mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize rows 1 and 3
+    mulps xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+    movlhps xmm5, xmm4  ; xmm5=in5=(50 51 52 53)
+    movlhps xmm1, xmm0  ; xmm1=in7=(70 71 72 73)
+    mulps xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize rows 5 and 7
+    mulps xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+    movaps xmm4, xmm2
+    movaps xmm0, xmm5
+    addps xmm2, xmm1  ; xmm2=z11
+    addps xmm5, xmm3  ; xmm5=z13
+    subps xmm4, xmm1  ; xmm4=z12
+    subps xmm0, xmm3  ; xmm0=z10
+
+    movaps xmm1, xmm2
+    subps xmm2, xmm5
+    addps xmm1, xmm5  ; xmm1=tmp7
+
+    mulps xmm2, [GOTOFF(ebx,PD_1_414)]  ; xmm2=tmp11
+
+    movaps xmm3, xmm0
+    addps xmm0, xmm4
+    mulps xmm0, [GOTOFF(ebx,PD_1_847)]  ; xmm0=z5
+    mulps xmm3, [GOTOFF(ebx,PD_M2_613)]  ; xmm3=(z10 * -2.613125930)
+    mulps xmm4, [GOTOFF(ebx,PD_1_082)]  ; xmm4=(z12 * 1.082392200)
+    addps xmm3, xmm0  ; xmm3=tmp12
+    subps xmm4, xmm0  ; xmm4=tmp10
+
+    ; -- Final output stage
+
+    subps xmm3, xmm1  ; xmm3=tmp6
+    movaps xmm5, xmm6
+    movaps xmm0, xmm7
+    addps xmm6, xmm1  ; xmm6=data0=(00 01 02 03)
+    addps xmm7, xmm3  ; xmm7=data1=(10 11 12 13)
+    subps xmm5, xmm1  ; xmm5=data7=(70 71 72 73)
+    subps xmm0, xmm3  ; xmm0=data6=(60 61 62 63)
+    subps xmm2, xmm3  ; xmm2=tmp5
+
+    movaps xmm1, xmm6  ; transpose coefficients(phase 1)
+    unpcklps xmm6, xmm7  ; xmm6=(00 10 01 11)
+    unpckhps xmm1, xmm7  ; xmm1=(02 12 03 13)
+    movaps xmm3, xmm0  ; transpose coefficients(phase 1)
+    unpcklps xmm0, xmm5  ; xmm0=(60 70 61 71)
+    unpckhps xmm3, xmm5  ; xmm3=(62 72 63 73)
+
+    movaps xmm7, XMMWORD [wk(0)]  ; xmm7=tmp2
+    movaps xmm5, XMMWORD [wk(1)]  ; xmm5=tmp3
+
+    movaps XMMWORD [wk(0)], xmm0  ; wk(0)=(60 70 61 71)
+    movaps XMMWORD [wk(1)], xmm3  ; wk(1)=(62 72 63 73)
+
+    addps xmm4, xmm2  ; xmm4=tmp4
+    movaps xmm0, xmm7
+    movaps xmm3, xmm5
+    addps xmm7, xmm2  ; xmm7=data2=(20 21 22 23)
+    addps xmm5, xmm4  ; xmm5=data4=(40 41 42 43)
+    subps xmm0, xmm2  ; xmm0=data5=(50 51 52 53)
+    subps xmm3, xmm4  ; xmm3=data3=(30 31 32 33)
+
+    movaps xmm2, xmm7  ; transpose coefficients(phase 1)
+    unpcklps xmm7, xmm3  ; xmm7=(20 30 21 31)
+    unpckhps xmm2, xmm3  ; xmm2=(22 32 23 33)
+    movaps xmm4, xmm5  ; transpose coefficients(phase 1)
+    unpcklps xmm5, xmm0  ; xmm5=(40 50 41 51)
+    unpckhps xmm4, xmm0  ; xmm4=(42 52 43 53)
+
+    movaps xmm3, xmm6  ; transpose coefficients(phase 2)
+    unpcklps2 xmm6, xmm7  ; xmm6=(00 10 20 30)
+    unpckhps2 xmm3, xmm7  ; xmm3=(01 11 21 31)
+    movaps xmm0, xmm1  ; transpose coefficients(phase 2)
+    unpcklps2 xmm1, xmm2  ; xmm1=(02 12 22 32)
+    unpckhps2 xmm0, xmm2  ; xmm0=(03 13 23 33)
+
+    movaps xmm7, XMMWORD [wk(0)]  ; xmm7=(60 70 61 71)
+    movaps xmm2, XMMWORD [wk(1)]  ; xmm2=(62 72 63 73)
+
+    movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
+    movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+    movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+    movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+
+    movaps xmm6, xmm5  ; transpose coefficients(phase 2)
+    unpcklps2 xmm5, xmm7  ; xmm5=(40 50 60 70)
+    unpckhps2 xmm6, xmm7  ; xmm6=(41 51 61 71)
+    movaps xmm3, xmm4  ; transpose coefficients(phase 2)
+    unpcklps2 xmm4, xmm2  ; xmm4=(42 52 62 72)
+    unpckhps2 xmm3, xmm2  ; xmm3=(43 53 63 73)
+
+    movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
+    movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
+    movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
+    movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+
+.nextcolumn:
+    add esi, byte 4*SIZEOF_JCOEF  ; coef_block
+    add edx, byte 4*SIZEOF_FLOAT_MULT_TYPE  ; quantptr
+    add edi, 4*DCTSIZE*SIZEOF_FAST_FLOAT  ; wsptr
+    dec ecx  ; ctr
+    jnz near .columnloop
+
+    ; -- Prefetch the next coefficient block
+
+    prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]  ; esi has advanced 8 coefficients, hence the (DCTSIZE2-8) offset
+    prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+    prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+    prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+
+    ; ---- Pass 2: process rows from work array, store into output array.
+
+    mov eax, [original_ebp]  ; reload the frame pointer (eax was clobbered by the zero-column test)
+    lea esi, [workspace]  ; FAST_FLOAT *wsptr
+    mov edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
+    mov eax, JDIMENSION [output_col(eax)]  ; eax = output_col from here on
+    mov ecx, DCTSIZE/4  ; ctr
+    alignx 16, 7
+.rowloop:
+
+    ; -- Even part
+
+    movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+    movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+    movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+    movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+
+    movaps xmm4, xmm0
+    movaps xmm5, xmm1
+    subps xmm0, xmm2  ; xmm0=tmp11
+    subps xmm1, xmm3
+    addps xmm4, xmm2  ; xmm4=tmp10
+    addps xmm5, xmm3  ; xmm5=tmp13
+
+    mulps xmm1, [GOTOFF(ebx,PD_1_414)]
+    subps xmm1, xmm5  ; xmm1=tmp12
+
+    movaps xmm6, xmm4
+    movaps xmm7, xmm0
+    subps xmm4, xmm5  ; xmm4=tmp3
+    subps xmm0, xmm1  ; xmm0=tmp2
+    addps xmm6, xmm5  ; xmm6=tmp0
+    addps xmm7, xmm1  ; xmm7=tmp1
+
+    movaps XMMWORD [wk(1)], xmm4  ; tmp3
+    movaps XMMWORD [wk(0)], xmm0  ; tmp2
+
+    ; -- Odd part
+
+    movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+    movaps xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+    movaps xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+    movaps xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+
+    movaps xmm4, xmm2
+    movaps xmm0, xmm5
+    addps xmm2, xmm1  ; xmm2=z11
+    addps xmm5, xmm3  ; xmm5=z13
+    subps xmm4, xmm1  ; xmm4=z12
+    subps xmm0, xmm3  ; xmm0=z10
+
+    movaps xmm1, xmm2
+    subps xmm2, xmm5
+    addps xmm1, xmm5  ; xmm1=tmp7
+
+    mulps xmm2, [GOTOFF(ebx,PD_1_414)]  ; xmm2=tmp11
+
+    movaps xmm3, xmm0
+    addps xmm0, xmm4
+    mulps xmm0, [GOTOFF(ebx,PD_1_847)]  ; xmm0=z5
+    mulps xmm3, [GOTOFF(ebx,PD_M2_613)]  ; xmm3=(z10 * -2.613125930)
+    mulps xmm4, [GOTOFF(ebx,PD_1_082)]  ; xmm4=(z12 * 1.082392200)
+    addps xmm3, xmm0  ; xmm3=tmp12
+    subps xmm4, xmm0  ; xmm4=tmp10
+
+    ; -- Final output stage
+
+    subps xmm3, xmm1  ; xmm3=tmp6
+    movaps xmm5, xmm6
+    movaps xmm0, xmm7
+    addps xmm6, xmm1  ; xmm6=data0=(00 10 20 30)
+    addps xmm7, xmm3  ; xmm7=data1=(01 11 21 31)
+    subps xmm5, xmm1  ; xmm5=data7=(07 17 27 37)
+    subps xmm0, xmm3  ; xmm0=data6=(06 16 26 36)
+    subps xmm2, xmm3  ; xmm2=tmp5
+
+    movaps xmm1, [GOTOFF(ebx,PD_0_125)]  ; xmm1=[PD_0_125]
+
+    mulps xmm6, xmm1  ; descale(1/8)
+    mulps xmm7, xmm1  ; descale(1/8)
+    mulps xmm5, xmm1  ; descale(1/8)
+    mulps xmm0, xmm1  ; descale(1/8)
+
+    movhlps xmm3, xmm6  ; bring the upper two lanes down, since cvtps2pi converts only the low pair
+    movhlps xmm1, xmm7
+    cvtps2pi mm0, xmm6  ; round to int32, mm0=data0L=(00 10)
+    cvtps2pi mm1, xmm7  ; round to int32, mm1=data1L=(01 11)
+    cvtps2pi mm2, xmm3  ; round to int32, mm2=data0H=(20 30)
+    cvtps2pi mm3, xmm1  ; round to int32, mm3=data1H=(21 31)
+    packssdw mm0, mm2  ; mm0=data0=(00 10 20 30)
+    packssdw mm1, mm3  ; mm1=data1=(01 11 21 31)
+
+    movhlps xmm6, xmm5
+    movhlps xmm7, xmm0
+    cvtps2pi mm4, xmm5  ; round to int32, mm4=data7L=(07 17)
+    cvtps2pi mm5, xmm0  ; round to int32, mm5=data6L=(06 16)
+    cvtps2pi mm6, xmm6  ; round to int32, mm6=data7H=(27 37)
+    cvtps2pi mm7, xmm7  ; round to int32, mm7=data6H=(26 36)
+    packssdw mm4, mm6  ; mm4=data7=(07 17 27 37)
+    packssdw mm5, mm7  ; mm5=data6=(06 16 26 36)
+
+    packsswb mm0, mm5  ; mm0=(00 10 20 30 06 16 26 36)
+    packsswb mm1, mm4  ; mm1=(01 11 21 31 07 17 27 37)
+
+    movaps xmm3, XMMWORD [wk(0)]  ; xmm3=tmp2
+    movaps xmm1, XMMWORD [wk(1)]  ; xmm1=tmp3
+
+    movaps xmm6, [GOTOFF(ebx,PD_0_125)]  ; xmm6=[PD_0_125]
+
+    addps xmm4, xmm2  ; xmm4=tmp4
+    movaps xmm5, xmm3
+    movaps xmm0, xmm1
+    addps xmm3, xmm2  ; xmm3=data2=(02 12 22 32)
+    addps xmm1, xmm4  ; xmm1=data4=(04 14 24 34)
+    subps xmm5, xmm2  ; xmm5=data5=(05 15 25 35)
+    subps xmm0, xmm4  ; xmm0=data3=(03 13 23 33)
+
+    mulps xmm3, xmm6  ; descale(1/8)
+    mulps xmm1, xmm6  ; descale(1/8)
+    mulps xmm5, xmm6  ; descale(1/8)
+    mulps xmm0, xmm6  ; descale(1/8)
+
+    movhlps xmm7, xmm3
+    movhlps xmm2, xmm1
+    cvtps2pi mm2, xmm3  ; round to int32, mm2=data2L=(02 12)
+    cvtps2pi mm3, xmm1  ; round to int32, mm3=data4L=(04 14)
+    cvtps2pi mm6, xmm7  ; round to int32, mm6=data2H=(22 32)
+    cvtps2pi mm7, xmm2  ; round to int32, mm7=data4H=(24 34)
+    packssdw mm2, mm6  ; mm2=data2=(02 12 22 32)
+    packssdw mm3, mm7  ; mm3=data4=(04 14 24 34)
+
+    movhlps xmm4, xmm5
+    movhlps xmm6, xmm0
+    cvtps2pi mm5, xmm5  ; round to int32, mm5=data5L=(05 15)
+    cvtps2pi mm4, xmm0  ; round to int32, mm4=data3L=(03 13)
+    cvtps2pi mm6, xmm4  ; round to int32, mm6=data5H=(25 35)
+    cvtps2pi mm7, xmm6  ; round to int32, mm7=data3H=(23 33)
+    packssdw mm5, mm6  ; mm5=data5=(05 15 25 35)
+    packssdw mm4, mm7  ; mm4=data3=(03 13 23 33)
+
+    movq mm6, [GOTOFF(ebx,PB_CENTERJSAMP)]  ; mm6=[PB_CENTERJSAMP]
+
+    packsswb mm2, mm3  ; mm2=(02 12 22 32 04 14 24 34)
+    packsswb mm4, mm5  ; mm4=(03 13 23 33 05 15 25 35)
+
+    paddb mm0, mm6  ; add the sample bias to the signed-saturated bytes
+    paddb mm1, mm6
+    paddb mm2, mm6
+    paddb mm4, mm6
+
+    movq mm7, mm0  ; transpose coefficients(phase 1)
+    punpcklbw mm0, mm1  ; mm0=(00 01 10 11 20 21 30 31)
+    punpckhbw mm7, mm1  ; mm7=(06 07 16 17 26 27 36 37)
+    movq mm3, mm2  ; transpose coefficients(phase 1)
+    punpcklbw mm2, mm4  ; mm2=(02 03 12 13 22 23 32 33)
+    punpckhbw mm3, mm4  ; mm3=(04 05 14 15 24 25 34 35)
+
+    movq mm5, mm0  ; transpose coefficients(phase 2)
+    punpcklwd mm0, mm2  ; mm0=(00 01 02 03 10 11 12 13)
+    punpckhwd mm5, mm2  ; mm5=(20 21 22 23 30 31 32 33)
+    movq mm6, mm3  ; transpose coefficients(phase 2)
+    punpcklwd mm3, mm7  ; mm3=(04 05 06 07 14 15 16 17)
+    punpckhwd mm6, mm7  ; mm6=(24 25 26 27 34 35 36 37)
+
+    movq mm1, mm0  ; transpose coefficients(phase 3)
+    punpckldq mm0, mm3  ; mm0=(00 01 02 03 04 05 06 07)
+    punpckhdq mm1, mm3  ; mm1=(10 11 12 13 14 15 16 17)
+    movq mm4, mm5  ; transpose coefficients(phase 3)
+    punpckldq mm5, mm6  ; mm5=(20 21 22 23 24 25 26 27)
+    punpckhdq mm4, mm6  ; mm4=(30 31 32 33 34 35 36 37)
+
+    pushpic ebx  ; save GOT address
+
+    mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]  ; output row pointers are fetched two at a time (edx, ebx)
+    mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+    movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0  ; eight samples at column output_col
+    movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
+    mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+    mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+    movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
+    movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
+
+    poppic ebx  ; restore GOT address
+
+    add esi, byte 4*SIZEOF_FAST_FLOAT  ; wsptr
+    add edi, byte 4*SIZEOF_JSAMPROW
+    dec ecx  ; ctr
+    jnz near .rowloop
+
+    emms  ; empty MMX state
+
+    pop edi
+    pop esi
+;   pop edx  ; need not be preserved
+;   pop ecx  ; need not be preserved
+    pop ebx
+    mov esp, ebp  ; esp <- aligned ebp
+    pop esp  ; esp <- original ebp
+    pop ebp
+    ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+    align 32
diff --git a/simd/i386/jidctflt-sse2.asm b/simd/i386/jidctflt-sse2.asm
new file mode 100644
index 0000000..9de7139
--- /dev/null
+++ b/simd/i386/jidctflt-sse2.asm
@@ -0,0 +1,499 @@
+;
+; jidctflt.asm - floating-point IDCT (SSE & SSE2)
+;
+; Copyright 2009 Pierre Ossman for Cendio AB
+; Copyright (C) 2016, D. R. Commander.
+;
+; Based on the x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see the jidctflt.c for more details.
+; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1, %2, 0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1, %2, 0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_idct_float_sse2) + +EXTN(jconst_idct_float_sse2): + +PD_1_414 times 4 dd 1.414213562373095048801689 +PD_1_847 times 4 dd 1.847759065022573512256366 +PD_1_082 times 4 dd 1.082392200292393968799446 +PD_M2_613 times 4 dd -2.613125929752753055713286 +PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. 
+; +; GLOBAL(void) +; jsimd_idct_float_sse2(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b) + 8 ; void *dct_table +%define coef_block(b) (b) + 12 ; JCOEFPTR coef_block +%define output_buf(b) (b) + 16 ; JSAMPARRAY output_buf +%define output_col(b) (b) + 20 ; JDIMENSION output_col + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0) - DCTSIZE2 * SIZEOF_FAST_FLOAT + ; FAST_FLOAT workspace[DCTSIZE2] + + align 32 + GLOBAL_FUNCTION(jsimd_idct_float_sse2) + +EXTN(jsimd_idct_float_sse2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. 
+ +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; FAST_FLOAT *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16, 7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq xmm2, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq xmm4, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq xmm6, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + movq xmm7, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1, xmm2 + por xmm3, xmm4 + por xmm5, xmm6 + por xmm1, xmm3 + por xmm5, xmm7 + por xmm1, xmm5 + packsswb xmm1, xmm1 + movd eax, xmm1 + test eax, eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm0, xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + psrad xmm0, (DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm0, xmm0 ; xmm0=in0=(00 01 02 03) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm1, xmm0 + movaps xmm2, xmm0 + movaps xmm3, xmm0 + + shufps xmm0, xmm0, 0x00 ; xmm0=(00 00 00 00) + shufps xmm1, xmm1, 0x55 ; xmm1=(01 01 01 01) + shufps xmm2, xmm2, 0xAA ; xmm2=(02 02 02 02) + shufps xmm3, xmm3, 0xFF ; xmm3=(03 03 03 03) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD 
[XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + jmp near .nextcolumn + alignx 16, 7 +%endif +.columnDCT: + + ; -- Even part + + movq xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq xmm1, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq xmm2, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + punpcklwd xmm0, xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpcklwd xmm1, xmm1 ; xmm1=(20 20 21 21 22 22 23 23) + psrad xmm0, (DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) + psrad xmm1, (DWORD_BIT-WORD_BIT) ; xmm1=in2=(20 21 22 23) + cvtdq2ps xmm0, xmm0 ; xmm0=in0=(00 01 02 03) + cvtdq2ps xmm1, xmm1 ; xmm1=in2=(20 21 22 23) + + punpcklwd xmm2, xmm2 ; xmm2=(40 40 41 41 42 42 43 43) + punpcklwd xmm3, xmm3 ; xmm3=(60 60 61 61 62 62 63 63) + psrad xmm2, (DWORD_BIT-WORD_BIT) ; xmm2=in4=(40 41 42 43) + psrad xmm3, (DWORD_BIT-WORD_BIT) ; xmm3=in6=(60 61 62 63) + cvtdq2ps xmm2, xmm2 ; xmm2=in4=(40 41 42 43) + cvtdq2ps xmm3, xmm3 ; xmm3=in6=(60 61 62 63) + + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4, xmm0 + movaps xmm5, xmm1 + subps xmm0, xmm2 ; xmm0=tmp11 + subps xmm1, xmm3 + addps xmm4, xmm2 ; xmm4=tmp10 + addps xmm5, xmm3 ; xmm5=tmp13 + + mulps xmm1, [GOTOFF(ebx,PD_1_414)] + subps xmm1, xmm5 ; xmm1=tmp12 + + movaps xmm6, xmm4 + movaps xmm7, xmm0 + subps xmm4, xmm5 ; xmm4=tmp3 + subps xmm0, xmm1 ; xmm0=tmp2 + addps xmm6, xmm5 ; xmm6=tmp0 + addps xmm7, xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movq xmm2, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq xmm1, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + punpcklwd 
xmm2, xmm2 ; xmm2=(10 10 11 11 12 12 13 13) + punpcklwd xmm3, xmm3 ; xmm3=(30 30 31 31 32 32 33 33) + psrad xmm2, (DWORD_BIT-WORD_BIT) ; xmm2=in1=(10 11 12 13) + psrad xmm3, (DWORD_BIT-WORD_BIT) ; xmm3=in3=(30 31 32 33) + cvtdq2ps xmm2, xmm2 ; xmm2=in1=(10 11 12 13) + cvtdq2ps xmm3, xmm3 ; xmm3=in3=(30 31 32 33) + + punpcklwd xmm5, xmm5 ; xmm5=(50 50 51 51 52 52 53 53) + punpcklwd xmm1, xmm1 ; xmm1=(70 70 71 71 72 72 73 73) + psrad xmm5, (DWORD_BIT-WORD_BIT) ; xmm5=in5=(50 51 52 53) + psrad xmm1, (DWORD_BIT-WORD_BIT) ; xmm1=in7=(70 71 72 73) + cvtdq2ps xmm5, xmm5 ; xmm5=in5=(50 51 52 53) + cvtdq2ps xmm1, xmm1 ; xmm1=in7=(70 71 72 73) + + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + mulps xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] + + movaps xmm4, xmm2 + movaps xmm0, xmm5 + addps xmm2, xmm1 ; xmm2=z11 + addps xmm5, xmm3 ; xmm5=z13 + subps xmm4, xmm1 ; xmm4=z12 + subps xmm0, xmm3 ; xmm0=z10 + + movaps xmm1, xmm2 + subps xmm2, xmm5 + addps xmm1, xmm5 ; xmm1=tmp7 + + mulps xmm2, [GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3, xmm0 + addps xmm0, xmm4 + mulps xmm0, [GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3, [GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4, [GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3, xmm0 ; xmm3=tmp12 + subps xmm4, xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3, xmm1 ; xmm3=tmp6 + movaps xmm5, xmm6 + movaps xmm0, xmm7 + addps xmm6, xmm1 ; xmm6=data0=(00 01 02 03) + addps xmm7, xmm3 ; xmm7=data1=(10 11 12 13) + subps xmm5, xmm1 ; xmm5=data7=(70 71 72 73) + subps xmm0, xmm3 ; xmm0=data6=(60 61 62 63) + subps xmm2, xmm3 ; xmm2=tmp5 + + movaps xmm1, xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6, xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm1, xmm7 ; xmm1=(02 12 03 13) + movaps xmm3, xmm0 ; transpose coefficients(phase 1) + unpcklps 
xmm0, xmm5 ; xmm0=(60 70 61 71) + unpckhps xmm3, xmm5 ; xmm3=(62 72 63 73) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 + + movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) + movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) + + addps xmm4, xmm2 ; xmm4=tmp4 + movaps xmm0, xmm7 + movaps xmm3, xmm5 + addps xmm7, xmm2 ; xmm7=data2=(20 21 22 23) + addps xmm5, xmm4 ; xmm5=data4=(40 41 42 43) + subps xmm0, xmm2 ; xmm0=data5=(50 51 52 53) + subps xmm3, xmm4 ; xmm3=data3=(30 31 32 33) + + movaps xmm2, xmm7 ; transpose coefficients(phase 1) + unpcklps xmm7, xmm3 ; xmm7=(20 30 21 31) + unpckhps xmm2, xmm3 ; xmm2=(22 32 23 33) + movaps xmm4, xmm5 ; transpose coefficients(phase 1) + unpcklps xmm5, xmm0 ; xmm5=(40 50 41 51) + unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53) + + movaps xmm3, xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30) + unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31) + movaps xmm0, xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32) + unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33) + + movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) + movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + + movaps xmm6, xmm5 ; transpose coefficients(phase 2) + unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70) + unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71) + movaps xmm3, xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72) + unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73) + + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 + movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 + +.nextcolumn: + 
add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr + add edi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; FAST_FLOAT *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16, 7 +.rowloop: + + ; -- Even part + + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4, xmm0 + movaps xmm5, xmm1 + subps xmm0, xmm2 ; xmm0=tmp11 + subps xmm1, xmm3 + addps xmm4, xmm2 ; xmm4=tmp10 + addps xmm5, xmm3 ; xmm5=tmp13 + + mulps xmm1, [GOTOFF(ebx,PD_1_414)] + subps xmm1, xmm5 ; xmm1=tmp12 + + movaps xmm6, xmm4 + movaps xmm7, xmm0 + subps xmm4, xmm5 ; xmm4=tmp3 + subps xmm0, xmm1 ; xmm0=tmp2 + addps xmm6, xmm5 ; xmm6=tmp0 + addps xmm7, xmm1 ; xmm7=tmp1 + + movaps XMMWORD [wk(1)], xmm4 ; tmp3 + movaps XMMWORD [wk(0)], xmm0 ; tmp2 + + ; -- Odd part + + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] + + movaps xmm4, xmm2 + movaps xmm0, xmm5 + addps xmm2, xmm1 ; xmm2=z11 + addps xmm5, xmm3 ; xmm5=z13 + subps xmm4, xmm1 ; xmm4=z12 + subps xmm0, xmm3 ; xmm0=z10 + + movaps xmm1, xmm2 + subps xmm2, xmm5 + addps xmm1, xmm5 ; xmm1=tmp7 + + mulps xmm2, 
[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 + + movaps xmm3, xmm0 + addps xmm0, xmm4 + mulps xmm0, [GOTOFF(ebx,PD_1_847)] ; xmm0=z5 + mulps xmm3, [GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) + mulps xmm4, [GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) + addps xmm3, xmm0 ; xmm3=tmp12 + subps xmm4, xmm0 ; xmm4=tmp10 + + ; -- Final output stage + + subps xmm3, xmm1 ; xmm3=tmp6 + movaps xmm5, xmm6 + movaps xmm0, xmm7 + addps xmm6, xmm1 ; xmm6=data0=(00 10 20 30) + addps xmm7, xmm3 ; xmm7=data1=(01 11 21 31) + subps xmm5, xmm1 ; xmm5=data7=(07 17 27 37) + subps xmm0, xmm3 ; xmm0=data6=(06 16 26 36) + subps xmm2, xmm3 ; xmm2=tmp5 + + movaps xmm1, [GOTOFF(ebx,PD_RNDINT_MAGIC)] ; xmm1=[PD_RNDINT_MAGIC] + pcmpeqd xmm3, xmm3 + psrld xmm3, WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm6, xmm1 ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **) + addps xmm7, xmm1 ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **) + addps xmm0, xmm1 ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **) + addps xmm5, xmm1 ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **) + + pand xmm6, xmm3 ; xmm6=(00 -- 10 -- 20 -- 30 --) + pslld xmm7, WORD_BIT ; xmm7=(-- 01 -- 11 -- 21 -- 31) + pand xmm0, xmm3 ; xmm0=(06 -- 16 -- 26 -- 36 --) + pslld xmm5, WORD_BIT ; xmm5=(-- 07 -- 17 -- 27 -- 37) + por xmm6, xmm7 ; xmm6=(00 01 10 11 20 21 30 31) + por xmm0, xmm5 ; xmm0=(06 07 16 17 26 27 36 37) + + movaps xmm1, XMMWORD [wk(0)] ; xmm1=tmp2 + movaps xmm3, XMMWORD [wk(1)] ; xmm3=tmp3 + + addps xmm4, xmm2 ; xmm4=tmp4 + movaps xmm7, xmm1 + movaps xmm5, xmm3 + addps xmm1, xmm2 ; xmm1=data2=(02 12 22 32) + addps xmm3, xmm4 ; xmm3=data4=(04 14 24 34) + subps xmm7, xmm2 ; xmm7=data5=(05 15 25 35) + subps xmm5, xmm4 ; xmm5=data3=(03 13 23 33) + + movaps xmm2, [GOTOFF(ebx,PD_RNDINT_MAGIC)] ; xmm2=[PD_RNDINT_MAGIC] + pcmpeqd xmm4, xmm4 + psrld xmm4, WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..} + + addps xmm3, xmm2 ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **) + addps xmm7, xmm2 ; 
xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **) + addps xmm1, xmm2 ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **) + addps xmm5, xmm2 ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **) + + pand xmm3, xmm4 ; xmm3=(04 -- 14 -- 24 -- 34 --) + pslld xmm7, WORD_BIT ; xmm7=(-- 05 -- 15 -- 25 -- 35) + pand xmm1, xmm4 ; xmm1=(02 -- 12 -- 22 -- 32 --) + pslld xmm5, WORD_BIT ; xmm5=(-- 03 -- 13 -- 23 -- 33) + por xmm3, xmm7 ; xmm3=(04 05 14 15 24 25 34 35) + por xmm1, xmm5 ; xmm1=(02 03 12 13 22 23 32 33) + + movdqa xmm2, [GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm2=[PB_CENTERJSAMP] + + packsswb xmm6, xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35) + packsswb xmm1, xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37) + paddb xmm6, xmm2 + paddb xmm1, xmm2 + + movdqa xmm4, xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6, xmm1 ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4, xmm1 ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + + movdqa xmm7, xmm6 ; transpose coefficients(phase 3) + punpckldq xmm6, xmm4 ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm7, xmm4 ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + + pshufd xmm5, xmm6, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm3, xmm7, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm7 + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop 
ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jidctfst-mmx.asm b/simd/i386/jidctfst-mmx.asm new file mode 100644 index 0000000..d3e8a5d --- /dev/null +++ b/simd/i386/jidctfst-mmx.asm @@ -0,0 +1,501 @@ +; +; jidctfst.asm - fast integer IDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_1_082 equ DESCALE(1162209775, 30 - CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249, 30 - CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602, 30 - CONST_BITS) ; FIX(2.613125930) +F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 32 + GLOBAL_DATA(jconst_idct_ifast_mmx) + +EXTN(jconst_idct_ifast_mmx): + +PW_F1414 times 4 dw F_1_414 << CONST_SHIFT +PW_F1847 times 4 dw F_1_847 << CONST_SHIFT +PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT +PW_F1082 times 4 dw F_1_082 << CONST_SHIFT +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. 
+; +; GLOBAL(void) +; jsimd_idct_ifast_mmx(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b) + 8 ; void *dct_table +%define coef_block(b) (b) + 12 ; JCOEFPTR coef_block +%define output_buf(b) (b) + 16 ; JSAMPARRAY output_buf +%define output_col(b) (b) + 20 ; JDIMENSION output_col + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD + ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0) - DCTSIZE2 * SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 32 + GLOBAL_FUNCTION(jsimd_idct_ifast_mmx) + +EXTN(jsimd_idct_ifast_mmx): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. 
+ +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16, 7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1, mm0 + packsswb mm1, mm1 + movd eax, mm1 + test eax, eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm2, mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0, mm0 ; mm0=(00 00 01 01) + punpckhwd mm2, mm2 ; mm2=(02 02 03 03) + + movq mm1, mm0 + punpckldq mm0, mm0 ; mm0=(00 00 00 00) + punpckhdq mm1, mm1 ; mm1=(01 01 01 01) + movq mm3, mm2 + punpckldq mm2, mm2 ; mm2=(02 02 02 02) + punpckhdq mm3, mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16, 7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm1, MMWORD 
[MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm4, mm0 + movq mm5, mm1 + psubw mm0, mm2 ; mm0=tmp11 + psubw mm1, mm3 + paddw mm4, mm2 ; mm4=tmp10 + paddw mm5, mm3 ; mm5=tmp13 + + psllw mm1, PRE_MULTIPLY_SCALE_BITS + pmulhw mm1, [GOTOFF(ebx,PW_F1414)] + psubw mm1, mm5 ; mm1=tmp12 + + movq mm6, mm4 + movq mm7, mm0 + psubw mm4, mm5 ; mm4=tmp3 + psubw mm0, mm1 ; mm0=tmp2 + paddw mm6, mm5 ; mm6=tmp0 + paddw mm7, mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; wk(1)=tmp3 + movq MMWORD [wk(0)], mm0 ; wk(0)=tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movq mm4, mm2 + movq mm0, mm5 + psubw mm2, mm1 ; mm2=z12 + psubw mm5, mm3 ; mm5=z10 + paddw mm4, mm1 ; mm4=z11 + paddw mm0, mm3 ; mm0=z13 + + movq mm1, mm5 ; mm1=z10(unscaled) + psllw mm2, PRE_MULTIPLY_SCALE_BITS + psllw mm5, PRE_MULTIPLY_SCALE_BITS + + movq mm3, mm4 + psubw mm4, mm0 + paddw mm3, mm0 ; mm3=tmp7 + + psllw mm4, PRE_MULTIPLY_SCALE_BITS + pmulhw mm4, [GOTOFF(ebx,PW_F1414)] ; mm4=tmp11 + + ; To avoid overflow... 
+ ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movq mm0, mm5 + paddw mm5, mm2 + pmulhw mm5, [GOTOFF(ebx,PW_F1847)] ; mm5=z5 + pmulhw mm0, [GOTOFF(ebx,PW_MF1613)] + pmulhw mm2, [GOTOFF(ebx,PW_F1082)] + psubw mm0, mm1 + psubw mm2, mm5 ; mm2=tmp10 + paddw mm0, mm5 ; mm0=tmp12 + + ; -- Final output stage + + psubw mm0, mm3 ; mm0=tmp6 + movq mm1, mm6 + movq mm5, mm7 + paddw mm6, mm3 ; mm6=data0=(00 01 02 03) + paddw mm7, mm0 ; mm7=data1=(10 11 12 13) + psubw mm1, mm3 ; mm1=data7=(70 71 72 73) + psubw mm5, mm0 ; mm5=data6=(60 61 62 63) + psubw mm4, mm0 ; mm4=tmp5 + + movq mm3, mm6 ; transpose coefficients(phase 1) + punpcklwd mm6, mm7 ; mm6=(00 10 01 11) + punpckhwd mm3, mm7 ; mm3=(02 12 03 13) + movq mm0, mm5 ; transpose coefficients(phase 1) + punpcklwd mm5, mm1 ; mm5=(60 70 61 71) + punpckhwd mm0, mm1 ; mm0=(62 72 63 73) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp2 + movq mm1, MMWORD [wk(1)] ; mm1=tmp3 + + movq MMWORD [wk(0)], mm5 ; wk(0)=(60 70 61 71) + movq MMWORD [wk(1)], mm0 ; wk(1)=(62 72 63 73) + + paddw mm2, mm4 ; mm2=tmp4 + movq mm5, mm7 + movq mm0, mm1 + paddw mm7, mm4 ; mm7=data2=(20 21 22 23) + paddw mm1, mm2 ; mm1=data4=(40 41 42 43) + psubw mm5, mm4 ; mm5=data5=(50 51 52 53) + psubw mm0, mm2 ; mm0=data3=(30 31 32 33) + + movq mm4, mm7 ; transpose coefficients(phase 1) + punpcklwd mm7, mm0 ; mm7=(20 30 21 31) + punpckhwd mm4, mm0 ; mm4=(22 32 23 33) + movq mm2, mm1 ; transpose coefficients(phase 1) + punpcklwd mm1, mm5 ; mm1=(40 50 41 51) + punpckhwd mm2, mm5 ; mm2=(42 52 43 53) + + movq mm0, mm6 ; transpose coefficients(phase 2) + punpckldq mm6, mm7 ; mm6=(00 10 20 30) + punpckhdq mm0, mm7 ; mm0=(01 11 21 31) + movq mm5, mm3 ; transpose coefficients(phase 2) + punpckldq mm3, mm4 ; mm3=(02 12 22 32) + punpckhdq mm5, mm4 ; mm5=(03 13 23 33) + + movq mm7, MMWORD [wk(0)] ; mm7=(60 70 61 71) + movq mm4, MMWORD [wk(1)] ; mm4=(62 72 63 73) + + 
movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5 + + movq mm6, mm1 ; transpose coefficients(phase 2) + punpckldq mm1, mm7 ; mm1=(40 50 60 70) + punpckhdq mm6, mm7 ; mm6=(41 51 61 71) + movq mm0, mm2 ; transpose coefficients(phase 2) + punpckldq mm2, mm4 ; mm2=(42 52 62 72) + punpckhdq mm0, mm4 ; mm0=(43 53 63 73) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm0 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_IFAST_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + lea esi, [workspace] ; JCOEF *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16, 7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + movq mm4, mm0 + movq mm5, mm1 + psubw mm0, mm2 ; mm0=tmp11 + psubw mm1, mm3 + paddw mm4, mm2 ; mm4=tmp10 + paddw mm5, mm3 ; mm5=tmp13 + + psllw mm1, PRE_MULTIPLY_SCALE_BITS + pmulhw mm1, [GOTOFF(ebx,PW_F1414)] + psubw mm1, mm5 ; mm1=tmp12 + + movq mm6, mm4 + movq mm7, mm0 + psubw mm4, mm5 ; mm4=tmp3 + psubw mm0, mm1 ; mm0=tmp2 + paddw mm6, mm5 ; mm6=tmp0 + paddw mm7, mm1 ; mm7=tmp1 + + movq MMWORD [wk(1)], mm4 ; wk(1)=tmp3 + movq MMWORD [wk(0)], mm0 ; wk(0)=tmp2 + + ; -- Odd part + + movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD 
[MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm4, mm2 + movq mm0, mm5 + psubw mm2, mm1 ; mm2=z12 + psubw mm5, mm3 ; mm5=z10 + paddw mm4, mm1 ; mm4=z11 + paddw mm0, mm3 ; mm0=z13 + + movq mm1, mm5 ; mm1=z10(unscaled) + psllw mm2, PRE_MULTIPLY_SCALE_BITS + psllw mm5, PRE_MULTIPLY_SCALE_BITS + + movq mm3, mm4 + psubw mm4, mm0 + paddw mm3, mm0 ; mm3=tmp7 + + psllw mm4, PRE_MULTIPLY_SCALE_BITS + pmulhw mm4, [GOTOFF(ebx,PW_F1414)] ; mm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movq mm0, mm5 + paddw mm5, mm2 + pmulhw mm5, [GOTOFF(ebx,PW_F1847)] ; mm5=z5 + pmulhw mm0, [GOTOFF(ebx,PW_MF1613)] + pmulhw mm2, [GOTOFF(ebx,PW_F1082)] + psubw mm0, mm1 + psubw mm2, mm5 ; mm2=tmp10 + paddw mm0, mm5 ; mm0=tmp12 + + ; -- Final output stage + + psubw mm0, mm3 ; mm0=tmp6 + movq mm1, mm6 + movq mm5, mm7 + paddw mm6, mm3 ; mm6=data0=(00 10 20 30) + paddw mm7, mm0 ; mm7=data1=(01 11 21 31) + psraw mm6, (PASS1_BITS+3) ; descale + psraw mm7, (PASS1_BITS+3) ; descale + psubw mm1, mm3 ; mm1=data7=(07 17 27 37) + psubw mm5, mm0 ; mm5=data6=(06 16 26 36) + psraw mm1, (PASS1_BITS+3) ; descale + psraw mm5, (PASS1_BITS+3) ; descale + psubw mm4, mm0 ; mm4=tmp5 + + packsswb mm6, mm5 ; mm6=(00 10 20 30 06 16 26 36) + packsswb mm7, mm1 ; mm7=(01 11 21 31 07 17 27 37) + + movq mm3, MMWORD [wk(0)] ; mm3=tmp2 + movq mm0, MMWORD [wk(1)] ; mm0=tmp3 + + paddw mm2, mm4 ; mm2=tmp4 + movq mm5, mm3 + movq mm1, mm0 + paddw mm3, mm4 ; mm3=data2=(02 12 22 32) + paddw mm0, mm2 ; mm0=data4=(04 14 24 34) + psraw mm3, (PASS1_BITS+3) ; descale + psraw mm0, (PASS1_BITS+3) ; descale + psubw mm5, mm4 ; mm5=data5=(05 15 25 35) + psubw mm1, mm2 ; mm1=data3=(03 13 23 33) + psraw mm5, (PASS1_BITS+3) ; descale + psraw mm1, (PASS1_BITS+3) ; descale + + movq mm4, [GOTOFF(ebx,PB_CENTERJSAMP)] ; mm4=[PB_CENTERJSAMP] + + 
packsswb mm3, mm0 ; mm3=(02 12 22 32 04 14 24 34) + packsswb mm1, mm5 ; mm1=(03 13 23 33 05 15 25 35) + + paddb mm6, mm4 + paddb mm7, mm4 + paddb mm3, mm4 + paddb mm1, mm4 + + movq mm2, mm6 ; transpose coefficients(phase 1) + punpcklbw mm6, mm7 ; mm6=(00 01 10 11 20 21 30 31) + punpckhbw mm2, mm7 ; mm2=(06 07 16 17 26 27 36 37) + movq mm0, mm3 ; transpose coefficients(phase 1) + punpcklbw mm3, mm1 ; mm3=(02 03 12 13 22 23 32 33) + punpckhbw mm0, mm1 ; mm0=(04 05 14 15 24 25 34 35) + + movq mm5, mm6 ; transpose coefficients(phase 2) + punpcklwd mm6, mm3 ; mm6=(00 01 02 03 10 11 12 13) + punpckhwd mm5, mm3 ; mm5=(20 21 22 23 30 31 32 33) + movq mm4, mm0 ; transpose coefficients(phase 2) + punpcklwd mm0, mm2 ; mm0=(04 05 06 07 14 15 16 17) + punpckhwd mm4, mm2 ; mm4=(24 25 26 27 34 35 36 37) + + movq mm7, mm6 ; transpose coefficients(phase 3) + punpckldq mm6, mm0 ; mm6=(00 01 02 03 04 05 06 07) + punpckhdq mm7, mm0 ; mm7=(10 11 12 13 14 15 16 17) + movq mm1, mm5 ; transpose coefficients(phase 3) + punpckldq mm5, mm4 ; mm5=(20 21 22 23 24 25 26 27) + punpckhdq mm1, mm4 ; mm1=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_JCOEF ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. 
+ align 32 diff --git a/simd/i386/jidctfst-sse2.asm b/simd/i386/jidctfst-sse2.asm new file mode 100644 index 0000000..83bc414 --- /dev/null +++ b/simd/i386/jidctfst-sse2.asm @@ -0,0 +1,503 @@ +; +; jidctfst.asm - fast integer IDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_1_082 equ DESCALE(1162209775, 30 - CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249, 30 - CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602, 30 - CONST_BITS) ; FIX(2.613125930) +F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 32 + GLOBAL_DATA(jconst_idct_ifast_sse2) + +EXTN(jconst_idct_ifast_sse2): + +PW_F1414 times 8 dw F_1_414 << CONST_SHIFT +PW_F1847 times 8 dw F_1_847 << CONST_SHIFT +PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT +PW_F1082 times 8 dw F_1_082 << CONST_SHIFT +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. 
+; +; GLOBAL(void) +; jsimd_idct_ifast_sse2(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b) + 8 ; void *dct_table +%define coef_block(b) (b) + 12 ; JCOEFPTR coef_block +%define output_buf(b) (b) + 16 ; JSAMPARRAY output_buf +%define output_col(b) (b) + 20 ; JDIMENSION output_col + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_idct_ifast_sse2) + +EXTN(jsimd_idct_ifast_sse2): + push ebp + mov eax, esp ; eax = original ebp (unaligned frame base; arguments start at eax+8) + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax ; kept at [original_ebp] for pass 2 and the epilogue + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] ; eax becomes scratch here; pass 2 reloads it from [original_ebp] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1, xmm0 + packsswb xmm1, xmm1 + packsswb xmm1, xmm1 + movd eax, xmm1 + test eax, eax + jnz short .columnDCT + + ; -- AC terms all zero (only in0 is dequantized, with the IFAST table element size used everywhere else in this routine) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm7, xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0, xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd
xmm7, xmm7 ; xmm7=(04 04 05 05 06 06 07 07) + + pshufd xmm6, xmm0, 0x00 ; xmm6=col0=(00 00 00 00 00 00 00 00) + pshufd xmm2, xmm0, 0x55 ; xmm2=col1=(01 01 01 01 01 01 01 01) + pshufd xmm5, xmm0, 0xAA ; xmm5=col2=(02 02 02 02 02 02 02 02) + pshufd xmm0, xmm0, 0xFF ; xmm0=col3=(03 03 03 03 03 03 03 03) + pshufd xmm1, xmm7, 0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04) + pshufd xmm4, xmm7, 0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05) + pshufd xmm3, xmm7, 0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06) + pshufd xmm7, xmm7, 0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3 + jmp near .column_end + alignx 16, 7 +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4, xmm0 + movdqa xmm5, xmm1 + psubw xmm0, xmm2 ; xmm0=tmp11 + psubw xmm1, xmm3 + paddw xmm4, xmm2 ; xmm4=tmp10 + paddw xmm5, xmm3 ; xmm5=tmp13 + + psllw xmm1, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm1, [GOTOFF(ebx,PW_F1414)] + psubw xmm1, xmm5 ; xmm1=tmp12 + + movdqa xmm6, xmm4 + movdqa xmm7, xmm0 + psubw xmm4, xmm5 ; xmm4=tmp3 + psubw xmm0, xmm1 ; xmm0=tmp2 + paddw xmm6, xmm5 ; xmm6=tmp0 + paddw xmm7, xmm1 ; xmm7=tmp1 + + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=tmp3 + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=tmp2 + + ; -- Odd part + + movdqa xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD 
[XMMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4, xmm2 + movdqa xmm0, xmm5 + psubw xmm2, xmm1 ; xmm2=z12 + psubw xmm5, xmm3 ; xmm5=z10 + paddw xmm4, xmm1 ; xmm4=z11 + paddw xmm0, xmm3 ; xmm0=z13 + + movdqa xmm1, xmm5 ; xmm1=z10(unscaled) + psllw xmm2, PRE_MULTIPLY_SCALE_BITS + psllw xmm5, PRE_MULTIPLY_SCALE_BITS + + movdqa xmm3, xmm4 + psubw xmm4, xmm0 + paddw xmm3, xmm0 ; xmm3=tmp7 + + psllw xmm4, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm4, [GOTOFF(ebx,PW_F1414)] ; xmm4=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm0, xmm5 + paddw xmm5, xmm2 + pmulhw xmm5, [GOTOFF(ebx,PW_F1847)] ; xmm5=z5 + pmulhw xmm0, [GOTOFF(ebx,PW_MF1613)] + pmulhw xmm2, [GOTOFF(ebx,PW_F1082)] + psubw xmm0, xmm1 + psubw xmm2, xmm5 ; xmm2=tmp10 + paddw xmm0, xmm5 ; xmm0=tmp12 + + ; -- Final output stage + + psubw xmm0, xmm3 ; xmm0=tmp6 + movdqa xmm1, xmm6 + movdqa xmm5, xmm7 + paddw xmm6, xmm3 ; xmm6=data0=(00 01 02 03 04 05 06 07) + paddw xmm7, xmm0 ; xmm7=data1=(10 11 12 13 14 15 16 17) + psubw xmm1, xmm3 ; xmm1=data7=(70 71 72 73 74 75 76 77) + psubw xmm5, xmm0 ; xmm5=data6=(60 61 62 63 64 65 66 67) + psubw xmm4, xmm0 ; xmm4=tmp5 + + movdqa xmm3, xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6, xmm7 ; xmm6=(00 10 01 11 02 12 03 13) + punpckhwd xmm3, xmm7 ; xmm3=(04 14 05 15 06 16 07 17) + movdqa xmm0, xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5, xmm1 ; xmm5=(60 70 61 71 62 72 63 73) + punpckhwd xmm0, xmm1 ; xmm0=(64 74 65 75 66 76 67 77) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 + + movdqa XMMWORD [wk(0)], xmm5 ; 
wk(0)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(64 74 65 75 66 76 67 77) + + paddw xmm2, xmm4 ; xmm2=tmp4 + movdqa xmm5, xmm7 + movdqa xmm0, xmm1 + paddw xmm7, xmm4 ; xmm7=data2=(20 21 22 23 24 25 26 27) + paddw xmm1, xmm2 ; xmm1=data4=(40 41 42 43 44 45 46 47) + psubw xmm5, xmm4 ; xmm5=data5=(50 51 52 53 54 55 56 57) + psubw xmm0, xmm2 ; xmm0=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm4, xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7, xmm0 ; xmm7=(20 30 21 31 22 32 23 33) + punpckhwd xmm4, xmm0 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm2, xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1, xmm5 ; xmm1=(40 50 41 51 42 52 43 53) + punpckhwd xmm2, xmm5 ; xmm2=(44 54 45 55 46 56 47 57) + + movdqa xmm0, xmm3 ; transpose coefficients(phase 2) + punpckldq xmm3, xmm4 ; xmm3=(04 14 24 34 05 15 25 35) + punpckhdq xmm0, xmm4 ; xmm0=(06 16 26 36 07 17 27 37) + movdqa xmm5, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm7 ; xmm6=(00 10 20 30 01 11 21 31) + punpckhdq xmm5, xmm7 ; xmm5=(02 12 22 32 03 13 23 33) + + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(60 70 61 71 62 72 63 73) + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(0)], xmm3 ; wk(0)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(06 16 26 36 07 17 27 37) + + movdqa xmm3, xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1, xmm4 ; xmm1=(40 50 60 70 41 51 61 71) + punpckhdq xmm3, xmm4 ; xmm3=(42 52 62 72 43 53 63 73) + movdqa xmm0, xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2, xmm7 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm0, xmm7 ; xmm0=(46 56 66 76 47 57 67 77) + + movdqa xmm4, xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6, xmm1 ; xmm6=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm4, xmm1 ; xmm4=col1=(01 11 21 31 41 51 61 71) + movdqa xmm7, xmm5 ; transpose coefficients(phase 3) + punpcklqdq xmm5, xmm3 ; xmm5=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm7, xmm3 ; 
xmm7=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(04 14 24 34 05 15 25 35) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=col3 + + movdqa xmm4, xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1, xmm2 ; xmm1=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm4, xmm2 ; xmm4=col5=(05 15 25 35 45 55 65 75) + movdqa xmm7, xmm3 ; transpose coefficients(phase 3) + punpcklqdq xmm3, xmm0 ; xmm3=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm7, xmm0 ; xmm7=col7=(07 17 27 37 47 57 67 77) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 + + movdqa xmm2, xmm6 + movdqa xmm0, xmm5 + psubw xmm6, xmm1 ; xmm6=tmp11 + psubw xmm5, xmm3 + paddw xmm2, xmm1 ; xmm2=tmp10 + paddw xmm0, xmm3 ; xmm0=tmp13 + + psllw xmm5, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5, [GOTOFF(ebx,PW_F1414)] + psubw xmm5, xmm0 ; xmm5=tmp12 + + movdqa xmm1, xmm2 + movdqa xmm3, xmm6 + psubw xmm2, xmm0 ; xmm2=tmp3 + psubw xmm6, xmm5 ; xmm6=tmp2 + paddw xmm1, xmm0 ; xmm1=tmp0 + paddw xmm3, xmm5 ; xmm3=tmp1 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=col1 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=col3 + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp3 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp2 + + ; -- Odd part + + ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7 + + movdqa xmm2, xmm0 + movdqa xmm6, xmm4 + psubw xmm0, xmm7 ; xmm0=z12 + psubw xmm4, xmm5 ; xmm4=z10 + paddw xmm2, xmm7 ; xmm2=z11 + paddw xmm6, xmm5 ; xmm6=z13 + + movdqa 
xmm7, xmm4 ; xmm7=z10(unscaled) + psllw xmm0, PRE_MULTIPLY_SCALE_BITS + psllw xmm4, PRE_MULTIPLY_SCALE_BITS + + movdqa xmm5, xmm2 + psubw xmm2, xmm6 + paddw xmm5, xmm6 ; xmm5=tmp7 + + psllw xmm2, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm2, [GOTOFF(ebx,PW_F1414)] ; xmm2=tmp11 + + ; To avoid overflow... + ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm6, xmm4 + paddw xmm4, xmm0 + pmulhw xmm4, [GOTOFF(ebx,PW_F1847)] ; xmm4=z5 + pmulhw xmm6, [GOTOFF(ebx,PW_MF1613)] + pmulhw xmm0, [GOTOFF(ebx,PW_F1082)] + psubw xmm6, xmm7 + psubw xmm0, xmm4 ; xmm0=tmp10 + paddw xmm6, xmm4 ; xmm6=tmp12 + + ; -- Final output stage + + psubw xmm6, xmm5 ; xmm6=tmp6 + movdqa xmm7, xmm1 + movdqa xmm4, xmm3 + paddw xmm1, xmm5 ; xmm1=data0=(00 10 20 30 40 50 60 70) + paddw xmm3, xmm6 ; xmm3=data1=(01 11 21 31 41 51 61 71) + psraw xmm1, (PASS1_BITS+3) ; descale + psraw xmm3, (PASS1_BITS+3) ; descale + psubw xmm7, xmm5 ; xmm7=data7=(07 17 27 37 47 57 67 77) + psubw xmm4, xmm6 ; xmm4=data6=(06 16 26 36 46 56 66 76) + psraw xmm7, (PASS1_BITS+3) ; descale + psraw xmm4, (PASS1_BITS+3) ; descale + psubw xmm2, xmm6 ; xmm2=tmp5 + + packsswb xmm1, xmm4 ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3, xmm7 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp2 + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=tmp3 + + paddw xmm0, xmm2 ; xmm0=tmp4 + movdqa xmm4, xmm5 + movdqa xmm7, xmm6 + paddw xmm5, xmm2 ; xmm5=data2=(02 12 22 32 42 52 62 72) + paddw xmm6, xmm0 ; xmm6=data4=(04 14 24 34 44 54 64 74) + psraw xmm5, (PASS1_BITS+3) ; descale + psraw xmm6, (PASS1_BITS+3) ; descale + psubw xmm4, xmm2 ; xmm4=data5=(05 15 25 35 45 55 65 75) + psubw xmm7, xmm0 ; xmm7=data3=(03 13 23 33 43 53 63 73) + psraw xmm4, (PASS1_BITS+3) ; descale + psraw xmm7, (PASS1_BITS+3) ; descale + + movdqa xmm2, [GOTOFF(ebx,PB_CENTERJSAMP)] ; 
xmm2=[PB_CENTERJSAMP] + + packsswb xmm5, xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm7, xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm1, xmm2 + paddb xmm3, xmm2 + paddb xmm5, xmm2 + paddb xmm7, xmm2 + + movdqa xmm0, xmm1 ; transpose coefficients(phase 1) + punpcklbw xmm1, xmm3 ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0, xmm3 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm6, xmm5 ; transpose coefficients(phase 1) + punpcklbw xmm5, xmm7 ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm6, xmm7 ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4, xmm1 ; transpose coefficients(phase 2) + punpcklwd xmm1, xmm5 ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4, xmm5 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm2, xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6, xmm0 ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm2, xmm0 ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm3, xmm1 ; transpose coefficients(phase 3) + punpckldq xmm1, xmm6 ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm3, xmm6 ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm7, xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4, xmm2 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm7, xmm2 ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm5, xmm1, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0, xmm3, 0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm6, xmm4, 0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm2, xmm7, 0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD 
[edx+eax*SIZEOF_JSAMPLE], xmm1 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7 + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0 + mov edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm2 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jidctint-avx2.asm b/simd/i386/jidctint-avx2.asm new file mode 100644 index 0000000..b3b7b14 --- /dev/null +++ b/simd/i386/jidctint-avx2.asm @@ -0,0 +1,455 @@ +; +; jidctint.asm - accurate integer IDCT (AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, 2018, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. 
+; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS - PASS1_BITS) +%define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- +; In-place 8x8x16-bit inverse matrix transpose using AVX2 instructions +; %1-%4: Input/output registers +; %5-%8: Temp registers + +%macro dotranspose 8 + ; %5=(00 10 20 30 40 50 60 70 01 
11 21 31 41 51 61 71) + ; %6=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72) + ; %7=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75) + ; %8=(07 17 27 37 47 57 67 77 06 16 26 36 46 56 66 76) + + vpermq %5, %1, 0xD8 + vpermq %6, %2, 0x72 + vpermq %7, %3, 0xD8 + vpermq %8, %4, 0x72 + ; transpose coefficients(phase 1) + ; %5=(00 10 20 30 01 11 21 31 40 50 60 70 41 51 61 71) + ; %6=(02 12 22 32 03 13 23 33 42 52 62 72 43 53 63 73) + ; %7=(04 14 24 34 05 15 25 35 44 54 64 74 45 55 65 75) + ; %8=(06 16 26 36 07 17 27 37 46 56 66 76 47 57 67 77) + + vpunpcklwd %1, %5, %6 + vpunpckhwd %2, %5, %6 + vpunpcklwd %3, %7, %8 + vpunpckhwd %4, %7, %8 + ; transpose coefficients(phase 2) + ; %1=(00 02 10 12 20 22 30 32 40 42 50 52 60 62 70 72) + ; %2=(01 03 11 13 21 23 31 33 41 43 51 53 61 63 71 73) + ; %3=(04 06 14 16 24 26 34 36 44 46 54 56 64 66 74 76) + ; %4=(05 07 15 17 25 27 35 37 45 47 55 57 65 67 75 77) + + vpunpcklwd %5, %1, %2 + vpunpcklwd %6, %3, %4 + vpunpckhwd %7, %1, %2 + vpunpckhwd %8, %3, %4 + ; transpose coefficients(phase 3) + ; %5=(00 01 02 03 10 11 12 13 40 41 42 43 50 51 52 53) + ; %6=(04 05 06 07 14 15 16 17 44 45 46 47 54 55 56 57) + ; %7=(20 21 22 23 30 31 32 33 60 61 62 63 70 71 72 73) + ; %8=(24 25 26 27 34 35 36 37 64 65 66 67 74 75 76 77) + + vpunpcklqdq %1, %5, %6 + vpunpckhqdq %2, %5, %6 + vpunpcklqdq %3, %7, %8 + vpunpckhqdq %4, %7, %8 + ; transpose coefficients(phase 4) + ; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47) + ; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57) + ; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67) + ; %4=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77) +%endmacro + +; -------------------------------------------------------------------------- +; In-place 8x8x16-bit slow integer inverse DCT using AVX2 instructions +; %1-%4: Input/output registers +; %5-%8: Temp registers; %9-%12: Temp storage (memory operands at both call sites) +; %13: Pass (1 or 2); selects PD_DESCALE_P1/DESCALE_P1 or PD_DESCALE_P2/DESCALE_P2 + +%macro dodct 13 + ; -- Even part + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 *
-1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + vperm2i128 %6, %3, %3, 0x01 ; %6=in6_2 + vpunpcklwd %5, %3, %6 ; %5=in26_62L + vpunpckhwd %6, %3, %6 ; %6=in26_62H + vpmaddwd %5, %5, [GOTOFF(ebx,PW_F130_F054_MF130_F054)] ; %5=tmp3_2L + vpmaddwd %6, %6, [GOTOFF(ebx,PW_F130_F054_MF130_F054)] ; %6=tmp3_2H + + vperm2i128 %7, %1, %1, 0x01 ; %7=in4_0 + vpsignw %1, %1, [GOTOFF(ebx,PW_1_NEG1)] + vpaddw %7, %7, %1 ; %7=(in0+in4)_(in0-in4) + + vpxor %1, %1, %1 + vpunpcklwd %8, %1, %7 ; %8=tmp0_1L + vpunpckhwd %1, %1, %7 ; %1=tmp0_1H + vpsrad %8, %8, (16-CONST_BITS) ; vpsrad %8,16 & vpslld %8,CONST_BITS + vpsrad %1, %1, (16-CONST_BITS) ; vpsrad %1,16 & vpslld %1,CONST_BITS + + vpsubd %3, %8, %5 + vmovdqu %11, %3 ; %11=tmp0_1L-tmp3_2L=tmp13_12L + vpaddd %3, %8, %5 + vmovdqu %9, %3 ; %9=tmp0_1L+tmp3_2L=tmp10_11L + vpsubd %3, %1, %6 + vmovdqu %12, %3 ; %12=tmp0_1H-tmp3_2H=tmp13_12H + vpaddd %3, %1, %6 + vmovdqu %10, %3 ; %10=tmp0_1H+tmp3_2H=tmp10_11H + + ; -- Odd part + + vpaddw %1, %4, %2 ; %1=in7_5+in3_1=z3_4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + vperm2i128 %8, %1, %1, 0x01 ; %8=z4_3 + vpunpcklwd %7, %1, %8 ; %7=z34_43L + vpunpckhwd %8, %1, %8 ; %8=z34_43H + vpmaddwd %7, %7, [GOTOFF(ebx,PW_MF078_F117_F078_F117)] ; %7=z3_4L + vpmaddwd %8, %8, [GOTOFF(ebx,PW_MF078_F117_F078_F117)] ; %8=z3_4H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += 
z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + vperm2i128 %2, %2, %2, 0x01 ; %2=in1_3 + vpunpcklwd %3, %4, %2 ; %3=in71_53L + vpunpckhwd %4, %4, %2 ; %4=in71_53H + + vpmaddwd %5, %3, [GOTOFF(ebx,PW_MF060_MF089_MF050_MF256)] ; %5=tmp0_1L + vpmaddwd %6, %4, [GOTOFF(ebx,PW_MF060_MF089_MF050_MF256)] ; %6=tmp0_1H + vpaddd %5, %5, %7 ; %5=tmp0_1L+z3_4L=tmp0_1L + vpaddd %6, %6, %8 ; %6=tmp0_1H+z3_4H=tmp0_1H + + vpmaddwd %3, %3, [GOTOFF(ebx,PW_MF089_F060_MF256_F050)] ; %3=tmp3_2L + vpmaddwd %4, %4, [GOTOFF(ebx,PW_MF089_F060_MF256_F050)] ; %4=tmp3_2H + vperm2i128 %7, %7, %7, 0x01 ; %7=z4_3L + vperm2i128 %8, %8, %8, 0x01 ; %8=z4_3H + vpaddd %7, %3, %7 ; %7=tmp3_2L+z4_3L=tmp3_2L + vpaddd %8, %4, %8 ; %8=tmp3_2H+z4_3H=tmp3_2H + + ; -- Final output stage + + vmovdqu %3, %9 + vmovdqu %4, %10 + + vpaddd %1, %3, %7 ; %1=tmp10_11L+tmp3_2L=data0_1L + vpaddd %2, %4, %8 ; %2=tmp10_11H+tmp3_2H=data0_1H + vpaddd %1, %1, [GOTOFF(ebx,PD_DESCALE_P %+ %13)] + vpaddd %2, %2, [GOTOFF(ebx,PD_DESCALE_P %+ %13)] + vpsrad %1, %1, DESCALE_P %+ %13 + vpsrad %2, %2, DESCALE_P %+ %13 + vpackssdw %1, %1, %2 ; %1=data0_1 + + vpsubd %3, %3, %7 ; %3=tmp10_11L-tmp3_2L=data7_6L + vpsubd %4, %4, %8 ; %4=tmp10_11H-tmp3_2H=data7_6H + vpaddd %3, %3, [GOTOFF(ebx,PD_DESCALE_P %+ %13)] + vpaddd %4, %4, [GOTOFF(ebx,PD_DESCALE_P %+ %13)] + vpsrad %3, %3, DESCALE_P %+ %13 + vpsrad %4, %4, DESCALE_P %+ %13 + vpackssdw %4, %3, %4 ; %4=data7_6 + + vmovdqu %7, %11 + vmovdqu %8, %12 + + vpaddd %2, %7, %5 ; %2=tmp13_12L+tmp0_1L=data3_2L + vpaddd %3, %8, %6 ; %3=tmp13_12H+tmp0_1H=data3_2H + vpaddd %2, %2, [GOTOFF(ebx,PD_DESCALE_P %+ %13)] + vpaddd %3, %3, [GOTOFF(ebx,PD_DESCALE_P %+ %13)] + vpsrad %2,
%2, DESCALE_P %+ %13 + vpsrad %3, %3, DESCALE_P %+ %13 + vpackssdw %2, %2, %3 ; %2=data3_2 + + vpsubd %3, %7, %5 ; %3=tmp13_12L-tmp0_1L=data4_5L + vpsubd %6, %8, %6 ; %6=tmp13_12H-tmp0_1H=data4_5H + vpaddd %3, %3, [GOTOFF(ebx,PD_DESCALE_P %+ %13)] + vpaddd %6, %6, [GOTOFF(ebx,PD_DESCALE_P %+ %13)] + vpsrad %3, %3, DESCALE_P %+ %13 + vpsrad %6, %6, DESCALE_P %+ %13 + vpackssdw %3, %3, %6 ; %3=data4_5 +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_idct_islow_avx2) + +EXTN(jconst_idct_islow_avx2): + +PW_F130_F054_MF130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541 + times 4 dw (F_0_541 - F_1_847), F_0_541 +PW_MF078_F117_F078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175 + times 4 dw (F_1_175 - F_0_390), F_1_175 +PW_MF060_MF089_MF050_MF256 times 4 dw (F_0_298 - F_0_899), -F_0_899 + times 4 dw (F_2_053 - F_2_562), -F_2_562 +PW_MF089_F060_MF256_F050 times 4 dw -F_0_899, (F_1_501 - F_0_899) + times 4 dw -F_2_562, (F_3_072 - F_2_562) +PD_DESCALE_P1 times 8 dd 1 << (DESCALE_P1 - 1) +PD_DESCALE_P2 times 8 dd 1 << (DESCALE_P2 - 1) +PB_CENTERJSAMP times 32 db CENTERJSAMPLE +PW_1_NEG1 times 8 dw 1 + times 8 dw -1 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients.
+; +; GLOBAL(void) +; jsimd_idct_islow_avx2(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b) + 8 ; void *dct_table +%define coef_block(b) (b) + 12 ; JCOEFPTR coef_block +%define output_buf(b) (b) + 16 ; JSAMPARRAY output_buf +%define output_col(b) (b) + 20 ; JDIMENSION output_col + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_YMMWORD + ; ymmword wk[WK_NUM] (frame is only aligned to 128 bits below; dodct accesses wk[] with vmovdqu) +%define WK_NUM 4 + + align 32 + GLOBAL_FUNCTION(jsimd_idct_islow_avx2) + +EXTN(jsimd_idct_islow_avx2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + vpor xmm0, xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + vpor xmm1, xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + vpor xmm0, xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + vpor xmm1, xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + vpor xmm0, xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + vpor xmm1, xmm1, xmm0 + vpacksswb xmm1, xmm1, xmm1 + vpacksswb xmm1, xmm1, xmm1 + movd eax, xmm1 + test eax, eax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm5, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + vpmullw xmm5, xmm5, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + vpsllw xmm5, xmm5, PASS1_BITS + + vpunpcklwd xmm4, xmm5, xmm5 ; xmm4=(00
00 01 01 02 02 03 03) + vpunpckhwd xmm5, xmm5, xmm5 ; xmm5=(04 04 05 05 06 06 07 07) + vinserti128 ymm4, ymm4, xmm5, 1 + + vpshufd ymm0, ymm4, 0x00 ; ymm0=col0_4=(00 00 00 00 00 00 00 00 04 04 04 04 04 04 04 04) + vpshufd ymm1, ymm4, 0x55 ; ymm1=col1_5=(01 01 01 01 01 01 01 01 05 05 05 05 05 05 05 05) + vpshufd ymm2, ymm4, 0xAA ; ymm2=col2_6=(02 02 02 02 02 02 02 02 06 06 06 06 06 06 06 06) + vpshufd ymm3, ymm4, 0xFF ; ymm3=col3_7=(03 03 03 03 03 03 03 03 07 07 07 07 07 07 07 07) + + jmp near .column_end + alignx 16, 7 +%endif +.columnDCT: + + vmovdqu ymm4, YMMWORD [YMMBLOCK(0,0,esi,SIZEOF_JCOEF)] ; ymm4=in0_1 + vmovdqu ymm5, YMMWORD [YMMBLOCK(2,0,esi,SIZEOF_JCOEF)] ; ymm5=in2_3 + vmovdqu ymm6, YMMWORD [YMMBLOCK(4,0,esi,SIZEOF_JCOEF)] ; ymm6=in4_5 + vmovdqu ymm7, YMMWORD [YMMBLOCK(6,0,esi,SIZEOF_JCOEF)] ; ymm7=in6_7 + vpmullw ymm4, ymm4, YMMWORD [YMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + vpmullw ymm5, ymm5, YMMWORD [YMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + vpmullw ymm6, ymm6, YMMWORD [YMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + vpmullw ymm7, ymm7, YMMWORD [YMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + vperm2i128 ymm0, ymm4, ymm6, 0x20 ; ymm0=in0_4 + vperm2i128 ymm1, ymm5, ymm4, 0x31 ; ymm1=in3_1 + vperm2i128 ymm2, ymm5, ymm7, 0x20 ; ymm2=in2_6 + vperm2i128 ymm3, ymm7, ymm6, 0x31 ; ymm3=in7_5 + + dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1 + ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6 + + dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7 + ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7 + +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows. 
+ + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + vperm2i128 ymm4, ymm3, ymm1, 0x31 ; ymm4=in7_5 + vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm1=in3_1 + + dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2 + ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6 + + dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7 + ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7 + + vpacksswb ymm0, ymm0, ymm1 ; ymm0=data01_45 + vpacksswb ymm1, ymm2, ymm4 ; ymm1=data23_67 + vpaddb ymm0, ymm0, [GOTOFF(ebx,PB_CENTERJSAMP)] + vpaddb ymm1, ymm1, [GOTOFF(ebx,PB_CENTERJSAMP)] + + vextracti128 xmm6, ymm1, 1 ; xmm6=data67 + vextracti128 xmm4, ymm0, 1 ; xmm4=data45 + vextracti128 xmm2, ymm1, 0 ; xmm2=data23 + vextracti128 xmm0, ymm0, 0 ; xmm0=data01 + + vpshufd xmm1, xmm0, 0x4E ; xmm1=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + vpshufd xmm3, xmm2, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + vpshufd xmm5, xmm4, 0x4E ; xmm5=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + vpshufd xmm7, xmm6, 0x4E ; xmm7=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + vzeroupper + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm0 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm1 + + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + + mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov esi, JSAMPROW [edi+5*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5 + + mov edx, JSAMPROW [edi+6*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov esi, JSAMPROW
[edi+7*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jidctint-mmx.asm b/simd/i386/jidctint-mmx.asm new file mode 100644 index 0000000..6ca6d06 --- /dev/null +++ b/simd/i386/jidctint-mmx.asm @@ -0,0 +1,853 @@ +; +; jidctint.asm - accurate integer IDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. 
+; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS - PASS1_BITS) +%define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_idct_islow_mmx) + +EXTN(jconst_idct_islow_mmx): + +PW_F130_F054 times 2 dw (F_0_541 + F_0_765), F_0_541 +PW_F054_MF130 times 2 dw 
F_0_541, (F_0_541 - F_1_847) +PW_MF078_F117 times 2 dw (F_1_175 - F_1_961), F_1_175 +PW_F117_F078 times 2 dw F_1_175, (F_1_175 - F_0_390) +PW_MF060_MF089 times 2 dw (F_0_298 - F_0_899), -F_0_899 +PW_MF089_F060 times 2 dw -F_0_899, (F_1_501 - F_0_899) +PW_MF050_MF256 times 2 dw (F_2_053 - F_2_562), -F_2_562 +PW_MF256_F050 times 2 dw -F_2_562, (F_3_072 - F_2_562) +PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1 - 1) +PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2 - 1) +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_islow_mmx(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b) + 8 ; void *dct_table +%define coef_block(b) (b) + 12 ; JCOEFPTR coef_block +%define output_buf(b) (b) + 16 ; JSAMPARRAY output_buf +%define output_col(b) (b) + 20 ; JDIMENSION output_col + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD + ; mmword wk[WK_NUM] +%define WK_NUM 12 +%define workspace wk(0) - DCTSIZE2 * SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 32 + GLOBAL_FUNCTION(jsimd_idct_islow_mmx) + +EXTN(jsimd_idct_islow_mmx): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [workspace] + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array.
+ +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16, 7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm1, mm0 + packsswb mm1, mm1 + movd eax, mm1 + test eax, eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw mm0, PASS1_BITS + + movq mm2, mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0, mm0 ; mm0=(00 00 01 01) + punpckhwd mm2, mm2 ; mm2=(02 02 03 03) + + movq mm1, mm0 + punpckldq mm0, mm0 ; mm0=(00 00 00 00) + punpckhdq mm1, mm1 ; mm1=(01 01 01 01) + movq mm3, mm2 + punpckldq mm2, mm2 ; mm2=(02 02 02 02) + punpckhdq mm3, mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16, 7 +%endif +.columnDCT: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD 
[MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movq mm4, mm1 ; mm1=in2=z2 + movq mm5, mm1 + punpcklwd mm4, mm3 ; mm3=in6=z3 + punpckhwd mm5, mm3 + movq mm1, mm4 + movq mm3, mm5 + pmaddwd mm4, [GOTOFF(ebx,PW_F130_F054)] ; mm4=tmp3L + pmaddwd mm5, [GOTOFF(ebx,PW_F130_F054)] ; mm5=tmp3H + pmaddwd mm1, [GOTOFF(ebx,PW_F054_MF130)] ; mm1=tmp2L + pmaddwd mm3, [GOTOFF(ebx,PW_F054_MF130)] ; mm3=tmp2H + + movq mm6, mm0 + paddw mm0, mm2 ; mm0=in0+in4 + psubw mm6, mm2 ; mm6=in0-in4 + + pxor mm7, mm7 + pxor mm2, mm2 + punpcklwd mm7, mm0 ; mm7=tmp0L + punpckhwd mm2, mm0 ; mm2=tmp0H + psrad mm7, (16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + psrad mm2, (16-CONST_BITS) ; psrad mm2,16 & pslld mm2,CONST_BITS + + movq mm0, mm7 + paddd mm7, mm4 ; mm7=tmp10L + psubd mm0, mm4 ; mm0=tmp13L + movq mm4, mm2 + paddd mm2, mm5 ; mm2=tmp10H + psubd mm4, mm5 ; mm4=tmp13H + + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp10L + movq MMWORD [wk(1)], mm2 ; wk(1)=tmp10H + movq MMWORD [wk(2)], mm0 ; wk(2)=tmp13L + movq MMWORD [wk(3)], mm4 ; wk(3)=tmp13H + + pxor mm5, mm5 + pxor mm7, mm7 + punpcklwd mm5, mm6 ; mm5=tmp1L + punpckhwd mm7, mm6 ; mm7=tmp1H + psrad mm5, (16-CONST_BITS) ; psrad mm5,16 & pslld mm5,CONST_BITS + psrad mm7, (16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + + movq mm2, mm5 + paddd mm5, mm1 ; mm5=tmp11L + psubd mm2, mm1 ; mm2=tmp12L + movq mm0, mm7 + paddd mm7, mm3 ; mm7=tmp11H + psubd mm0, mm3 ; mm0=tmp12H + + movq MMWORD [wk(4)], mm5 ; wk(4)=tmp11L + movq MMWORD [wk(5)], mm7 ; 
wk(5)=tmp11H + movq MMWORD [wk(6)], mm2 ; wk(6)=tmp12L + movq MMWORD [wk(7)], mm0 ; wk(7)=tmp12H + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movq mm5, mm6 + movq mm7, mm4 + paddw mm5, mm3 ; mm5=z3 + paddw mm7, mm1 ; mm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm2, mm5 + movq mm0, mm5 + punpcklwd mm2, mm7 + punpckhwd mm0, mm7 + movq mm5, mm2 + movq mm7, mm0 + pmaddwd mm2, [GOTOFF(ebx,PW_MF078_F117)] ; mm2=z3L + pmaddwd mm0, [GOTOFF(ebx,PW_MF078_F117)] ; mm0=z3H + pmaddwd mm5, [GOTOFF(ebx,PW_F117_F078)] ; mm5=z4L + pmaddwd mm7, [GOTOFF(ebx,PW_F117_F078)] ; mm7=z4H + + movq MMWORD [wk(10)], mm2 ; wk(10)=z3L + movq MMWORD [wk(11)], mm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 
+= z4; + + movq mm2, mm3 + movq mm0, mm3 + punpcklwd mm2, mm4 + punpckhwd mm0, mm4 + movq mm3, mm2 + movq mm4, mm0 + pmaddwd mm2, [GOTOFF(ebx,PW_MF060_MF089)] ; mm2=tmp0L + pmaddwd mm0, [GOTOFF(ebx,PW_MF060_MF089)] ; mm0=tmp0H + pmaddwd mm3, [GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L + pmaddwd mm4, [GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H + + paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L + paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H + paddd mm3, mm5 ; mm3=tmp3L + paddd mm4, mm7 ; mm4=tmp3H + + movq MMWORD [wk(8)], mm2 ; wk(8)=tmp0L + movq MMWORD [wk(9)], mm0 ; wk(9)=tmp0H + + movq mm2, mm1 + movq mm0, mm1 + punpcklwd mm2, mm6 + punpckhwd mm0, mm6 + movq mm1, mm2 + movq mm6, mm0 + pmaddwd mm2, [GOTOFF(ebx,PW_MF050_MF256)] ; mm2=tmp1L + pmaddwd mm0, [GOTOFF(ebx,PW_MF050_MF256)] ; mm0=tmp1H + pmaddwd mm1, [GOTOFF(ebx,PW_MF256_F050)] ; mm1=tmp2L + pmaddwd mm6, [GOTOFF(ebx,PW_MF256_F050)] ; mm6=tmp2H + + paddd mm2, mm5 ; mm2=tmp1L + paddd mm0, mm7 ; mm0=tmp1H + paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L + paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H + + movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L + movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movq mm5, MMWORD [wk(0)] ; mm5=tmp10L + movq mm7, MMWORD [wk(1)] ; mm7=tmp10H + + movq mm2, mm5 + movq mm0, mm7 + paddd mm5, mm3 ; mm5=data0L + paddd mm7, mm4 ; mm7=data0H + psubd mm2, mm3 ; mm2=data7L + psubd mm0, mm4 ; mm0=data7H + + movq mm3, [GOTOFF(ebx,PD_DESCALE_P1)] ; mm3=[PD_DESCALE_P1] + + paddd mm5, mm3 + paddd mm7, mm3 + psrad mm5, DESCALE_P1 + psrad mm7, DESCALE_P1 + paddd mm2, mm3 + paddd mm0, mm3 + psrad mm2, DESCALE_P1 + psrad mm0, DESCALE_P1 + + packssdw mm5, mm7 ; mm5=data0=(00 01 02 03) + packssdw mm2, mm0 ; mm2=data7=(70 71 72 73) + + movq mm4, MMWORD [wk(4)] ; mm4=tmp11L + movq mm3, MMWORD [wk(5)] ; mm3=tmp11H + + movq mm7, mm4 + movq mm0, mm3 + paddd mm4, mm1 ; mm4=data1L + paddd mm3, mm6 ; mm3=data1H + psubd mm7, mm1 ; mm7=data6L + psubd mm0, mm6 ; mm0=data6H + + movq mm1, [GOTOFF(ebx,PD_DESCALE_P1)] ; 
mm1=[PD_DESCALE_P1] + + paddd mm4, mm1 + paddd mm3, mm1 + psrad mm4, DESCALE_P1 + psrad mm3, DESCALE_P1 + paddd mm7, mm1 + paddd mm0, mm1 + psrad mm7, DESCALE_P1 + psrad mm0, DESCALE_P1 + + packssdw mm4, mm3 ; mm4=data1=(10 11 12 13) + packssdw mm7, mm0 ; mm7=data6=(60 61 62 63) + + movq mm6, mm5 ; transpose coefficients(phase 1) + punpcklwd mm5, mm4 ; mm5=(00 10 01 11) + punpckhwd mm6, mm4 ; mm6=(02 12 03 13) + movq mm1, mm7 ; transpose coefficients(phase 1) + punpcklwd mm7, mm2 ; mm7=(60 70 61 71) + punpckhwd mm1, mm2 ; mm1=(62 72 63 73) + + movq mm3, MMWORD [wk(6)] ; mm3=tmp12L + movq mm0, MMWORD [wk(7)] ; mm0=tmp12H + movq mm4, MMWORD [wk(10)] ; mm4=tmp1L + movq mm2, MMWORD [wk(11)] ; mm2=tmp1H + + movq MMWORD [wk(0)], mm5 ; wk(0)=(00 10 01 11) + movq MMWORD [wk(1)], mm6 ; wk(1)=(02 12 03 13) + movq MMWORD [wk(4)], mm7 ; wk(4)=(60 70 61 71) + movq MMWORD [wk(5)], mm1 ; wk(5)=(62 72 63 73) + + movq mm5, mm3 + movq mm6, mm0 + paddd mm3, mm4 ; mm3=data2L + paddd mm0, mm2 ; mm0=data2H + psubd mm5, mm4 ; mm5=data5L + psubd mm6, mm2 ; mm6=data5H + + movq mm7, [GOTOFF(ebx,PD_DESCALE_P1)] ; mm7=[PD_DESCALE_P1] + + paddd mm3, mm7 + paddd mm0, mm7 + psrad mm3, DESCALE_P1 + psrad mm0, DESCALE_P1 + paddd mm5, mm7 + paddd mm6, mm7 + psrad mm5, DESCALE_P1 + psrad mm6, DESCALE_P1 + + packssdw mm3, mm0 ; mm3=data2=(20 21 22 23) + packssdw mm5, mm6 ; mm5=data5=(50 51 52 53) + + movq mm1, MMWORD [wk(2)] ; mm1=tmp13L + movq mm4, MMWORD [wk(3)] ; mm4=tmp13H + movq mm2, MMWORD [wk(8)] ; mm2=tmp0L + movq mm7, MMWORD [wk(9)] ; mm7=tmp0H + + movq mm0, mm1 + movq mm6, mm4 + paddd mm1, mm2 ; mm1=data3L + paddd mm4, mm7 ; mm4=data3H + psubd mm0, mm2 ; mm0=data4L + psubd mm6, mm7 ; mm6=data4H + + movq mm2, [GOTOFF(ebx,PD_DESCALE_P1)] ; mm2=[PD_DESCALE_P1] + + paddd mm1, mm2 + paddd mm4, mm2 + psrad mm1, DESCALE_P1 + psrad mm4, DESCALE_P1 + paddd mm0, mm2 + paddd mm6, mm2 + psrad mm0, DESCALE_P1 + psrad mm6, DESCALE_P1 + + packssdw mm1, mm4 ; mm1=data3=(30 31 32 33) + packssdw mm0, mm6 ; 
mm0=data4=(40 41 42 43) + + movq mm7, MMWORD [wk(0)] ; mm7=(00 10 01 11) + movq mm2, MMWORD [wk(1)] ; mm2=(02 12 03 13) + + movq mm4, mm3 ; transpose coefficients(phase 1) + punpcklwd mm3, mm1 ; mm3=(20 30 21 31) + punpckhwd mm4, mm1 ; mm4=(22 32 23 33) + movq mm6, mm0 ; transpose coefficients(phase 1) + punpcklwd mm0, mm5 ; mm0=(40 50 41 51) + punpckhwd mm6, mm5 ; mm6=(42 52 43 53) + + movq mm1, mm7 ; transpose coefficients(phase 2) + punpckldq mm7, mm3 ; mm7=(00 10 20 30) + punpckhdq mm1, mm3 ; mm1=(01 11 21 31) + movq mm5, mm2 ; transpose coefficients(phase 2) + punpckldq mm2, mm4 ; mm2=(02 12 22 32) + punpckhdq mm5, mm4 ; mm5=(03 13 23 33) + + movq mm3, MMWORD [wk(4)] ; mm3=(60 70 61 71) + movq mm4, MMWORD [wk(5)] ; mm4=(62 72 63 73) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm7 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5 + + movq mm7, mm0 ; transpose coefficients(phase 2) + punpckldq mm0, mm3 ; mm0=(40 50 60 70) + punpckhdq mm7, mm3 ; mm7=(41 51 61 71) + movq mm1, mm6 ; transpose coefficients(phase 2) + punpckldq mm6, mm4 ; mm6=(42 52 62 72) + punpckhdq mm1, mm4 ; mm1=(43 53 63 73) + + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm7 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm1 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_ISLOW_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. 
+ + mov eax, [original_ebp] + lea esi, [workspace] ; JCOEF *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + mov ecx, DCTSIZE/4 ; ctr + alignx 16, 7 +.rowloop: + + ; -- Even part + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movq mm4, mm1 ; mm1=in2=z2 + movq mm5, mm1 + punpcklwd mm4, mm3 ; mm3=in6=z3 + punpckhwd mm5, mm3 + movq mm1, mm4 + movq mm3, mm5 + pmaddwd mm4, [GOTOFF(ebx,PW_F130_F054)] ; mm4=tmp3L + pmaddwd mm5, [GOTOFF(ebx,PW_F130_F054)] ; mm5=tmp3H + pmaddwd mm1, [GOTOFF(ebx,PW_F054_MF130)] ; mm1=tmp2L + pmaddwd mm3, [GOTOFF(ebx,PW_F054_MF130)] ; mm3=tmp2H + + movq mm6, mm0 + paddw mm0, mm2 ; mm0=in0+in4 + psubw mm6, mm2 ; mm6=in0-in4 + + pxor mm7, mm7 + pxor mm2, mm2 + punpcklwd mm7, mm0 ; mm7=tmp0L + punpckhwd mm2, mm0 ; mm2=tmp0H + psrad mm7, (16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + psrad mm2, (16-CONST_BITS) ; psrad mm2,16 & pslld mm2,CONST_BITS + + movq mm0, mm7 + paddd mm7, mm4 ; mm7=tmp10L + psubd mm0, mm4 ; mm0=tmp13L + movq mm4, mm2 + paddd mm2, mm5 ; mm2=tmp10H + psubd mm4, mm5 ; mm4=tmp13H + + movq MMWORD [wk(0)], mm7 ; wk(0)=tmp10L + movq MMWORD [wk(1)], mm2 ; wk(1)=tmp10H + movq MMWORD [wk(2)], mm0 ; wk(2)=tmp13L + movq MMWORD [wk(3)], mm4 ; wk(3)=tmp13H + + pxor mm5, mm5 + pxor mm7, mm7 + punpcklwd mm5, mm6 ; mm5=tmp1L + punpckhwd mm7, mm6 ; mm7=tmp1H + psrad mm5, (16-CONST_BITS) ; psrad mm5,16 & pslld mm5,CONST_BITS + psrad mm7, (16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS + + movq mm2, mm5 + paddd mm5, mm1 ; mm5=tmp11L + psubd 
mm2, mm1 ; mm2=tmp12L + movq mm0, mm7 + paddd mm7, mm3 ; mm7=tmp11H + psubd mm0, mm3 ; mm0=tmp12H + + movq MMWORD [wk(4)], mm5 ; wk(4)=tmp11L + movq MMWORD [wk(5)], mm7 ; wk(5)=tmp11H + movq MMWORD [wk(6)], mm2 ; wk(6)=tmp12L + movq MMWORD [wk(7)], mm0 ; wk(7)=tmp12H + + ; -- Odd part + + movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm5, mm6 + movq mm7, mm4 + paddw mm5, mm3 ; mm5=z3 + paddw mm7, mm1 ; mm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movq mm2, mm5 + movq mm0, mm5 + punpcklwd mm2, mm7 + punpckhwd mm0, mm7 + movq mm5, mm2 + movq mm7, mm0 + pmaddwd mm2, [GOTOFF(ebx,PW_MF078_F117)] ; mm2=z3L + pmaddwd mm0, [GOTOFF(ebx,PW_MF078_F117)] ; mm0=z3H + pmaddwd mm5, [GOTOFF(ebx,PW_F117_F078)] ; mm5=z4L + pmaddwd mm7, [GOTOFF(ebx,PW_F117_F078)] ; mm7=z4H + + movq MMWORD [wk(10)], mm2 ; wk(10)=z3L + movq MMWORD [wk(11)], mm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movq mm2, mm3 + movq mm0, mm3 + punpcklwd mm2, mm4 + punpckhwd mm0, mm4 
+ movq mm3, mm2 + movq mm4, mm0 + pmaddwd mm2, [GOTOFF(ebx,PW_MF060_MF089)] ; mm2=tmp0L + pmaddwd mm0, [GOTOFF(ebx,PW_MF060_MF089)] ; mm0=tmp0H + pmaddwd mm3, [GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L + pmaddwd mm4, [GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H + + paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L + paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H + paddd mm3, mm5 ; mm3=tmp3L + paddd mm4, mm7 ; mm4=tmp3H + + movq MMWORD [wk(8)], mm2 ; wk(8)=tmp0L + movq MMWORD [wk(9)], mm0 ; wk(9)=tmp0H + + movq mm2, mm1 + movq mm0, mm1 + punpcklwd mm2, mm6 + punpckhwd mm0, mm6 + movq mm1, mm2 + movq mm6, mm0 + pmaddwd mm2, [GOTOFF(ebx,PW_MF050_MF256)] ; mm2=tmp1L + pmaddwd mm0, [GOTOFF(ebx,PW_MF050_MF256)] ; mm0=tmp1H + pmaddwd mm1, [GOTOFF(ebx,PW_MF256_F050)] ; mm1=tmp2L + pmaddwd mm6, [GOTOFF(ebx,PW_MF256_F050)] ; mm6=tmp2H + + paddd mm2, mm5 ; mm2=tmp1L + paddd mm0, mm7 ; mm0=tmp1H + paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L + paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H + + movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L + movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movq mm5, MMWORD [wk(0)] ; mm5=tmp10L + movq mm7, MMWORD [wk(1)] ; mm7=tmp10H + + movq mm2, mm5 + movq mm0, mm7 + paddd mm5, mm3 ; mm5=data0L + paddd mm7, mm4 ; mm7=data0H + psubd mm2, mm3 ; mm2=data7L + psubd mm0, mm4 ; mm0=data7H + + movq mm3, [GOTOFF(ebx,PD_DESCALE_P2)] ; mm3=[PD_DESCALE_P2] + + paddd mm5, mm3 + paddd mm7, mm3 + psrad mm5, DESCALE_P2 + psrad mm7, DESCALE_P2 + paddd mm2, mm3 + paddd mm0, mm3 + psrad mm2, DESCALE_P2 + psrad mm0, DESCALE_P2 + + packssdw mm5, mm7 ; mm5=data0=(00 10 20 30) + packssdw mm2, mm0 ; mm2=data7=(07 17 27 37) + + movq mm4, MMWORD [wk(4)] ; mm4=tmp11L + movq mm3, MMWORD [wk(5)] ; mm3=tmp11H + + movq mm7, mm4 + movq mm0, mm3 + paddd mm4, mm1 ; mm4=data1L + paddd mm3, mm6 ; mm3=data1H + psubd mm7, mm1 ; mm7=data6L + psubd mm0, mm6 ; mm0=data6H + + movq mm1, [GOTOFF(ebx,PD_DESCALE_P2)] ; mm1=[PD_DESCALE_P2] + + paddd mm4, mm1 + paddd mm3, mm1 + psrad mm4, DESCALE_P2 + psrad 
mm3, DESCALE_P2 + paddd mm7, mm1 + paddd mm0, mm1 + psrad mm7, DESCALE_P2 + psrad mm0, DESCALE_P2 + + packssdw mm4, mm3 ; mm4=data1=(01 11 21 31) + packssdw mm7, mm0 ; mm7=data6=(06 16 26 36) + + packsswb mm5, mm7 ; mm5=(00 10 20 30 06 16 26 36) + packsswb mm4, mm2 ; mm4=(01 11 21 31 07 17 27 37) + + movq mm6, MMWORD [wk(6)] ; mm6=tmp12L + movq mm1, MMWORD [wk(7)] ; mm1=tmp12H + movq mm3, MMWORD [wk(10)] ; mm3=tmp1L + movq mm0, MMWORD [wk(11)] ; mm0=tmp1H + + movq MMWORD [wk(0)], mm5 ; wk(0)=(00 10 20 30 06 16 26 36) + movq MMWORD [wk(1)], mm4 ; wk(1)=(01 11 21 31 07 17 27 37) + + movq mm7, mm6 + movq mm2, mm1 + paddd mm6, mm3 ; mm6=data2L + paddd mm1, mm0 ; mm1=data2H + psubd mm7, mm3 ; mm7=data5L + psubd mm2, mm0 ; mm2=data5H + + movq mm5, [GOTOFF(ebx,PD_DESCALE_P2)] ; mm5=[PD_DESCALE_P2] + + paddd mm6, mm5 + paddd mm1, mm5 + psrad mm6, DESCALE_P2 + psrad mm1, DESCALE_P2 + paddd mm7, mm5 + paddd mm2, mm5 + psrad mm7, DESCALE_P2 + psrad mm2, DESCALE_P2 + + packssdw mm6, mm1 ; mm6=data2=(02 12 22 32) + packssdw mm7, mm2 ; mm7=data5=(05 15 25 35) + + movq mm4, MMWORD [wk(2)] ; mm4=tmp13L + movq mm3, MMWORD [wk(3)] ; mm3=tmp13H + movq mm0, MMWORD [wk(8)] ; mm0=tmp0L + movq mm5, MMWORD [wk(9)] ; mm5=tmp0H + + movq mm1, mm4 + movq mm2, mm3 + paddd mm4, mm0 ; mm4=data3L + paddd mm3, mm5 ; mm3=data3H + psubd mm1, mm0 ; mm1=data4L + psubd mm2, mm5 ; mm2=data4H + + movq mm0, [GOTOFF(ebx,PD_DESCALE_P2)] ; mm0=[PD_DESCALE_P2] + + paddd mm4, mm0 + paddd mm3, mm0 + psrad mm4, DESCALE_P2 + psrad mm3, DESCALE_P2 + paddd mm1, mm0 + paddd mm2, mm0 + psrad mm1, DESCALE_P2 + psrad mm2, DESCALE_P2 + + movq mm5, [GOTOFF(ebx,PB_CENTERJSAMP)] ; mm5=[PB_CENTERJSAMP] + + packssdw mm4, mm3 ; mm4=data3=(03 13 23 33) + packssdw mm1, mm2 ; mm1=data4=(04 14 24 34) + + movq mm0, MMWORD [wk(0)] ; mm0=(00 10 20 30 06 16 26 36) + movq mm3, MMWORD [wk(1)] ; mm3=(01 11 21 31 07 17 27 37) + + packsswb mm6, mm1 ; mm6=(02 12 22 32 04 14 24 34) + packsswb mm4, mm7 ; mm4=(03 13 23 33 05 15 25 35) + + 
paddb mm0, mm5 + paddb mm3, mm5 + paddb mm6, mm5 + paddb mm4, mm5 + + movq mm2, mm0 ; transpose coefficients(phase 1) + punpcklbw mm0, mm3 ; mm0=(00 01 10 11 20 21 30 31) + punpckhbw mm2, mm3 ; mm2=(06 07 16 17 26 27 36 37) + movq mm1, mm6 ; transpose coefficients(phase 1) + punpcklbw mm6, mm4 ; mm6=(02 03 12 13 22 23 32 33) + punpckhbw mm1, mm4 ; mm1=(04 05 14 15 24 25 34 35) + + movq mm7, mm0 ; transpose coefficients(phase 2) + punpcklwd mm0, mm6 ; mm0=(00 01 02 03 10 11 12 13) + punpckhwd mm7, mm6 ; mm7=(20 21 22 23 30 31 32 33) + movq mm5, mm1 ; transpose coefficients(phase 2) + punpcklwd mm1, mm2 ; mm1=(04 05 06 07 14 15 16 17) + punpckhwd mm5, mm2 ; mm5=(24 25 26 27 34 35 36 37) + + movq mm3, mm0 ; transpose coefficients(phase 3) + punpckldq mm0, mm1 ; mm0=(00 01 02 03 04 05 06 07) + punpckhdq mm3, mm1 ; mm3=(10 11 12 13 14 15 16 17) + movq mm4, mm7 ; transpose coefficients(phase 3) + punpckldq mm7, mm5 ; mm7=(20 21 22 23 24 25 26 27) + punpckhdq mm4, mm5 ; mm4=(30 31 32 33 34 35 36 37) + + pushpic ebx ; save GOT address + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm3 + mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7 + movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4 + + poppic ebx ; restore GOT address + + add esi, byte 4*SIZEOF_JCOEF ; wsptr + add edi, byte 4*SIZEOF_JSAMPROW + dec ecx ; ctr + jnz near .rowloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. 
+ align 32 diff --git a/simd/i386/jidctint-sse2.asm b/simd/i386/jidctint-sse2.asm new file mode 100644 index 0000000..a6bd00a --- /dev/null +++ b/simd/i386/jidctint-sse2.asm @@ -0,0 +1,860 @@ +; +; jidctint.asm - accurate integer IDCT (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS - PASS1_BITS) +%define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_idct_islow_sse2) + +EXTN(jconst_idct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541 - F_1_847) +PW_MF078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175 - F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298 - F_0_899), -F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501 - F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053 - F_2_562), -F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072 - F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients. 
+; +; GLOBAL(void) +; jsimd_idct_islow_sse2(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b) + 8 ; void *dct_table +%define coef_block(b) (b) + 12 ; JCOEFPTR coef_block +%define output_buf(b) (b) + 16 ; JSAMPARRAY output_buf +%define output_col(b) (b) + 20 ; JDIMENSION output_col + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 12 + + align 32 + GLOBAL_FUNCTION(jsimd_idct_islow_sse2) + +EXTN(jsimd_idct_islow_sse2): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [wk(0)] + pushpic ebx +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input. + +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por xmm1, xmm0 + packsswb xmm1, xmm1 + packsswb xmm1, xmm1 + movd eax, xmm1 + test eax, eax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm5, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm5, PASS1_BITS + + movdqa xmm4, xmm5 ; xmm5=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm5, xmm5 ; xmm5=(00 00 01 01
02 02 03 03) + punpckhwd xmm4, xmm4 ; xmm4=(04 04 05 05 06 06 07 07) + + pshufd xmm7, xmm5, 0x00 ; xmm7=col0=(00 00 00 00 00 00 00 00) + pshufd xmm6, xmm5, 0x55 ; xmm6=col1=(01 01 01 01 01 01 01 01) + pshufd xmm1, xmm5, 0xAA ; xmm1=col2=(02 02 02 02 02 02 02 02) + pshufd xmm5, xmm5, 0xFF ; xmm5=col3=(03 03 03 03 03 03 03 03) + pshufd xmm0, xmm4, 0x00 ; xmm0=col4=(04 04 04 04 04 04 04 04) + pshufd xmm3, xmm4, 0x55 ; xmm3=col5=(05 05 05 05 05 05 05 05) + pshufd xmm2, xmm4, 0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06) + pshufd xmm4, xmm4, 0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3 + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 + jmp near .column_end + alignx 16, 7 +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm4, xmm1 ; xmm1=in2=z2 + movdqa xmm5, xmm1 + punpcklwd xmm4, xmm3 ; xmm3=in6=z3 + punpckhwd xmm5, xmm3 + movdqa xmm1, xmm4 + movdqa xmm3, xmm5 + pmaddwd xmm4, [GOTOFF(ebx,PW_F130_F054)] ; xmm4=tmp3L + pmaddwd xmm5, [GOTOFF(ebx,PW_F130_F054)] ; xmm5=tmp3H + pmaddwd xmm1, [GOTOFF(ebx,PW_F054_MF130)] ; xmm1=tmp2L + pmaddwd xmm3, [GOTOFF(ebx,PW_F054_MF130)] ; xmm3=tmp2H + + 
movdqa xmm6, xmm0 + paddw xmm0, xmm2 ; xmm0=in0+in4 + psubw xmm6, xmm2 ; xmm6=in0-in4 + + pxor xmm7, xmm7 + pxor xmm2, xmm2 + punpcklwd xmm7, xmm0 ; xmm7=tmp0L + punpckhwd xmm2, xmm0 ; xmm2=tmp0H + psrad xmm7, (16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + psrad xmm2, (16-CONST_BITS) ; psrad xmm2,16 & pslld xmm2,CONST_BITS + + movdqa xmm0, xmm7 + paddd xmm7, xmm4 ; xmm7=tmp10L + psubd xmm0, xmm4 ; xmm0=tmp13L + movdqa xmm4, xmm2 + paddd xmm2, xmm5 ; xmm2=tmp10H + psubd xmm4, xmm5 ; xmm4=tmp13H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm0 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm4 ; wk(3)=tmp13H + + pxor xmm5, xmm5 + pxor xmm7, xmm7 + punpcklwd xmm5, xmm6 ; xmm5=tmp1L + punpckhwd xmm7, xmm6 ; xmm7=tmp1H + psrad xmm5, (16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm7, (16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + + movdqa xmm2, xmm5 + paddd xmm5, xmm1 ; xmm5=tmp11L + psubd xmm2, xmm1 ; xmm2=tmp12L + movdqa xmm0, xmm7 + paddd xmm7, xmm3 ; xmm7=tmp11H + psubd xmm0, xmm3 ; xmm0=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm7 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm0 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm4, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movdqa xmm6, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm6, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw xmm1, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm5, xmm6 + movdqa xmm7, xmm4 + paddw xmm5, xmm3 ; xmm5=z3 + paddw xmm7, xmm1 ; xmm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = 
z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm2, xmm5 + movdqa xmm0, xmm5 + punpcklwd xmm2, xmm7 + punpckhwd xmm0, xmm7 + movdqa xmm5, xmm2 + movdqa xmm7, xmm0 + pmaddwd xmm2, [GOTOFF(ebx,PW_MF078_F117)] ; xmm2=z3L + pmaddwd xmm0, [GOTOFF(ebx,PW_MF078_F117)] ; xmm0=z3H + pmaddwd xmm5, [GOTOFF(ebx,PW_F117_F078)] ; xmm5=z4L + pmaddwd xmm7, [GOTOFF(ebx,PW_F117_F078)] ; xmm7=z4H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm2, xmm3 + movdqa xmm0, xmm3 + punpcklwd xmm2, xmm4 + punpckhwd xmm0, xmm4 + movdqa xmm3, xmm2 + movdqa xmm4, xmm0 + pmaddwd xmm2, [GOTOFF(ebx,PW_MF060_MF089)] ; xmm2=tmp0L + pmaddwd xmm0, [GOTOFF(ebx,PW_MF060_MF089)] ; xmm0=tmp0H + pmaddwd xmm3, [GOTOFF(ebx,PW_MF089_F060)] ; xmm3=tmp3L + pmaddwd xmm4, [GOTOFF(ebx,PW_MF089_F060)] ; xmm4=tmp3H + + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L + paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H + paddd xmm3, xmm5 ; xmm3=tmp3L + paddd xmm4, xmm7 ; xmm4=tmp3H + + movdqa XMMWORD [wk(8)], xmm2 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm0 ; wk(9)=tmp0H + + movdqa xmm2, xmm1 + movdqa xmm0, xmm1 + punpcklwd xmm2, xmm6 + punpckhwd xmm0, xmm6 + movdqa 
xmm1, xmm2 + movdqa xmm6, xmm0 + pmaddwd xmm2, [GOTOFF(ebx,PW_MF050_MF256)] ; xmm2=tmp1L + pmaddwd xmm0, [GOTOFF(ebx,PW_MF050_MF256)] ; xmm0=tmp1H + pmaddwd xmm1, [GOTOFF(ebx,PW_MF256_F050)] ; xmm1=tmp2L + pmaddwd xmm6, [GOTOFF(ebx,PW_MF256_F050)] ; xmm6=tmp2H + + paddd xmm2, xmm5 ; xmm2=tmp1L + paddd xmm0, xmm7 ; xmm0=tmp1H + paddd xmm1, XMMWORD [wk(10)] ; xmm1=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=tmp10H + + movdqa xmm2, xmm5 + movdqa xmm0, xmm7 + paddd xmm5, xmm3 ; xmm5=data0L + paddd xmm7, xmm4 ; xmm7=data0H + psubd xmm2, xmm3 ; xmm2=data7L + psubd xmm0, xmm4 ; xmm0=data7H + + movdqa xmm3, [GOTOFF(ebx,PD_DESCALE_P1)] ; xmm3=[PD_DESCALE_P1] + + paddd xmm5, xmm3 + paddd xmm7, xmm3 + psrad xmm5, DESCALE_P1 + psrad xmm7, DESCALE_P1 + paddd xmm2, xmm3 + paddd xmm0, xmm3 + psrad xmm2, DESCALE_P1 + psrad xmm0, DESCALE_P1 + + packssdw xmm5, xmm7 ; xmm5=data0=(00 01 02 03 04 05 06 07) + packssdw xmm2, xmm0 ; xmm2=data7=(70 71 72 73 74 75 76 77) + + movdqa xmm4, XMMWORD [wk(4)] ; xmm4=tmp11L + movdqa xmm3, XMMWORD [wk(5)] ; xmm3=tmp11H + + movdqa xmm7, xmm4 + movdqa xmm0, xmm3 + paddd xmm4, xmm1 ; xmm4=data1L + paddd xmm3, xmm6 ; xmm3=data1H + psubd xmm7, xmm1 ; xmm7=data6L + psubd xmm0, xmm6 ; xmm0=data6H + + movdqa xmm1, [GOTOFF(ebx,PD_DESCALE_P1)] ; xmm1=[PD_DESCALE_P1] + + paddd xmm4, xmm1 + paddd xmm3, xmm1 + psrad xmm4, DESCALE_P1 + psrad xmm3, DESCALE_P1 + paddd xmm7, xmm1 + paddd xmm0, xmm1 + psrad xmm7, DESCALE_P1 + psrad xmm0, DESCALE_P1 + + packssdw xmm4, xmm3 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm7, xmm0 ; xmm7=data6=(60 61 62 63 64 65 66 67) + + movdqa xmm6, xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5, xmm4 ; xmm5=(00 10 01 11 02 12 03 13) + punpckhwd xmm6, xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm1, xmm7 
; transpose coefficients(phase 1) + punpcklwd xmm7, xmm2 ; xmm7=(60 70 61 71 62 72 63 73) + punpckhwd xmm1, xmm2 ; xmm1=(64 74 65 75 66 76 67 77) + + movdqa xmm3, XMMWORD [wk(6)] ; xmm3=tmp12L + movdqa xmm0, XMMWORD [wk(7)] ; xmm0=tmp12H + movdqa xmm4, XMMWORD [wk(10)] ; xmm4=tmp1L + movdqa xmm2, XMMWORD [wk(11)] ; xmm2=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 01 11 02 12 03 13) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=(04 14 05 15 06 16 07 17) + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(5)], xmm1 ; wk(5)=(64 74 65 75 66 76 67 77) + + movdqa xmm5, xmm3 + movdqa xmm6, xmm0 + paddd xmm3, xmm4 ; xmm3=data2L + paddd xmm0, xmm2 ; xmm0=data2H + psubd xmm5, xmm4 ; xmm5=data5L + psubd xmm6, xmm2 ; xmm6=data5H + + movdqa xmm7, [GOTOFF(ebx,PD_DESCALE_P1)] ; xmm7=[PD_DESCALE_P1] + + paddd xmm3, xmm7 + paddd xmm0, xmm7 + psrad xmm3, DESCALE_P1 + psrad xmm0, DESCALE_P1 + paddd xmm5, xmm7 + paddd xmm6, xmm7 + psrad xmm5, DESCALE_P1 + psrad xmm6, DESCALE_P1 + + packssdw xmm3, xmm0 ; xmm3=data2=(20 21 22 23 24 25 26 27) + packssdw xmm5, xmm6 ; xmm5=data5=(50 51 52 53 54 55 56 57) + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=tmp13L + movdqa xmm4, XMMWORD [wk(3)] ; xmm4=tmp13H + movdqa xmm2, XMMWORD [wk(8)] ; xmm2=tmp0L + movdqa xmm7, XMMWORD [wk(9)] ; xmm7=tmp0H + + movdqa xmm0, xmm1 + movdqa xmm6, xmm4 + paddd xmm1, xmm2 ; xmm1=data3L + paddd xmm4, xmm7 ; xmm4=data3H + psubd xmm0, xmm2 ; xmm0=data4L + psubd xmm6, xmm7 ; xmm6=data4H + + movdqa xmm2, [GOTOFF(ebx,PD_DESCALE_P1)] ; xmm2=[PD_DESCALE_P1] + + paddd xmm1, xmm2 + paddd xmm4, xmm2 + psrad xmm1, DESCALE_P1 + psrad xmm4, DESCALE_P1 + paddd xmm0, xmm2 + paddd xmm6, xmm2 + psrad xmm0, DESCALE_P1 + psrad xmm6, DESCALE_P1 + + packssdw xmm1, xmm4 ; xmm1=data3=(30 31 32 33 34 35 36 37) + packssdw xmm0, xmm6 ; xmm0=data4=(40 41 42 43 44 45 46 47) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 01 11 02 12 03 13) + movdqa xmm2, XMMWORD [wk(1)] ; xmm2=(04 14 05 15 06 16 07 17) + + movdqa 
xmm4, xmm3 ; transpose coefficients(phase 1) + punpcklwd xmm3, xmm1 ; xmm3=(20 30 21 31 22 32 23 33) + punpckhwd xmm4, xmm1 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm6, xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0, xmm5 ; xmm0=(40 50 41 51 42 52 43 53) + punpckhwd xmm6, xmm5 ; xmm6=(44 54 45 55 46 56 47 57) + + movdqa xmm1, xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7, xmm3 ; xmm7=(00 10 20 30 01 11 21 31) + punpckhdq xmm1, xmm3 ; xmm1=(02 12 22 32 03 13 23 33) + movdqa xmm5, xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2, xmm4 ; xmm2=(04 14 24 34 05 15 25 35) + punpckhdq xmm5, xmm4 ; xmm5=(06 16 26 36 07 17 27 37) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=(60 70 61 71 62 72 63 73) + movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(7)], xmm5 ; wk(7)=(06 16 26 36 07 17 27 37) + + movdqa xmm2, xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0, xmm3 ; xmm0=(40 50 60 70 41 51 61 71) + punpckhdq xmm2, xmm3 ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm4 ; xmm6=(44 54 64 74 45 55 65 75) + punpckhdq xmm5, xmm4 ; xmm5=(46 56 66 76 47 57 67 77) + + movdqa xmm3, xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7, xmm0 ; xmm7=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm3, xmm0 ; xmm3=col1=(01 11 21 31 41 51 61 71) + movdqa xmm4, xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1, xmm2 ; xmm1=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm4, xmm2 ; xmm4=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm0, XMMWORD [wk(6)] ; xmm0=(04 14 24 34 05 15 25 35) + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(8)], xmm3 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm4 ; wk(9)=col3 + + movdqa xmm3, xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0, xmm6 ; xmm0=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm3, xmm6 ; xmm3=col5=(05 15 25 35 
45 55 65 75) + movdqa xmm4, xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2, xmm5 ; xmm2=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm4, xmm5 ; xmm4=col7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov eax, [original_ebp] + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Even part + + ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm6, xmm1 ; xmm1=in2=z2 + movdqa xmm5, xmm1 + punpcklwd xmm6, xmm2 ; xmm2=in6=z3 + punpckhwd xmm5, xmm2 + movdqa xmm1, xmm6 + movdqa xmm2, xmm5 + pmaddwd xmm6, [GOTOFF(ebx,PW_F130_F054)] ; xmm6=tmp3L + pmaddwd xmm5, [GOTOFF(ebx,PW_F130_F054)] ; xmm5=tmp3H + pmaddwd xmm1, [GOTOFF(ebx,PW_F054_MF130)] ; xmm1=tmp2L + pmaddwd xmm2, [GOTOFF(ebx,PW_F054_MF130)] ; xmm2=tmp2H + + movdqa xmm3, xmm7 + paddw xmm7, xmm0 ; xmm7=in0+in4 + psubw xmm3, xmm0 ; xmm3=in0-in4 + + pxor xmm4, xmm4 + pxor xmm0, xmm0 + punpcklwd xmm4, xmm7 ; xmm4=tmp0L + punpckhwd xmm0, xmm7 ; xmm0=tmp0H + psrad xmm4, (16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + psrad xmm0, (16-CONST_BITS) ; psrad xmm0,16 & pslld xmm0,CONST_BITS + + movdqa xmm7, xmm4 + paddd xmm4, xmm6 ; xmm4=tmp10L + psubd xmm7, xmm6 ; xmm7=tmp13L + movdqa xmm6, xmm0 + paddd xmm0, xmm5 ; xmm0=tmp10H + psubd xmm6, xmm5 ; xmm6=tmp13H + + movdqa 
XMMWORD [wk(0)], xmm4 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm6 ; wk(3)=tmp13H + + pxor xmm5, xmm5 + pxor xmm4, xmm4 + punpcklwd xmm5, xmm3 ; xmm5=tmp1L + punpckhwd xmm4, xmm3 ; xmm4=tmp1H + psrad xmm5, (16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm4, (16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + + movdqa xmm0, xmm5 + paddd xmm5, xmm1 ; xmm5=tmp11L + psubd xmm0, xmm1 ; xmm0=tmp12L + movdqa xmm7, xmm4 + paddd xmm4, xmm2 ; xmm4=tmp11H + psubd xmm7, xmm2 ; xmm7=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm7 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm6, XMMWORD [wk(9)] ; xmm6=col3 + movdqa xmm3, XMMWORD [wk(8)] ; xmm3=col1 + movdqa xmm1, XMMWORD [wk(11)] ; xmm1=col7 + movdqa xmm2, XMMWORD [wk(10)] ; xmm2=col5 + + movdqa xmm5, xmm6 + movdqa xmm4, xmm3 + paddw xmm5, xmm1 ; xmm5=z3 + paddw xmm4, xmm2 ; xmm4=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm0, xmm5 + movdqa xmm7, xmm5 + punpcklwd xmm0, xmm4 + punpckhwd xmm7, xmm4 + movdqa xmm5, xmm0 + movdqa xmm4, xmm7 + pmaddwd xmm0, [GOTOFF(ebx,PW_MF078_F117)] ; xmm0=z3L + pmaddwd xmm7, [GOTOFF(ebx,PW_MF078_F117)] ; xmm7=z3H + pmaddwd xmm5, [GOTOFF(ebx,PW_F117_F078)] ; xmm5=z4L + pmaddwd xmm4, [GOTOFF(ebx,PW_F117_F078)] ; xmm4=z4H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; 
z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm0, xmm1 + movdqa xmm7, xmm1 + punpcklwd xmm0, xmm3 + punpckhwd xmm7, xmm3 + movdqa xmm1, xmm0 + movdqa xmm3, xmm7 + pmaddwd xmm0, [GOTOFF(ebx,PW_MF060_MF089)] ; xmm0=tmp0L + pmaddwd xmm7, [GOTOFF(ebx,PW_MF060_MF089)] ; xmm7=tmp0H + pmaddwd xmm1, [GOTOFF(ebx,PW_MF089_F060)] ; xmm1=tmp3L + pmaddwd xmm3, [GOTOFF(ebx,PW_MF089_F060)] ; xmm3=tmp3H + + paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L + paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H + paddd xmm1, xmm5 ; xmm1=tmp3L + paddd xmm3, xmm4 ; xmm3=tmp3H + + movdqa XMMWORD [wk(8)], xmm0 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm7 ; wk(9)=tmp0H + + movdqa xmm0, xmm2 + movdqa xmm7, xmm2 + punpcklwd xmm0, xmm6 + punpckhwd xmm7, xmm6 + movdqa xmm2, xmm0 + movdqa xmm6, xmm7 + pmaddwd xmm0, [GOTOFF(ebx,PW_MF050_MF256)] ; xmm0=tmp1L + pmaddwd xmm7, [GOTOFF(ebx,PW_MF050_MF256)] ; xmm7=tmp1H + pmaddwd xmm2, [GOTOFF(ebx,PW_MF256_F050)] ; xmm2=tmp2L + pmaddwd xmm6, [GOTOFF(ebx,PW_MF256_F050)] ; xmm6=tmp2H + + paddd xmm0, xmm5 ; xmm0=tmp1L + paddd xmm7, xmm4 ; xmm7=tmp1H + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm4, XMMWORD [wk(1)] ; xmm4=tmp10H + + movdqa xmm0, xmm5 + movdqa xmm7, xmm4 + paddd xmm5, xmm1 ; xmm5=data0L + paddd xmm4, xmm3 ; xmm4=data0H + psubd xmm0, xmm1 ; xmm0=data7L + psubd xmm7, xmm3 ; xmm7=data7H + + movdqa xmm1, 
[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm1=[PD_DESCALE_P2] + + paddd xmm5, xmm1 + paddd xmm4, xmm1 + psrad xmm5, DESCALE_P2 + psrad xmm4, DESCALE_P2 + paddd xmm0, xmm1 + paddd xmm7, xmm1 + psrad xmm0, DESCALE_P2 + psrad xmm7, DESCALE_P2 + + packssdw xmm5, xmm4 ; xmm5=data0=(00 10 20 30 40 50 60 70) + packssdw xmm0, xmm7 ; xmm0=data7=(07 17 27 37 47 57 67 77) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=tmp11L + movdqa xmm1, XMMWORD [wk(5)] ; xmm1=tmp11H + + movdqa xmm4, xmm3 + movdqa xmm7, xmm1 + paddd xmm3, xmm2 ; xmm3=data1L + paddd xmm1, xmm6 ; xmm1=data1H + psubd xmm4, xmm2 ; xmm4=data6L + psubd xmm7, xmm6 ; xmm7=data6H + + movdqa xmm2, [GOTOFF(ebx,PD_DESCALE_P2)] ; xmm2=[PD_DESCALE_P2] + + paddd xmm3, xmm2 + paddd xmm1, xmm2 + psrad xmm3, DESCALE_P2 + psrad xmm1, DESCALE_P2 + paddd xmm4, xmm2 + paddd xmm7, xmm2 + psrad xmm4, DESCALE_P2 + psrad xmm7, DESCALE_P2 + + packssdw xmm3, xmm1 ; xmm3=data1=(01 11 21 31 41 51 61 71) + packssdw xmm4, xmm7 ; xmm4=data6=(06 16 26 36 46 56 66 76) + + packsswb xmm5, xmm4 ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3, xmm0 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm6, XMMWORD [wk(6)] ; xmm6=tmp12L + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=tmp12H + movdqa xmm1, XMMWORD [wk(10)] ; xmm1=tmp1L + movdqa xmm7, XMMWORD [wk(11)] ; xmm7=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm4, xmm6 + movdqa xmm0, xmm2 + paddd xmm6, xmm1 ; xmm6=data2L + paddd xmm2, xmm7 ; xmm2=data2H + psubd xmm4, xmm1 ; xmm4=data5L + psubd xmm0, xmm7 ; xmm0=data5H + + movdqa xmm5, [GOTOFF(ebx,PD_DESCALE_P2)] ; xmm5=[PD_DESCALE_P2] + + paddd xmm6, xmm5 + paddd xmm2, xmm5 + psrad xmm6, DESCALE_P2 + psrad xmm2, DESCALE_P2 + paddd xmm4, xmm5 + paddd xmm0, xmm5 + psrad xmm4, DESCALE_P2 + psrad xmm0, DESCALE_P2 + + packssdw xmm6, xmm2 ; xmm6=data2=(02 12 22 32 42 52 62 72) + 
packssdw xmm4, xmm0 ; xmm4=data5=(05 15 25 35 45 55 65 75) + + movdqa xmm3, XMMWORD [wk(2)] ; xmm3=tmp13L + movdqa xmm1, XMMWORD [wk(3)] ; xmm1=tmp13H + movdqa xmm7, XMMWORD [wk(8)] ; xmm7=tmp0L + movdqa xmm5, XMMWORD [wk(9)] ; xmm5=tmp0H + + movdqa xmm2, xmm3 + movdqa xmm0, xmm1 + paddd xmm3, xmm7 ; xmm3=data3L + paddd xmm1, xmm5 ; xmm1=data3H + psubd xmm2, xmm7 ; xmm2=data4L + psubd xmm0, xmm5 ; xmm0=data4H + + movdqa xmm7, [GOTOFF(ebx,PD_DESCALE_P2)] ; xmm7=[PD_DESCALE_P2] + + paddd xmm3, xmm7 + paddd xmm1, xmm7 + psrad xmm3, DESCALE_P2 + psrad xmm1, DESCALE_P2 + paddd xmm2, xmm7 + paddd xmm0, xmm7 + psrad xmm2, DESCALE_P2 + psrad xmm0, DESCALE_P2 + + movdqa xmm5, [GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm5=[PB_CENTERJSAMP] + + packssdw xmm3, xmm1 ; xmm3=data3=(03 13 23 33 43 53 63 73) + packssdw xmm2, xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + packsswb xmm6, xmm2 ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm3, xmm4 ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm7, xmm5 + paddb xmm1, xmm5 + paddb xmm6, xmm5 + paddb xmm3, xmm5 + + movdqa xmm0, xmm7 ; transpose coefficients(phase 1) + punpcklbw xmm7, xmm1 ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0, xmm1 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm2, xmm6 ; transpose coefficients(phase 1) + punpcklbw xmm6, xmm3 ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm2, xmm3 ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4, xmm7 ; transpose coefficients(phase 2) + punpcklwd xmm7, xmm6 ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4, xmm6 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm5, xmm2 ; transpose coefficients(phase 2) + punpcklwd xmm2, xmm0 ; 
xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm5, xmm0 ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm1, xmm7 ; transpose coefficients(phase 3) + punpckldq xmm7, xmm2 ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm1, xmm2 ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm3, xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4, xmm5 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm3, xmm5 ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm6, xmm7, 0x4E ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0, xmm1, 0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm2, xmm4, 0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm5, xmm3, 0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm7 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm1 + mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0 + mov edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2 + movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5 + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused + poppic ebx + mov esp, ebp ; esp <- aligned ebp + pop esp ; esp <- original ebp + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. 
+ align 32 diff --git a/simd/i386/jidctred-mmx.asm b/simd/i386/jidctred-mmx.asm new file mode 100644 index 0000000..336ee3b --- /dev/null +++ b/simd/i386/jidctred-mmx.asm @@ -0,0 +1,706 @@ +; +; jidctred.asm - reduced-size IDCT (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains inverse-DCT routines that produce reduced-size +; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. +; The following code is based directly on the IJG's original jidctred.c; +; see the jidctred.c for more details. 
+; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 ; fraction bits of the F_* constants: FIX(x) = round(x * 2^CONST_BITS) +%define PASS1_BITS 2 ; extra fraction bits kept in the pass-1 output + +%define DESCALE_P1_4 (CONST_BITS - PASS1_BITS + 1) +%define DESCALE_P2_4 (CONST_BITS + PASS1_BITS + 3 + 1) +%define DESCALE_P1_2 (CONST_BITS - PASS1_BITS + 2) +%define DESCALE_P2_2 (CONST_BITS + PASS1_BITS + 3 + 2) + +%if CONST_BITS == 13 +F_0_211 equ 1730 ; FIX(0.211164243) +F_0_509 equ 4176 ; FIX(0.509795579) +F_0_601 equ 4926 ; FIX(0.601344887) +F_0_720 equ 5906 ; FIX(0.720959822) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_850 equ 6967 ; FIX(0.850430095) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_061 equ 8697 ; FIX(1.061594337) +F_1_272 equ 10426 ; FIX(1.272758580) +F_1_451 equ 11893 ; FIX(1.451774981) +F_1_847 equ 15137 ; FIX(1.847759065) +F_2_172 equ 17799 ; FIX(2.172734803) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_624 equ 29692 ; FIX(3.624509785) +%else +; NASM cannot do compile-time arithmetic on floating-point constants.
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_211 equ DESCALE( 226735879, 30 - CONST_BITS) ; FIX(0.211164243) +F_0_509 equ DESCALE( 547388834, 30 - CONST_BITS) ; FIX(0.509795579) +F_0_601 equ DESCALE( 645689155, 30 - CONST_BITS) ; FIX(0.601344887) +F_0_720 equ DESCALE( 774124714, 30 - CONST_BITS) ; FIX(0.720959822) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_850 equ DESCALE( 913142361, 30 - CONST_BITS) ; FIX(0.850430095) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_061 equ DESCALE(1139878239, 30 - CONST_BITS) ; FIX(1.061594337) +F_1_272 equ DESCALE(1366614119, 30 - CONST_BITS) ; FIX(1.272758580) +F_1_451 equ DESCALE(1558831516, 30 - CONST_BITS) ; FIX(1.451774981) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_2_172 equ DESCALE(2332956230, 30 - CONST_BITS) ; FIX(2.172734803) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_idct_red_mmx) + +EXTN(jconst_idct_red_mmx): + +PW_F184_MF076 times 2 dw F_1_847, -F_0_765 ; PW_* tables: multiplier word pairs used as pmaddwd operands +PW_F256_F089 times 2 dw F_2_562, F_0_899 +PW_F106_MF217 times 2 dw F_1_061, -F_2_172 +PW_MF060_MF050 times 2 dw -F_0_601, -F_0_509 +PW_F145_MF021 times 2 dw F_1_451, -F_0_211 +PW_F362_MF127 times 2 dw F_3_624, -F_1_272 +PW_F085_MF072 times 2 dw F_0_850, -F_0_720 +PD_DESCALE_P1_4 times 2 dd 1 << (DESCALE_P1_4 - 1) ; PD_DESCALE_*: 1 << (n-1), the round-to-nearest bias for a right shift by n +PD_DESCALE_P2_4 times 2 dd 1 << (DESCALE_P2_4 - 1) +PD_DESCALE_P1_2 times 2 dd 1 << (DESCALE_P1_2 - 1) +PD_DESCALE_P2_2 times 2 dd 1 << (DESCALE_P2_2 - 1) +PB_CENTERJSAMP times 8 db CENTERJSAMPLE + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 4x4
output block. +; +; GLOBAL(void) +; jsimd_idct_4x4_mmx(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +%define dct_table(b) (b) + 8 ; void *dct_table +%define coef_block(b) (b) + 12 ; JCOEFPTR coef_block +%define output_buf(b) (b) + 16 ; JSAMPARRAY output_buf +%define output_col(b) (b) + 20 ; JDIMENSION output_col + +%define original_ebp ebp + 0 +%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_MMWORD + ; mmword wk[WK_NUM] +%define WK_NUM 2 +%define workspace wk(0) - DCTSIZE2 * SIZEOF_JCOEF + ; JCOEF workspace[DCTSIZE2] + + align 32 + GLOBAL_FUNCTION(jsimd_idct_4x4_mmx) + +EXTN(jsimd_idct_4x4_mmx): + push ebp + mov eax, esp ; eax = original ebp + sub esp, byte 4 + and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits + mov [esp], eax + mov ebp, esp ; ebp = aligned ebp + lea esp, [workspace] + pushpic ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + get_GOT ebx ; get GOT address + + ; ---- Pass 1: process columns from input, store into work array. 
+ +; mov eax, [original_ebp] + mov edx, POINTER [dct_table(eax)] ; quantptr + mov esi, JCOEFPTR [coef_block(eax)] ; inptr + lea edi, [workspace] ; JCOEF *wsptr + mov ecx, DCTSIZE/4 ; ctr + alignx 16, 7 +.columnloop: +%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX + mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + por mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + por mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + por mm0, mm1 + packsswb mm0, mm0 + movd eax, mm0 + test eax, eax + jnz short .columnDCT + + ; -- AC terms all zero + + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw mm0, PASS1_BITS + + movq mm2, mm0 ; mm0=in0=(00 01 02 03) + punpcklwd mm0, mm0 ; mm0=(00 00 01 01) + punpckhwd mm2, mm2 ; mm2=(02 02 03 03) + + movq mm1, mm0 + punpckldq mm0, mm0 ; mm0=(00 00 00 00) + punpckhdq mm1, mm1 ; mm1=(01 01 01 01) + movq mm3, mm2 + punpckldq mm2, mm2 ; mm2=(02 02 02 02) + punpckhdq mm3, mm3 ; mm3=(03 03 03 03) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + jmp near .nextcolumn + alignx 16, 7 +%endif +.columnDCT: + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] 
+ + movq mm4, mm0 + movq mm5, mm0 + punpcklwd mm4, mm1 + punpckhwd mm5, mm1 + movq mm0, mm4 + movq mm1, mm5 + pmaddwd mm4, [GOTOFF(ebx,PW_F256_F089)] ; mm4=(tmp2L) + pmaddwd mm5, [GOTOFF(ebx,PW_F256_F089)] ; mm5=(tmp2H) + pmaddwd mm0, [GOTOFF(ebx,PW_F106_MF217)] ; mm0=(tmp0L) + pmaddwd mm1, [GOTOFF(ebx,PW_F106_MF217)] ; mm1=(tmp0H) + + movq mm6, mm2 + movq mm7, mm2 + punpcklwd mm6, mm3 + punpckhwd mm7, mm3 + movq mm2, mm6 + movq mm3, mm7 + pmaddwd mm6, [GOTOFF(ebx,PW_MF060_MF050)] ; mm6=(tmp2L) + pmaddwd mm7, [GOTOFF(ebx,PW_MF060_MF050)] ; mm7=(tmp2H) + pmaddwd mm2, [GOTOFF(ebx,PW_F145_MF021)] ; mm2=(tmp0L) + pmaddwd mm3, [GOTOFF(ebx,PW_F145_MF021)] ; mm3=(tmp0H) + + paddd mm6, mm4 ; mm6=tmp2L + paddd mm7, mm5 ; mm7=tmp2H + paddd mm2, mm0 ; mm2=tmp0L + paddd mm3, mm1 ; mm3=tmp0H + + movq MMWORD [wk(0)], mm2 ; wk(0)=tmp0L + movq MMWORD [wk(1)], mm3 ; wk(1)=tmp0H + + ; -- Even part + + movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + pmullw mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor mm1, mm1 + pxor mm2, mm2 + punpcklwd mm1, mm4 ; mm1=tmp0L + punpckhwd mm2, mm4 ; mm2=tmp0H + psrad mm1, (16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1 + psrad mm2, (16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1 + + movq mm3, mm5 ; mm5=in2=z2 + punpcklwd mm5, mm0 ; mm0=in6=z3 + punpckhwd mm3, mm0 + pmaddwd mm5, [GOTOFF(ebx,PW_F184_MF076)] ; mm5=tmp2L + pmaddwd mm3, [GOTOFF(ebx,PW_F184_MF076)] ; mm3=tmp2H + + movq mm4, mm1 + movq mm0, mm2 + paddd mm1, mm5 ; mm1=tmp10L + paddd mm2, mm3 ; mm2=tmp10H + psubd mm4, mm5 ; mm4=tmp12L + psubd mm0, mm3 ; mm0=tmp12H + + ; -- Final output stage + + movq mm5, mm1 + movq mm3, mm2 + paddd mm1, mm6 ; mm1=data0L + paddd mm2, mm7 ; mm2=data0H + psubd mm5, mm6 ; mm5=data3L + psubd mm3, mm7 ; 
mm3=data3H + + movq mm6, [GOTOFF(ebx,PD_DESCALE_P1_4)] ; mm6=[PD_DESCALE_P1_4] + + paddd mm1, mm6 + paddd mm2, mm6 + psrad mm1, DESCALE_P1_4 + psrad mm2, DESCALE_P1_4 + paddd mm5, mm6 + paddd mm3, mm6 + psrad mm5, DESCALE_P1_4 + psrad mm3, DESCALE_P1_4 + + packssdw mm1, mm2 ; mm1=data0=(00 01 02 03) + packssdw mm5, mm3 ; mm5=data3=(30 31 32 33) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp0L + movq mm6, MMWORD [wk(1)] ; mm6=tmp0H + + movq mm2, mm4 + movq mm3, mm0 + paddd mm4, mm7 ; mm4=data1L + paddd mm0, mm6 ; mm0=data1H + psubd mm2, mm7 ; mm2=data2L + psubd mm3, mm6 ; mm3=data2H + + movq mm7, [GOTOFF(ebx,PD_DESCALE_P1_4)] ; mm7=[PD_DESCALE_P1_4] + + paddd mm4, mm7 + paddd mm0, mm7 + psrad mm4, DESCALE_P1_4 + psrad mm0, DESCALE_P1_4 + paddd mm2, mm7 + paddd mm3, mm7 + psrad mm2, DESCALE_P1_4 + psrad mm3, DESCALE_P1_4 + + packssdw mm4, mm0 ; mm4=data1=(10 11 12 13) + packssdw mm2, mm3 ; mm2=data2=(20 21 22 23) + + movq mm6, mm1 ; transpose coefficients(phase 1) + punpcklwd mm1, mm4 ; mm1=(00 10 01 11) + punpckhwd mm6, mm4 ; mm6=(02 12 03 13) + movq mm7, mm2 ; transpose coefficients(phase 1) + punpcklwd mm2, mm5 ; mm2=(20 30 21 31) + punpckhwd mm7, mm5 ; mm7=(22 32 23 33) + + movq mm0, mm1 ; transpose coefficients(phase 2) + punpckldq mm1, mm2 ; mm1=(00 10 20 30) + punpckhdq mm0, mm2 ; mm0=(01 11 21 31) + movq mm3, mm6 ; transpose coefficients(phase 2) + punpckldq mm6, mm7 ; mm6=(02 12 22 32) + punpckhdq mm3, mm7 ; mm3=(03 13 23 33) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 + +.nextcolumn: + add esi, byte 4*SIZEOF_JCOEF ; coef_block + add edx, byte 4*SIZEOF_ISLOW_MULT_TYPE ; quantptr + add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr + dec ecx ; ctr + jnz near .columnloop + + ; ---- Pass 2: process rows from work array, store into output array. 
+ + mov eax, [original_ebp] + lea esi, [workspace] ; JCOEF *wsptr + mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) + mov eax, JDIMENSION [output_col(eax)] + + ; -- Odd part + + movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] + movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] + movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] + movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] + + movq mm4, mm0 + movq mm5, mm0 + punpcklwd mm4, mm1 + punpckhwd mm5, mm1 + movq mm0, mm4 + movq mm1, mm5 + pmaddwd mm4, [GOTOFF(ebx,PW_F256_F089)] ; mm4=(tmp2L) + pmaddwd mm5, [GOTOFF(ebx,PW_F256_F089)] ; mm5=(tmp2H) + pmaddwd mm0, [GOTOFF(ebx,PW_F106_MF217)] ; mm0=(tmp0L) + pmaddwd mm1, [GOTOFF(ebx,PW_F106_MF217)] ; mm1=(tmp0H) + + movq mm6, mm2 + movq mm7, mm2 + punpcklwd mm6, mm3 + punpckhwd mm7, mm3 + movq mm2, mm6 + movq mm3, mm7 + pmaddwd mm6, [GOTOFF(ebx,PW_MF060_MF050)] ; mm6=(tmp2L) + pmaddwd mm7, [GOTOFF(ebx,PW_MF060_MF050)] ; mm7=(tmp2H) + pmaddwd mm2, [GOTOFF(ebx,PW_F145_MF021)] ; mm2=(tmp0L) + pmaddwd mm3, [GOTOFF(ebx,PW_F145_MF021)] ; mm3=(tmp0H) + + paddd mm6, mm4 ; mm6=tmp2L + paddd mm7, mm5 ; mm7=tmp2H + paddd mm2, mm0 ; mm2=tmp0L + paddd mm3, mm1 ; mm3=tmp0H + + movq MMWORD [wk(0)], mm2 ; wk(0)=tmp0L + movq MMWORD [wk(1)], mm3 ; wk(1)=tmp0H + + ; -- Even part + + movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] + movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] + movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] + + pxor mm1, mm1 + pxor mm2, mm2 + punpcklwd mm1, mm4 ; mm1=tmp0L + punpckhwd mm2, mm4 ; mm2=tmp0H + psrad mm1, (16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1 + psrad mm2, (16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1 + + movq mm3, mm5 ; mm5=in2=z2 + punpcklwd mm5, mm0 ; mm0=in6=z3 + punpckhwd mm3, mm0 + pmaddwd mm5, [GOTOFF(ebx,PW_F184_MF076)] ; mm5=tmp2L + pmaddwd mm3, [GOTOFF(ebx,PW_F184_MF076)] ; mm3=tmp2H + + movq mm4, mm1 + movq mm0, mm2 + paddd mm1, mm5 ; mm1=tmp10L + paddd mm2, mm3 ; mm2=tmp10H + psubd mm4, mm5 ; mm4=tmp12L + 
psubd mm0, mm3 ; mm0=tmp12H + + ; -- Final output stage + + movq mm5, mm1 + movq mm3, mm2 + paddd mm1, mm6 ; mm1=data0L + paddd mm2, mm7 ; mm2=data0H + psubd mm5, mm6 ; mm5=data3L + psubd mm3, mm7 ; mm3=data3H + + movq mm6, [GOTOFF(ebx,PD_DESCALE_P2_4)] ; mm6=[PD_DESCALE_P2_4] + + paddd mm1, mm6 + paddd mm2, mm6 + psrad mm1, DESCALE_P2_4 + psrad mm2, DESCALE_P2_4 + paddd mm5, mm6 + paddd mm3, mm6 + psrad mm5, DESCALE_P2_4 + psrad mm3, DESCALE_P2_4 + + packssdw mm1, mm2 ; mm1=data0=(00 10 20 30) + packssdw mm5, mm3 ; mm5=data3=(03 13 23 33) + + movq mm7, MMWORD [wk(0)] ; mm7=tmp0L + movq mm6, MMWORD [wk(1)] ; mm6=tmp0H + + movq mm2, mm4 + movq mm3, mm0 + paddd mm4, mm7 ; mm4=data1L + paddd mm0, mm6 ; mm0=data1H + psubd mm2, mm7 ; mm2=data2L + psubd mm3, mm6 ; mm3=data2H + + movq mm7, [GOTOFF(ebx,PD_DESCALE_P2_4)] ; mm7=[PD_DESCALE_P2_4] + + paddd mm4, mm7 + paddd mm0, mm7 + psrad mm4, DESCALE_P2_4 + psrad mm0, DESCALE_P2_4 + paddd mm2, mm7 + paddd mm3, mm7 + psrad mm2, DESCALE_P2_4 + psrad mm3, DESCALE_P2_4 + + packssdw mm4, mm0 ; mm4=data1=(01 11 21 31) + packssdw mm2, mm3 ; mm2=data2=(02 12 22 32) + + movq mm6, [GOTOFF(ebx,PB_CENTERJSAMP)] ; mm6=[PB_CENTERJSAMP] + + packsswb mm1, mm2 ; mm1=(00 10 20 30 02 12 22 32) + packsswb mm4, mm5 ; mm4=(01 11 21 31 03 13 23 33) + paddb mm1, mm6 + paddb mm4, mm6 + + movq mm7, mm1 ; transpose coefficients(phase 1) + punpcklbw mm1, mm4 ; mm1=(00 01 10 11 20 21 30 31) + punpckhbw mm7, mm4 ; mm7=(02 03 12 13 22 23 32 33) + + movq mm0, mm1 ; transpose coefficients(phase 2) + punpcklwd mm1, mm7 ; mm1=(00 01 02 03 10 11 12 13) + punpckhwd mm0, mm7 ; mm0=(20 21 22 23 30 31 32 33) + + mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] + movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 + movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 + + psrlq mm1, 4*BYTE_BIT + psrlq mm0, 4*BYTE_BIT + + mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] + mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] + movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 + 
movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
+
+    emms  ; empty MMX state
+
+    pop edi
+    pop esi
+; pop edx ; need not be preserved
+; pop ecx ; need not be preserved
+    poppic ebx
+    mov esp, ebp  ; esp <- aligned ebp
+    pop esp  ; esp <- original ebp
+    pop ebp
+    ret
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 2x2 output block.
+;
+; GLOBAL(void)
+; jsimd_idct_2x2_mmx(void *dct_table, JCOEFPTR coef_block,
+; JSAMPARRAY output_buf, JDIMENSION output_col)
+; Only rows and columns 0, 1, 3, 5 and 7 of the 8x8 block contribute to the result.
+
+%define dct_table(b) (b) + 8  ; void *dct_table: multipliers applied to the coefficients with pmullw
+%define coef_block(b) (b) + 12  ; JCOEFPTR coef_block: input; rows 0,1,3,5,7 are loaded
+%define output_buf(b) (b) + 16  ; JSAMPARRAY output_buf: rows [0] and [1] each receive two samples
+%define output_col(b) (b) + 20  ; JDIMENSION output_col: sample index of the first store in each row
+
+    align 32
+    GLOBAL_FUNCTION(jsimd_idct_2x2_mmx)
+
+EXTN(jsimd_idct_2x2_mmx):
+    push ebp
+    mov ebp, esp
+    push ebx  ; saved: holds the GOT base below and is reused as scratch before the stores
+; push ecx ; need not be preserved
+; push edx ; need not be preserved
+    push esi
+    push edi
+
+    get_GOT ebx  ; get GOT address
+
+    ; ---- Pass 1: process columns from input.
+
+    mov edx, POINTER [dct_table(ebp)]  ; quantptr
+    mov esi, JCOEFPTR [coef_block(ebp)]  ; inptr
+
+    ; | input: | result: |
+    ; | 00 01 ** 03 ** 05 ** 07 | |
+    ; | 10 11 ** 13 ** 15 ** 17 | |
+    ; | ** ** ** ** ** ** ** ** | |
+    ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
+    ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
+    ; | 50 51 ** 53 ** 55 ** 57 | |
+    ; | ** ** ** ** ** ** ** ** | |
+    ; | 70 71 ** 73 ** 75 ** 77 | |
+
+    ; -- Odd part
+
+    movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]  ; row 1, first four columns
+    movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]  ; row 3, first four columns
+    pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]  ; dequantize (low 16 bits of each product)
+    pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]  ; row 5, first four columns
+    movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]  ; row 7, first four columns
+    pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+    ; mm0=(10 11 ** 13), mm1=(30 31 ** 33)
+    ; mm2=(50 51 ** 53), mm3=(70 71 ** 73)
+
+    pcmpeqd mm7, mm7  ; mm7 = all ones
+    pslld mm7, WORD_BIT  ; mm7={0x0000 0xFFFF 0x0000 0xFFFF}
+
+    movq mm4, mm0  ; mm4=(10 11 ** 13)
+    movq mm5, mm2  ; mm5=(50 51 ** 53)
+    punpcklwd mm4, mm1  ; mm4=(10 30 11 31)
+    punpcklwd mm5, mm3  ; mm5=(50 70 51 71)
+    pmaddwd mm4, [GOTOFF(ebx,PW_F362_MF127)]  ; per dword: row1*c[0] + row3*c[1] (cols 0,1)
+    pmaddwd mm5, [GOTOFF(ebx,PW_F085_MF072)]  ; per dword: row5*c[0] + row7*c[1] (cols 0,1)
+
+    psrld mm0, WORD_BIT  ; mm0=(11 -- 13 --)
+    pand mm1, mm7  ; mm1=(-- 31 -- 33)
+    psrld mm2, WORD_BIT  ; mm2=(51 -- 53 --)
+    pand mm3, mm7  ; mm3=(-- 71 -- 73)
+    por mm0, mm1  ; mm0=(11 31 13 33)
+    por mm2, mm3  ; mm2=(51 71 53 73)
+    pmaddwd mm0, [GOTOFF(ebx,PW_F362_MF127)]  ; same products for cols 1,3
+    pmaddwd mm2, [GOTOFF(ebx,PW_F085_MF072)]
+
+    paddd mm4, mm5  ; mm4=tmp0[col0 col1]
+
+    movq mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)]  ; rows 1,3,5,7 again, last four columns
+    movq mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)]
+    pmullw mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    pmullw mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    movq mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)]
+    movq mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)]
+    pmullw mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    pmullw mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+    ; mm6=(** 15 ** 17), mm1=(** 35 ** 37)
+    ; mm3=(** 55 ** 57), mm5=(** 75 ** 77)
+
+    psrld mm6, WORD_BIT  ; mm6=(15 -- 17 --)
+    pand mm1, mm7  ; mm1=(-- 35 -- 37)
+    psrld mm3, WORD_BIT  ; mm3=(55 -- 57 --)
+    pand mm5, mm7  ; mm5=(-- 75 -- 77)
+    por mm6, mm1  ; mm6=(15 35 17 37)
+    por mm3, mm5  ; mm3=(55 75 57 77)
+    pmaddwd mm6, [GOTOFF(ebx,PW_F362_MF127)]
+    pmaddwd mm3, [GOTOFF(ebx,PW_F085_MF072)]
+
+    paddd mm0, mm2  ; mm0=tmp0[col1 col3]
+    paddd mm6, mm3  ; mm6=tmp0[col5 col7]
+
+    ; -- Even part
+
+    movq mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]  ; row 0 is the only even row used
+    movq mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)]
+    pmullw mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    pmullw mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+    ; mm1=(00 01 ** 03), mm5=(** 05 ** 07)
+
+    movq mm2, mm1  ; mm2=(00 01 ** 03)
+    pslld mm1, WORD_BIT  ; mm1=(-- 00 -- **)
+    psrad mm1, (WORD_BIT-CONST_BITS-2)  ; mm1=tmp10[col0 ****]
+
+    pand mm2, mm7  ; mm2=(-- 01 -- 03)
+    pand mm5, mm7  ; mm5=(-- 05 -- 07)
+    psrad mm2, (WORD_BIT-CONST_BITS-2)  ; mm2=tmp10[col1 col3]
+    psrad mm5, (WORD_BIT-CONST_BITS-2)  ; mm5=tmp10[col5 col7]
+
+    ; -- Final output stage
+
+    movq mm3, mm1  ; keep tmp10 for the subtraction below
+    paddd mm1, mm4  ; mm1=data0[col0 ****]=(A0 **)
+    psubd mm3, mm4  ; mm3=data1[col0 ****]=(B0 **)
+    punpckldq mm1, mm3  ; mm1=(A0 B0)
+
+    movq mm7, [GOTOFF(ebx,PD_DESCALE_P1_2)]  ; mm7=[PD_DESCALE_P1_2]
+
+    movq mm4, mm2
+    movq mm3, mm5
+    paddd mm2, mm0  ; mm2=data0[col1 col3]=(A1 A3)
+    paddd mm5, mm6  ; mm5=data0[col5 col7]=(A5 A7)
+    psubd mm4, mm0  ; mm4=data1[col1 col3]=(B1 B3)
+    psubd mm3, mm6  ; mm3=data1[col5 col7]=(B5 B7)
+
+    paddd mm1, mm7  ; add the rounding term ...
+    psrad mm1, DESCALE_P1_2  ; ... then arithmetic shift right by DESCALE_P1_2
+
+    paddd mm2, mm7
+    paddd mm5, mm7
+    psrad mm2, DESCALE_P1_2
+    psrad mm5, DESCALE_P1_2
+    paddd mm4, mm7
+    paddd mm3, mm7
+    psrad mm4, DESCALE_P1_2
+    psrad mm3, DESCALE_P1_2
+
+    ; ---- Pass 2: process rows, store into output array.
+
+    mov edi, JSAMPARRAY [output_buf(ebp)]  ; (JSAMPROW *)
+    mov eax, JDIMENSION [output_col(ebp)]
+
+    ; | input:| result:|
+    ; | A0 B0 | |
+    ; | A1 B1 | C0 C1 |
+    ; | A3 B3 | D0 D1 |
+    ; | A5 B5 | |
+    ; | A7 B7 | |
+
+    ; -- Odd part
+
+    packssdw mm2, mm4  ; mm2=(A1 A3 B1 B3)
+    packssdw mm5, mm3  ; mm5=(A5 A7 B5 B7)
+    pmaddwd mm2, [GOTOFF(ebx,PW_F362_MF127)]  ; dword0 = A1*c[0] + A3*c[1], dword1 = B1*c[0] + B3*c[1]
+    pmaddwd mm5, [GOTOFF(ebx,PW_F085_MF072)]  ; dword0 = A5*c[0] + A7*c[1], dword1 = B5*c[0] + B7*c[1]
+
+    paddd mm2, mm5  ; mm2=tmp0[row0 row1]
+
+    ; -- Even part
+
+    pslld mm1, (CONST_BITS+2)  ; mm1=tmp10[row0 row1]
+
+    ; -- Final output stage
+
+    movq mm0, [GOTOFF(ebx,PD_DESCALE_P2_2)]  ; mm0=[PD_DESCALE_P2_2]
+
+    movq mm6, mm1
+    paddd mm1, mm2  ; mm1=data0[row0 row1]=(C0 C1)
+    psubd mm6, mm2  ; mm6=data1[row0 row1]=(D0 D1)
+
+    paddd mm1, mm0  ; round ...
+    paddd mm6, mm0
+    psrad mm1, DESCALE_P2_2  ; ... and descale
+    psrad mm6, DESCALE_P2_2
+
+    movq mm7, mm1  ; transpose coefficients
+    punpckldq mm1, mm6  ; mm1=(C0 D0)
+    punpckhdq mm7, mm6  ; mm7=(C1 D1)
+
+    packssdw mm1, mm7  ; mm1=(C0 D0 C1 D1)
+    packsswb mm1, mm1  ; mm1=(C0 D0 C1 D1 C0 D0 C1 D1)
+    paddb mm1, [GOTOFF(ebx,PB_CENTERJSAMP)]  ; add PB_CENTERJSAMP to every byte (last use of the GOT base)
+
+    movd ecx, mm1  ; ecx=(C0 D0 C1 D1)
+    movd ebx, mm1  ; ebx=(C0 D0 C1 D1)
+    shr ecx, 2*BYTE_BIT  ; ecx=(C1 D1 -- --)
+
+    mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]  ; edx = output_buf[0]
+    mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]  ; esi = output_buf[1]
+    mov WORD [edx+eax*SIZEOF_JSAMPLE], bx  ; output_buf[0] at output_col <- (C0 D0)
+    mov WORD [esi+eax*SIZEOF_JSAMPLE], cx  ; output_buf[1] at output_col <- (C1 D1)
+
+    emms  ; empty MMX state
+
+    pop edi
+    pop esi
+; pop edx ; need not be preserved
+; pop ecx ; need not be preserved
+    pop ebx
+    pop ebp
+    ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+    align 32
diff --git a/simd/i386/jidctred-sse2.asm b/simd/i386/jidctred-sse2.asm
new file mode 100644
index 0000000..97838ba
--- /dev/null
+++ b/simd/i386/jidctred-sse2.asm
@@ -0,0 +1,594 @@
+;
+; jidctred.asm - reduced-size IDCT (SSE2)
+;
+; Copyright 2009 Pierre Ossman for Cendio AB
+; Copyright (C) 2016, D. R. Commander.
+;
+; Based on the x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains inverse-DCT routines that produce reduced-size
+; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
+; The following code is based directly on the IJG's original jidctred.c;
+; see jidctred.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS 13  ; fractional bits of the F_* fixed-point constants below
+%define PASS1_BITS 2  ; extra scaling bits carried by the pass-1 results
+
+%define DESCALE_P1_4 (CONST_BITS - PASS1_BITS + 1)  ; right shift ending pass 1 of the 4x4 IDCT
+%define DESCALE_P2_4 (CONST_BITS + PASS1_BITS + 3 + 1)  ; right shift ending pass 2 of the 4x4 IDCT
+%define DESCALE_P1_2 (CONST_BITS - PASS1_BITS + 2)  ; right shift ending pass 1 of the 2x2 IDCT
+%define DESCALE_P2_2 (CONST_BITS + PASS1_BITS + 3 + 2)  ; right shift ending pass 2 of the 2x2 IDCT
+
+%if CONST_BITS == 13
+F_0_211 equ 1730  ; FIX(0.211164243)
+F_0_509 equ 4176  ; FIX(0.509795579)
+F_0_601 equ 4926  ; FIX(0.601344887)
+F_0_720 equ 5906  ; FIX(0.720959822)
+F_0_765 equ 6270  ; FIX(0.765366865)
+F_0_850 equ 6967  ; FIX(0.850430095)
+F_0_899 equ 7373  ; FIX(0.899976223)
+F_1_061 equ 8697  ; FIX(1.061594337)
+F_1_272 equ 10426  ; FIX(1.272758580)
+F_1_451 equ 11893  ; FIX(1.451774981)
+F_1_847 equ 15137  ; FIX(1.847759065)
+F_2_172 equ 17799  ; FIX(2.172734803)
+F_2_562 equ 20995  ; FIX(2.562915447)
+F_3_624 equ 29692  ; FIX(3.624509785)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so the values below are pre-scaled by 2^30.
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n))  ; right shift by n, rounding to nearest
+F_0_211 equ DESCALE( 226735879, 30 - CONST_BITS)  ; FIX(0.211164243)
+F_0_509 equ DESCALE( 547388834, 30 - CONST_BITS)  ; FIX(0.509795579)
+F_0_601 equ DESCALE( 645689155, 30 - CONST_BITS)  ; FIX(0.601344887)
+F_0_720 equ DESCALE( 774124714, 30 - CONST_BITS)  ; FIX(0.720959822)
+F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS)  ; FIX(0.765366865)
+F_0_850 equ DESCALE( 913142361, 30 - CONST_BITS)  ; FIX(0.850430095)
+F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS)  ; FIX(0.899976223)
+F_1_061 equ DESCALE(1139878239, 30 - CONST_BITS)  ; FIX(1.061594337)
+F_1_272 equ DESCALE(1366614119, 30 - CONST_BITS)  ; FIX(1.272758580)
+F_1_451 equ DESCALE(1558831516, 30 - CONST_BITS)  ; FIX(1.451774981)
+F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS)  ; FIX(1.847759065)
+F_2_172 equ DESCALE(2332956230, 30 - CONST_BITS)  ; FIX(2.172734803)
+F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS)  ; FIX(2.562915447)
+F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS)  ; FIX(3.624509785)
+%endif
+
+; --------------------------------------------------------------------------
+    SECTION SEG_CONST
+
+    alignz 32
+    GLOBAL_DATA(jconst_idct_red_sse2)
+
+EXTN(jconst_idct_red_sse2):
+
+PW_F184_MF076 times 4 dw F_1_847, -F_0_765  ; word pairs for pmaddwd: ( 1.847, -0.765)
+PW_F256_F089 times 4 dw F_2_562, F_0_899  ; word pairs for pmaddwd: ( 2.562,  0.899)
+PW_F106_MF217 times 4 dw F_1_061, -F_2_172  ; word pairs for pmaddwd: ( 1.061, -2.172)
+PW_MF060_MF050 times 4 dw -F_0_601, -F_0_509  ; word pairs for pmaddwd: (-0.601, -0.509)
+PW_F145_MF021 times 4 dw F_1_451, -F_0_211  ; word pairs for pmaddwd: ( 1.451, -0.211)
+PW_F362_MF127 times 4 dw F_3_624, -F_1_272  ; word pairs for pmaddwd: ( 3.624, -1.272)
+PW_F085_MF072 times 4 dw F_0_850, -F_0_720  ; word pairs for pmaddwd: ( 0.850, -0.720)
+PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4 - 1)  ; rounding term added before psrad by DESCALE_P1_4
+PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4 - 1)  ; rounding term added before psrad by DESCALE_P2_4
+PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1)  ; rounding term added before psrad by DESCALE_P1_2
+PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1)  ; rounding term added before psrad by DESCALE_P2_2
+PB_CENTERJSAMP times 16 db CENTERJSAMPLE  ; per-byte bias added (paddb) after the final packsswb
+
+    alignz 32
+
+; --------------------------------------------------------------------------
+    SECTION SEG_TEXT
+    BITS 32
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size
4x4 output block.
+;
+; GLOBAL(void)
+; jsimd_idct_4x4_sse2(void *dct_table, JCOEFPTR coef_block,
+; JSAMPARRAY output_buf, JDIMENSION output_col)
+; Row 4 and column 4 of the 8x8 block are never used; four samples are stored to each of output_buf[0..3].
+
+%define dct_table(b) (b) + 8  ; void *dct_table: multipliers applied to the coefficients with pmullw
+%define coef_block(b) (b) + 12  ; JCOEFPTR coef_block: input; must allow aligned (movdqa) loads
+%define output_buf(b) (b) + 16  ; JSAMPARRAY output_buf: rows [0]..[3] each receive four samples
+%define output_col(b) (b) + 20  ; JDIMENSION output_col: sample index of the first store in each row
+
+%define original_ebp ebp + 0
+%define wk(i) ebp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+    ; xmmword wk[WK_NUM]
+%define WK_NUM 2
+
+    align 32
+    GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
+
+EXTN(jsimd_idct_4x4_sse2):
+    push ebp
+    mov eax, esp  ; eax = original ebp (frame base before alignment; arguments are at eax+8..)
+    sub esp, byte 4  ; make room so the aligned slot below lies under the saved ebp
+    and esp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
+    mov [esp], eax  ; [aligned ebp] = original ebp, read back later via [original_ebp]
+    mov ebp, esp  ; ebp = aligned ebp
+    lea esp, [wk(0)]  ; reserve WK_NUM xmmwords of aligned scratch below ebp
+    pushpic ebx
+; push ecx ; unused
+; push edx ; need not be preserved
+    push esi
+    push edi
+
+    get_GOT ebx  ; get GOT address
+
+    ; ---- Pass 1: process columns from input.
+
+; mov eax, [original_ebp]
+    mov edx, POINTER [dct_table(eax)]  ; quantptr
+    mov esi, JCOEFPTR [coef_block(eax)]  ; inptr
+
+%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
+    mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]  ; cheap early-out on one dword each of rows 1 and 2 (eax is clobbered)
+    or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+    jnz short .columnDCT
+
+    movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]  ; OR together rows 1,2,3,5,6,7 (row 4 is not used)
+    movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+    por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+    por xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+    por xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+    por xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+    por xmm0, xmm1
+    packsswb xmm0, xmm0  ; saturating packs keep any nonzero word nonzero ...
+    packsswb xmm0, xmm0  ; ... so the low dword summarizes all eight columns
+    movd eax, xmm0
+    test eax, eax
+    jnz short .columnDCT
+
+    ; -- AC terms all zero
+
+    movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+    pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+    psllw xmm0, PASS1_BITS  ; pass-1 result of every row is the dequantized DC << PASS1_BITS
+
+    movdqa xmm3, xmm0  ; xmm0=in0=(00 01 02 03 04 05 06 07)
+    punpcklwd xmm0, xmm0  ; xmm0=(00 00 01 01 02 02 03 03)
+    punpckhwd xmm3, xmm3  ; xmm3=(04 04 05 05 06 06 07 07)
+
+    pshufd xmm1, xmm0, 0x50  ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01)
+    pshufd xmm0, xmm0, 0xFA  ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03)
+    pshufd xmm6, xmm3, 0x50  ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05)
+    pshufd xmm3, xmm3, 0xFA  ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
+
+    jmp near .column_end
+    alignx 16, 7
+%endif
+.columnDCT:
+
+    ; -- Odd part
+
+    movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+    movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+    pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]  ; dequantize (low 16 bits of each product)
+    pmullw xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    movdqa xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+    movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+    pmullw xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+    movdqa xmm4, xmm0
+    movdqa xmm5, xmm0
+    punpcklwd xmm4, xmm1  ; (row1,row3) word pairs, columns 0-3
+    punpckhwd xmm5, xmm1  ; (row1,row3) word pairs, columns 4-7
+    movdqa xmm0, xmm4
+    movdqa xmm1, xmm5
+    pmaddwd xmm4, [GOTOFF(ebx,PW_F256_F089)]  ; xmm4=(tmp2L)
+    pmaddwd xmm5, [GOTOFF(ebx,PW_F256_F089)]  ; xmm5=(tmp2H)
+    pmaddwd xmm0, [GOTOFF(ebx,PW_F106_MF217)]  ; xmm0=(tmp0L)
+    pmaddwd xmm1, [GOTOFF(ebx,PW_F106_MF217)]  ; xmm1=(tmp0H)
+
+    movdqa xmm6, xmm2
+    movdqa xmm7, xmm2
+    punpcklwd xmm6, xmm3  ; (row5,row7) word pairs, columns 0-3
+    punpckhwd xmm7, xmm3  ; (row5,row7) word pairs, columns 4-7
+    movdqa xmm2, xmm6
+    movdqa xmm3, xmm7
+    pmaddwd xmm6, [GOTOFF(ebx,PW_MF060_MF050)]  ; xmm6=(tmp2L)
+    pmaddwd xmm7, [GOTOFF(ebx,PW_MF060_MF050)]  ; xmm7=(tmp2H)
+    pmaddwd xmm2, [GOTOFF(ebx,PW_F145_MF021)]  ; xmm2=(tmp0L)
+    pmaddwd xmm3, [GOTOFF(ebx,PW_F145_MF021)]  ; xmm3=(tmp0H)
+
+    paddd xmm6, xmm4  ; xmm6=tmp2L
+    paddd xmm7, xmm5  ; xmm7=tmp2H
+    paddd xmm2, xmm0  ; xmm2=tmp0L
+    paddd xmm3, xmm1  ; xmm3=tmp0H
+
+    movdqa XMMWORD [wk(0)], xmm2  ; wk(0)=tmp0L
+    movdqa XMMWORD [wk(1)], xmm3  ; wk(1)=tmp0H
+
+    ; -- Even part
+
+    movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+    movdqa xmm5, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+    movdqa xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+    pmullw xmm4, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    pmullw xmm5, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    pmullw xmm0, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+    pxor xmm1, xmm1
+    pxor xmm2, xmm2
+    punpcklwd xmm1, xmm4  ; xmm1=tmp0L
+    punpckhwd xmm2, xmm4  ; xmm2=tmp0H
+    psrad xmm1, (16-CONST_BITS-1)  ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1
+    psrad xmm2, (16-CONST_BITS-1)  ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1
+
+    movdqa xmm3, xmm5  ; xmm5=in2=z2
+    punpcklwd xmm5, xmm0  ; xmm0=in6=z3
+    punpckhwd xmm3, xmm0
+    pmaddwd xmm5, [GOTOFF(ebx,PW_F184_MF076)]  ; xmm5=tmp2L
+    pmaddwd xmm3, [GOTOFF(ebx,PW_F184_MF076)]  ; xmm3=tmp2H
+
+    movdqa xmm4, xmm1
+    movdqa xmm0, xmm2
+    paddd xmm1, xmm5  ; xmm1=tmp10L
+    paddd xmm2, xmm3  ; xmm2=tmp10H
+    psubd xmm4, xmm5  ; xmm4=tmp12L
+    psubd xmm0, xmm3  ; xmm0=tmp12H
+
+    ; -- Final output stage
+
+    movdqa xmm5, xmm1
+    movdqa xmm3, xmm2
+    paddd xmm1, xmm6  ; xmm1=data0L
+    paddd xmm2, xmm7  ; xmm2=data0H
+    psubd xmm5, xmm6  ; xmm5=data3L
+    psubd xmm3, xmm7  ; xmm3=data3H
+
+    movdqa xmm6, [GOTOFF(ebx,PD_DESCALE_P1_4)]  ; xmm6=[PD_DESCALE_P1_4]
+
+    paddd xmm1, xmm6  ; round ...
+    paddd xmm2, xmm6
+    psrad xmm1, DESCALE_P1_4  ; ... and descale
+    psrad xmm2, DESCALE_P1_4
+    paddd xmm5, xmm6
+    paddd xmm3, xmm6
+    psrad xmm5, DESCALE_P1_4
+    psrad xmm3, DESCALE_P1_4
+
+    packssdw xmm1, xmm2  ; xmm1=data0=(00 01 02 03 04 05 06 07)
+    packssdw xmm5, xmm3  ; xmm5=data3=(30 31 32 33 34 35 36 37)
+
+    movdqa xmm7, XMMWORD [wk(0)]  ; xmm7=tmp0L
+    movdqa xmm6, XMMWORD [wk(1)]  ; xmm6=tmp0H
+
+    movdqa xmm2, xmm4
+    movdqa xmm3, xmm0
+    paddd xmm4, xmm7  ; xmm4=data1L
+    paddd xmm0, xmm6  ; xmm0=data1H
+    psubd xmm2, xmm7  ; xmm2=data2L
+    psubd xmm3, xmm6  ; xmm3=data2H
+
+    movdqa xmm7, [GOTOFF(ebx,PD_DESCALE_P1_4)]  ; xmm7=[PD_DESCALE_P1_4]
+
+    paddd xmm4, xmm7
+    paddd xmm0, xmm7
+    psrad xmm4, DESCALE_P1_4
+    psrad xmm0, DESCALE_P1_4
+    paddd xmm2, xmm7
+    paddd xmm3, xmm7
+    psrad xmm2, DESCALE_P1_4
+    psrad xmm3, DESCALE_P1_4
+
+    packssdw xmm4, xmm0  ; xmm4=data1=(10 11 12 13 14 15 16 17)
+    packssdw xmm2, xmm3  ; xmm2=data2=(20 21 22 23 24 25 26 27)
+
+    movdqa xmm6, xmm1  ; transpose coefficients(phase 1)
+    punpcklwd xmm1, xmm4  ; xmm1=(00 10 01 11 02 12 03 13)
+    punpckhwd xmm6, xmm4  ; xmm6=(04 14 05 15 06 16 07 17)
+    movdqa xmm7, xmm2  ; transpose coefficients(phase 1)
+    punpcklwd xmm2, xmm5  ; xmm2=(20 30 21 31 22 32 23 33)
+    punpckhwd xmm7, xmm5  ; xmm7=(24 34 25 35 26 36 27 37)
+
+    movdqa xmm0, xmm1  ; transpose coefficients(phase 2)
+    punpckldq xmm1, xmm2  ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31)
+    punpckhdq xmm0, xmm2  ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33)
+    movdqa xmm3, xmm6  ; transpose coefficients(phase 2)
+    punpckldq xmm6, xmm7  ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35)
+    punpckhdq xmm3, xmm7  ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37)
+.column_end:
+
+    ; -- Prefetch the next coefficient block
+
+    prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+    prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+    prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+    prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+    ; ---- Pass 2: process rows, store into output array.
+
+    mov eax, [original_ebp]  ; reload the frame base (eax may have been clobbered by the zero test)
+    mov edi, JSAMPARRAY [output_buf(eax)]  ; (JSAMPROW *)
+    mov eax, JDIMENSION [output_col(eax)]
+
+    ; -- Even part
+
+    pxor xmm4, xmm4
+    punpcklwd xmm4, xmm1  ; xmm4=tmp0
+    psrad xmm4, (16-CONST_BITS-1)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1
+
+    ; -- Odd part
+
+    punpckhwd xmm1, xmm0  ; interleave col1 with col3
+    punpckhwd xmm6, xmm3  ; interleave col5 with col7
+    movdqa xmm5, xmm1
+    movdqa xmm2, xmm6
+    pmaddwd xmm1, [GOTOFF(ebx,PW_F256_F089)]  ; xmm1=(tmp2)
+    pmaddwd xmm6, [GOTOFF(ebx,PW_MF060_MF050)]  ; xmm6=(tmp2)
+    pmaddwd xmm5, [GOTOFF(ebx,PW_F106_MF217)]  ; xmm5=(tmp0)
+    pmaddwd xmm2, [GOTOFF(ebx,PW_F145_MF021)]  ; xmm2=(tmp0)
+
+    paddd xmm6, xmm1  ; xmm6=tmp2
+    paddd xmm2, xmm5  ; xmm2=tmp0
+
+    ; -- Even part
+
+    punpcklwd xmm0, xmm3  ; interleave col2 with col6 (col4 is not used)
+    pmaddwd xmm0, [GOTOFF(ebx,PW_F184_MF076)]  ; xmm0=tmp2
+
+    movdqa xmm7, xmm4
+    paddd xmm4, xmm0  ; xmm4=tmp10
+    psubd xmm7, xmm0  ; xmm7=tmp12
+
+    ; -- Final output stage
+
+    movdqa xmm1, [GOTOFF(ebx,PD_DESCALE_P2_4)]  ; xmm1=[PD_DESCALE_P2_4]
+
+    movdqa xmm5, xmm4
+    movdqa xmm3, xmm7
+    paddd xmm4, xmm6  ; xmm4=data0=(00 10 20 30)
+    paddd xmm7, xmm2  ; xmm7=data1=(01 11 21 31)
+    psubd xmm5, xmm6  ; xmm5=data3=(03 13 23 33)
+    psubd xmm3, xmm2  ; xmm3=data2=(02 12 22 32)
+
+    paddd xmm4, xmm1  ; round ...
+    paddd xmm7, xmm1
+    psrad xmm4, DESCALE_P2_4  ; ... and descale
+    psrad xmm7, DESCALE_P2_4
+    paddd xmm5, xmm1
+    paddd xmm3, xmm1
+    psrad xmm5, DESCALE_P2_4
+    psrad xmm3, DESCALE_P2_4
+
+    packssdw xmm4, xmm3  ; xmm4=(00 10 20 30 02 12 22 32)
+    packssdw xmm7, xmm5  ; xmm7=(01 11 21 31 03 13 23 33)
+
+    movdqa xmm0, xmm4  ; transpose coefficients(phase 1)
+    punpcklwd xmm4, xmm7  ; xmm4=(00 01 10 11 20 21 30 31)
+    punpckhwd xmm0, xmm7  ; xmm0=(02 03 12 13 22 23 32 33)
+
+    movdqa xmm6, xmm4  ; transpose coefficients(phase 2)
+    punpckldq xmm4, xmm0  ; xmm4=(00 01 02 03 10 11 12 13)
+    punpckhdq xmm6, xmm0  ; xmm6=(20 21 22 23 30 31 32 33)
+
+    packsswb xmm4, xmm6  ; xmm4=(00 01 02 03 10 11 12 13 20 ..)
+    paddb xmm4, [GOTOFF(ebx,PB_CENTERJSAMP)]  ; add PB_CENTERJSAMP to every byte
+
+    pshufd xmm2, xmm4, 0x39  ; xmm2=(10 11 12 13 20 21 22 23 30 ..)
+    pshufd xmm1, xmm4, 0x4E  ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
+    pshufd xmm3, xmm4, 0x93  ; xmm3=(30 31 32 33 00 01 02 03 10 ..)
+
+    mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+    mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+    movd XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
+    movd XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm2
+    mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+    mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+    movd XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm1
+    movd XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
+
+    pop edi
+    pop esi
+; pop edx ; need not be preserved
+; pop ecx ; unused
+    poppic ebx
+    mov esp, ebp  ; esp <- aligned ebp
+    pop esp  ; esp <- original ebp
+    pop ebp
+    ret
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 2x2 output block.
+;
+; GLOBAL(void)
+; jsimd_idct_2x2_sse2(void *dct_table, JCOEFPTR coef_block,
+; JSAMPARRAY output_buf, JDIMENSION output_col)
+; Only rows and columns 0, 1, 3, 5 and 7 of the 8x8 block contribute to the result.
+
+%define dct_table(b) (b) + 8  ; void *dct_table: multipliers applied to the coefficients with pmullw
+%define coef_block(b) (b) + 12  ; JCOEFPTR coef_block: input; must allow aligned (movdqa) loads
+%define output_buf(b) (b) + 16  ; JSAMPARRAY output_buf: rows [0] and [1] each receive two samples
+%define output_col(b) (b) + 20  ; JDIMENSION output_col: sample index of the first store in each row
+
+    align 32
+    GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
+
+EXTN(jsimd_idct_2x2_sse2):
+    push ebp
+    mov ebp, esp
+    push ebx  ; saved: holds the GOT base below and is reused as scratch by the final pextrw
+; push ecx ; need not be preserved
+; push edx ; need not be preserved
+    push esi
+    push edi
+
+    get_GOT ebx  ; get GOT address
+
+    ; ---- Pass 1: process columns from input.
+
+    mov edx, POINTER [dct_table(ebp)]  ; quantptr
+    mov esi, JCOEFPTR [coef_block(ebp)]  ; inptr
+
+    ; | input: | result: |
+    ; | 00 01 ** 03 ** 05 ** 07 | |
+    ; | 10 11 ** 13 ** 15 ** 17 | |
+    ; | ** ** ** ** ** ** ** ** | |
+    ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
+    ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
+    ; | 50 51 ** 53 ** 55 ** 57 | |
+    ; | ** ** ** ** ** ** ** ** | |
+    ; | 70 71 ** 73 ** 75 ** 77 | |
+
+    ; -- Odd part
+
+    movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+    movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+    pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]  ; dequantize (low 16 bits of each product)
+    pmullw xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    movdqa xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+    movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+    pmullw xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+    pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+    ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37)
+    ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77)
+
+    pcmpeqd xmm7, xmm7  ; xmm7 = all ones
+    pslld xmm7, WORD_BIT  ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..}
+
+    movdqa xmm4, xmm0  ; xmm4=(10 11 ** 13 ** 15 ** 17)
+    movdqa xmm5, xmm2  ; xmm5=(50 51 ** 53 ** 55 ** 57)
+    punpcklwd xmm4, xmm1  ; xmm4=(10 30 11 31 ** ** 13 33)
+    punpcklwd xmm5, xmm3  ; xmm5=(50 70 51 71 ** ** 53 73)
+    pmaddwd xmm4, [GOTOFF(ebx,PW_F362_MF127)]  ; per dword: row1*F_3_624 - row3*F_1_272
+    pmaddwd xmm5, [GOTOFF(ebx,PW_F085_MF072)]  ; per dword: row5*F_0_850 - row7*F_0_720
+
+    psrld xmm0, WORD_BIT  ; xmm0=(11 -- 13 -- 15 -- 17 --)
+    pand xmm1, xmm7  ; xmm1=(-- 31 -- 33 -- 35 -- 37)
+    psrld xmm2, WORD_BIT  ; xmm2=(51 -- 53 -- 55 -- 57 --)
+    pand xmm3, xmm7  ; xmm3=(-- 71 -- 73 -- 75 -- 77)
+    por xmm0, xmm1  ; xmm0=(11 31 13 33 15 35 17 37)
+    por xmm2, xmm3  ; xmm2=(51 71 53 73 55 75 57 77)
+    pmaddwd xmm0, [GOTOFF(ebx,PW_F362_MF127)]  ; same products for cols 1,3,5,7
+    pmaddwd xmm2, [GOTOFF(ebx,PW_F085_MF072)]
+
+    paddd xmm4, xmm5  ; xmm4=tmp0[col0 col1 **** col3]
+    paddd xmm0, xmm2  ; xmm0=tmp0[col1 col3 col5 col7]
+
+    ; -- Even part
+
+    movdqa xmm6, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]  ; row 0 is the only even row used
+    pmullw xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+    ; xmm6=(00 01 ** 03 ** 05 ** 07)
+
+    movdqa xmm1, xmm6  ; xmm1=(00 01 ** 03 ** 05 ** 07)
+    pslld xmm6, WORD_BIT  ; xmm6=(-- 00 -- ** -- ** -- **)
+    pand xmm1, xmm7  ; xmm1=(-- 01 -- 03 -- 05 -- 07)
+    psrad xmm6, (WORD_BIT-CONST_BITS-2)  ; xmm6=tmp10[col0 **** **** ****]
+    psrad xmm1, (WORD_BIT-CONST_BITS-2)  ; xmm1=tmp10[col1 col3 col5 col7]
+
+    ; -- Final output stage
+
+    movdqa xmm3, xmm6  ; keep both tmp10 vectors for the subtractions below
+    movdqa xmm5, xmm1
+    paddd xmm6, xmm4  ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **)
+    paddd xmm1, xmm0  ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7)
+    psubd xmm3, xmm4  ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **)
+    psubd xmm5, xmm0  ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7)
+
+    movdqa xmm2, [GOTOFF(ebx,PD_DESCALE_P1_2)]  ; xmm2=[PD_DESCALE_P1_2]
+
+    punpckldq xmm6, xmm3  ; xmm6=(A0 B0 ** **)
+
+    movdqa xmm7, xmm1
+    punpcklqdq xmm1, xmm5  ; xmm1=(A1 A3 B1 B3)
+    punpckhqdq xmm7, xmm5  ; xmm7=(A5 A7 B5 B7)
+
+    paddd xmm6, xmm2  ; add the rounding term ...
+    psrad xmm6, DESCALE_P1_2  ; ... then arithmetic shift right by DESCALE_P1_2
+
+    paddd xmm1, xmm2
+    paddd xmm7, xmm2
+    psrad xmm1, DESCALE_P1_2
+    psrad xmm7, DESCALE_P1_2
+
+    ; -- Prefetch the next coefficient block
+
+    prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+    prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+    prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+    prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+    ; ---- Pass 2: process rows, store into output array.
+
+    mov edi, JSAMPARRAY [output_buf(ebp)]  ; (JSAMPROW *)
+    mov eax, JDIMENSION [output_col(ebp)]
+
+    ; | input:| result:|
+    ; | A0 B0 | |
+    ; | A1 B1 | C0 C1 |
+    ; | A3 B3 | D0 D1 |
+    ; | A5 B5 | |
+    ; | A7 B7 | |
+
+    ; -- Odd part
+
+    packssdw xmm1, xmm1  ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3)
+    packssdw xmm7, xmm7  ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7)
+    pmaddwd xmm1, [GOTOFF(ebx,PW_F362_MF127)]  ; A1*F_3_624 - A3*F_1_272 (likewise for the B row)
+    pmaddwd xmm7, [GOTOFF(ebx,PW_F085_MF072)]  ; A5*F_0_850 - A7*F_0_720 (likewise for the B row)
+
+    paddd xmm1, xmm7  ; xmm1=tmp0[row0 row1 row0 row1]
+
+    ; -- Even part
+
+    pslld xmm6, (CONST_BITS+2)  ; xmm6=tmp10[row0 row1 **** ****]
+
+    ; -- Final output stage
+
+    movdqa xmm4, xmm6
+    paddd xmm6, xmm1  ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **)
+    psubd xmm4, xmm1  ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **)
+
+    punpckldq xmm6, xmm4  ; xmm6=(C0 D0 C1 D1)
+
+    paddd xmm6, [GOTOFF(ebx,PD_DESCALE_P2_2)]  ; round ...
+    psrad xmm6, DESCALE_P2_2  ; ... and descale
+
+    packssdw xmm6, xmm6  ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1)
+    packsswb xmm6, xmm6  ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..)
+    paddb xmm6, [GOTOFF(ebx,PB_CENTERJSAMP)]  ; add PB_CENTERJSAMP to every byte (last use of the GOT base)
+
+    pextrw ebx, xmm6, 0x00  ; ebx=(C0 D0 -- --)
+    pextrw ecx, xmm6, 0x01  ; ecx=(C1 D1 -- --)
+
+    mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]  ; edx = output_buf[0]
+    mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]  ; esi = output_buf[1]
+    mov WORD [edx+eax*SIZEOF_JSAMPLE], bx  ; output_buf[0] at output_col <- (C0 D0)
+    mov WORD [esi+eax*SIZEOF_JSAMPLE], cx  ; output_buf[1] at output_col <- (C1 D1)
+
+    pop edi
+    pop esi
+; pop edx ; need not be preserved
+; pop ecx ; need not be preserved
+    pop ebx
+    pop ebp
+    ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+    align 32
diff --git a/simd/i386/jquant-3dn.asm b/simd/i386/jquant-3dn.asm
new file mode 100644
index 0000000..1767f44
--- /dev/null
+++ b/simd/i386/jquant-3dn.asm
@@ -0,0 +1,232 @@
+;
+; jquant.asm - sample data conversion and quantization (3DNow! & MMX)
+;
+; Copyright 2009 Pierre Ossman for Cendio AB
+; Copyright (C) 2016, D. R. Commander.
+;
+; Based on the x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_3dnow(JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +%define sample_data ebp + 8 ; JSAMPARRAY sample_data +%define start_col ebp + 12 ; JDIMENSION start_col +%define workspace ebp + 16 ; FAST_FLOAT *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_convsamp_float_3dnow) + +EXTN(jsimd_convsamp_float_3dnow): + push ebp + mov ebp, esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pcmpeqw mm7, mm7 + psllw mm7, 7 + packsswb mm7, mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..) 
+ + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/2 + alignx 16, 7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] + + psubb mm0, mm7 ; mm0=(01234567) + psubb mm1, mm7 ; mm1=(89ABCDEF) + + punpcklbw mm2, mm0 ; mm2=(*0*1*2*3) + punpckhbw mm0, mm0 ; mm0=(*4*5*6*7) + punpcklbw mm3, mm1 ; mm3=(*8*9*A*B) + punpckhbw mm1, mm1 ; mm1=(*C*D*E*F) + + punpcklwd mm4, mm2 ; mm4=(***0***1) + punpckhwd mm2, mm2 ; mm2=(***2***3) + punpcklwd mm5, mm0 ; mm5=(***4***5) + punpckhwd mm0, mm0 ; mm0=(***6***7) + + psrad mm4, (DWORD_BIT-BYTE_BIT) ; mm4=(01) + psrad mm2, (DWORD_BIT-BYTE_BIT) ; mm2=(23) + pi2fd mm4, mm4 + pi2fd mm2, mm2 + psrad mm5, (DWORD_BIT-BYTE_BIT) ; mm5=(45) + psrad mm0, (DWORD_BIT-BYTE_BIT) ; mm0=(67) + pi2fd mm5, mm5 + pi2fd mm0, mm0 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm2 + movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5 + movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 + + punpcklwd mm6, mm3 ; mm6=(***8***9) + punpckhwd mm3, mm3 ; mm3=(***A***B) + punpcklwd mm4, mm1 ; mm4=(***C***D) + punpckhwd mm1, mm1 ; mm1=(***E***F) + + psrad mm6, (DWORD_BIT-BYTE_BIT) ; mm6=(89) + psrad mm3, (DWORD_BIT-BYTE_BIT) ; mm3=(AB) + pi2fd mm6, mm6 + pi2fd mm3, mm3 + psrad mm4, (DWORD_BIT-BYTE_BIT) ; mm4=(CD) + psrad mm1, (DWORD_BIT-BYTE_BIT) ; mm1=(EF) + pi2fd mm4, mm4 + pi2fd mm1, mm1 + + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm6 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm3 + movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm4 + movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1 + + add esi, byte 2*SIZEOF_JSAMPROW + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .convloop + + femms ; empty
MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_3dnow(JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +%define coef_block ebp + 8 ; JCOEFPTR coef_block +%define divisors ebp + 12 ; FAST_FLOAT *divisors +%define workspace ebp + 16 ; FAST_FLOAT *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_quantize_float_3dnow) + +EXTN(jsimd_quantize_float_3dnow): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov eax, 0x4B400000 ; (float)0x00C00000 (rndint_magic) + movd mm7, eax + punpckldq mm7, mm7 ; mm7={12582912.0F 12582912.0F} + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/16 + alignx 16, 7 +.quantloop: + movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] + pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + pfmul mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movq mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)] + pfmul mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] + pfmul mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] + + pfadd mm0, mm7 ; mm0=(00 ** 01 **) + pfadd mm1, mm7 ; mm1=(02 ** 03 **) + pfadd mm2, mm7 ; mm2=(04 ** 05 **) + pfadd mm3, mm7 ; mm3=(06 ** 07 **) + + movq mm4, mm0 + punpcklwd mm0, mm1 ; mm0=(00 02 ** **) + punpckhwd mm4, mm1 ; mm4=(01 03 ** **) + movq mm5, mm2 + punpcklwd mm2, mm3 ; mm2=(04 06 ** **) + punpckhwd mm5, mm3 ; mm5=(05 07 ** **) + + punpcklwd mm0, mm4 ; mm0=(00 01 02 03) + punpcklwd mm2, mm5 ; mm2=(04 05 06 07) + + movq mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] +
movq mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] + pfmul mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + pfmul mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + movq mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)] + movq mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)] + pfmul mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] + pfmul mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] + + pfadd mm6, mm7 ; mm6=(10 ** 11 **) + pfadd mm1, mm7 ; mm1=(12 ** 13 **) + pfadd mm3, mm7 ; mm3=(14 ** 15 **) + pfadd mm4, mm7 ; mm4=(16 ** 17 **) + + movq mm5, mm6 + punpcklwd mm6, mm1 ; mm6=(10 12 ** **) + punpckhwd mm5, mm1 ; mm5=(11 13 ** **) + movq mm1, mm3 + punpcklwd mm3, mm4 ; mm3=(14 16 ** **) + punpckhwd mm1, mm4 ; mm1=(15 17 ** **) + + punpcklwd mm6, mm5 ; mm6=(10 11 12 13) + punpcklwd mm3, mm1 ; mm3=(14 15 16 17) + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm6 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3 + + add esi, byte 16*SIZEOF_FAST_FLOAT + add edx, byte 16*SIZEOF_FAST_FLOAT + add edi, byte 16*SIZEOF_JCOEF + dec eax + jnz near .quantloop + + femms ; empty MMX/3DNow! state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jquant-mmx.asm b/simd/i386/jquant-mmx.asm new file mode 100644 index 0000000..98932db --- /dev/null +++ b/simd/i386/jquant-mmx.asm @@ -0,0 +1,278 @@ +; +; jquant.asm - sample data conversion and quantization (MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_mmx(JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +%define sample_data ebp + 8 ; JSAMPARRAY sample_data +%define start_col ebp + 12 ; JDIMENSION start_col +%define workspace ebp + 16 ; DCTELEM *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_convsamp_mmx) + +EXTN(jsimd_convsamp_mmx): + push ebp + mov ebp, esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pxor mm6, mm6 ; mm6=(all 0's) + pcmpeqw mm7, mm7 + psllw mm7, 7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80} + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16, 7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm0=(01234567) + movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm1=(89ABCDEF) + + mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm2, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm2=(GHIJKLMN) + movq mm3, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm3=(OPQRSTUV) + + movq mm4, mm0 + punpcklbw mm0, mm6 ; mm0=(0123) + punpckhbw mm4, mm6 ; mm4=(4567) + movq mm5, mm1 + punpcklbw mm1, mm6 ; 
mm1=(89AB) + punpckhbw mm5, mm6 ; mm5=(CDEF) + + paddw mm0, mm7 + paddw mm4, mm7 + paddw mm1, mm7 + paddw mm5, mm7 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm4 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_DCTELEM)], mm1 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_DCTELEM)], mm5 + + movq mm0, mm2 + punpcklbw mm2, mm6 ; mm2=(GHIJ) + punpckhbw mm0, mm6 ; mm0=(KLMN) + movq mm4, mm3 + punpcklbw mm3, mm6 ; mm3=(OPQR) + punpckhbw mm4, mm6 ; mm4=(STUV) + + paddw mm2, mm7 + paddw mm0, mm7 + paddw mm3, mm7 + paddw mm4, mm7 + + movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_DCTELEM)], mm2 + movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_DCTELEM)], mm3 + movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_DCTELEM)], mm4 + + add esi, byte 4*SIZEOF_JSAMPROW + add edi, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz short .convloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). 
+; +; GLOBAL(void) +; jsimd_quantize_mmx(JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m, n, b) \ + MMBLOCK(DCTSIZE * 0 + (m), (n), (b), SIZEOF_DCTELEM) +%define CORRECTION(m, n, b) \ + MMBLOCK(DCTSIZE * 1 + (m), (n), (b), SIZEOF_DCTELEM) +%define SCALE(m, n, b) \ + MMBLOCK(DCTSIZE * 2 + (m), (n), (b), SIZEOF_DCTELEM) +%define SHIFT(m, n, b) \ + MMBLOCK(DCTSIZE * 3 + (m), (n), (b), SIZEOF_DCTELEM) + +%define coef_block ebp + 8 ; JCOEFPTR coef_block +%define divisors ebp + 12 ; DCTELEM *divisors +%define workspace ebp + 16 ; DCTELEM *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_quantize_mmx) + +EXTN(jsimd_quantize_mmx): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov ah, 2 + alignx 16, 7 +.quantloop1: + mov al, DCTSIZE2/8/2 + alignx 16, 7 +.quantloop2: + movq mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)] + movq mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)] + + movq mm0, mm2 + movq mm1, mm3 + + psraw mm2, (WORD_BIT-1) ; -1 if value < 0, 0 otherwise + psraw mm3, (WORD_BIT-1) + + pxor mm0, mm2 ; if (val < 0) val = -val + pxor mm1, mm3 + psubw mm0, mm2 + psubw mm1, mm3 + + ; + ; MMX is an annoyingly crappy instruction set. It has two + ; misfeatures that are causing problems here: + ; + ; - All multiplications are signed. + ; + ; - The second operand for the shifts is not treated as packed.
+ ; + ; + ; We work around the first problem by implementing this algorithm: + ; + ; unsigned long unsigned_multiply(unsigned short x, unsigned short y) + ; { + ; enum { SHORT_BIT = 16 }; + ; signed short sx = (signed short)x; + ; signed short sy = (signed short)y; + ; signed long sz; + ; + ; sz = (long)sx * (long)sy; /* signed multiply */ + ; + ; if (sx < 0) sz += (long)sy << SHORT_BIT; + ; if (sy < 0) sz += (long)sx << SHORT_BIT; + ; + ; return (unsigned long)sz; + ; } + ; + ; (note that a negative sx adds _sy_ and vice versa) + ; + ; For the second problem, we replace the shift by a multiplication. + ; Unfortunately that means we have to deal with the signed issue again. + ; + + paddw mm0, MMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor + paddw mm1, MMWORD [CORRECTION(0,1,edx)] + + movq mm4, mm0 ; store current value for later + movq mm5, mm1 + pmulhw mm0, MMWORD [RECIPROCAL(0,0,edx)] ; reciprocal + pmulhw mm1, MMWORD [RECIPROCAL(0,1,edx)] + paddw mm0, mm4 ; reciprocal is always negative (MSB=1), + paddw mm1, mm5 ; so we always need to add the initial value + ; (input value is never negative as we + ; inverted it at the start of this routine) + + ; here it gets a bit tricky as both scale + ; and mm0/mm1 can be negative + movq mm6, MMWORD [SCALE(0,0,edx)] ; scale + movq mm7, MMWORD [SCALE(0,1,edx)] + movq mm4, mm0 + movq mm5, mm1 + pmulhw mm0, mm6 + pmulhw mm1, mm7 + + psraw mm6, (WORD_BIT-1) ; determine if scale is negative + psraw mm7, (WORD_BIT-1) + + pand mm6, mm4 ; and add input if it is + pand mm7, mm5 + paddw mm0, mm6 + paddw mm1, mm7 + + psraw mm4, (WORD_BIT-1) ; then check if negative input + psraw mm5, (WORD_BIT-1) + + pand mm4, MMWORD [SCALE(0,0,edx)] ; and add scale if it is + pand mm5, MMWORD [SCALE(0,1,edx)] + paddw mm0, mm4 + paddw mm1, mm5 + + pxor mm0, mm2 ; restore sign: if (input < 0) val = -val + pxor mm1, mm3 + psubw mm0, mm2 + psubw mm1, mm3 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm1 + + add esi,
byte 8*SIZEOF_DCTELEM + add edx, byte 8*SIZEOF_DCTELEM + add edi, byte 8*SIZEOF_JCOEF + dec al + jnz near .quantloop2 + dec ah + jnz near .quantloop1 ; to avoid branch misprediction + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jquant-sse.asm b/simd/i386/jquant-sse.asm new file mode 100644 index 0000000..cc244c4 --- /dev/null +++ b/simd/i386/jquant-sse.asm @@ -0,0 +1,210 @@ +; +; jquant.asm - sample data conversion and quantization (SSE & MMX) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_sse(JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +%define sample_data ebp + 8 ; JSAMPARRAY sample_data +%define start_col ebp + 12 ; JDIMENSION start_col +%define workspace ebp + 16 ; FAST_FLOAT *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_convsamp_float_sse) + +EXTN(jsimd_convsamp_float_sse): + push ebp + mov ebp, esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pcmpeqw mm7, mm7 + psllw mm7, 7 + packsswb mm7, mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..) + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/2 + alignx 16, 7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] + + psubb mm0, mm7 ; mm0=(01234567) + psubb mm1, mm7 ; mm1=(89ABCDEF) + + punpcklbw mm2, mm0 ; mm2=(*0*1*2*3) + punpckhbw mm0, mm0 ; mm0=(*4*5*6*7) + punpcklbw mm3, mm1 ; mm3=(*8*9*A*B) + punpckhbw mm1, mm1 ; mm1=(*C*D*E*F) + + punpcklwd mm4, mm2 ; mm4=(***0***1) + punpckhwd mm2, mm2 ; mm2=(***2***3) + punpcklwd mm5, mm0 ; mm5=(***4***5) + punpckhwd mm0, mm0 ; mm0=(***6***7) + + psrad mm4, (DWORD_BIT-BYTE_BIT) ; mm4=(01) + psrad mm2, (DWORD_BIT-BYTE_BIT) ; mm2=(23) + cvtpi2ps xmm0, mm4 ; xmm0=(01**) + cvtpi2ps xmm1, mm2 ; xmm1=(23**) + psrad mm5, (DWORD_BIT-BYTE_BIT) ; mm5=(45) + psrad mm0, (DWORD_BIT-BYTE_BIT) ; mm0=(67) + cvtpi2ps xmm2, mm5 ;
xmm2=(45**) + cvtpi2ps xmm3, mm0 ; xmm3=(67**) + + punpcklwd mm6, mm3 ; mm6=(***8***9) + punpckhwd mm3, mm3 ; mm3=(***A***B) + punpcklwd mm4, mm1 ; mm4=(***C***D) + punpckhwd mm1, mm1 ; mm1=(***E***F) + + psrad mm6, (DWORD_BIT-BYTE_BIT) ; mm6=(89) + psrad mm3, (DWORD_BIT-BYTE_BIT) ; mm3=(AB) + cvtpi2ps xmm4, mm6 ; xmm4=(89**) + cvtpi2ps xmm5, mm3 ; xmm5=(AB**) + psrad mm4, (DWORD_BIT-BYTE_BIT) ; mm4=(CD) + psrad mm1, (DWORD_BIT-BYTE_BIT) ; mm1=(EF) + cvtpi2ps xmm6, mm4 ; xmm6=(CD**) + cvtpi2ps xmm7, mm1 ; xmm7=(EF**) + + movlhps xmm0, xmm1 ; xmm0=(0123) + movlhps xmm2, xmm3 ; xmm2=(4567) + movlhps xmm4, xmm5 ; xmm4=(89AB) + movlhps xmm6, xmm7 ; xmm6=(CDEF) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm4 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 + + add esi, byte 2*SIZEOF_JSAMPROW + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz near .convloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_sse(JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +%define coef_block ebp + 8 ; JCOEFPTR coef_block +%define divisors ebp + 12 ; FAST_FLOAT *divisors +%define workspace ebp + 16 ; FAST_FLOAT *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_quantize_float_sse) + +EXTN(jsimd_quantize_float_sse): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/16 + alignx 16, 7 +.quantloop: + movaps xmm0, XMMWORD 
[XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + + movhlps xmm4, xmm0 + movhlps xmm5, xmm1 + + cvtps2pi mm0, xmm0 + cvtps2pi mm1, xmm1 + cvtps2pi mm4, xmm4 + cvtps2pi mm5, xmm5 + + movhlps xmm6, xmm2 + movhlps xmm7, xmm3 + + cvtps2pi mm2, xmm2 + cvtps2pi mm3, xmm3 + cvtps2pi mm6, xmm6 + cvtps2pi mm7, xmm7 + + packssdw mm0, mm4 + packssdw mm1, mm5 + packssdw mm2, mm6 + packssdw mm3, mm7 + + movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 + movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1 + movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm2 + movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3 + + add esi, byte 16*SIZEOF_FAST_FLOAT + add edx, byte 16*SIZEOF_FAST_FLOAT + add edi, byte 16*SIZEOF_JCOEF + dec eax + jnz short .quantloop + + emms ; empty MMX state + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jquantf-sse2.asm b/simd/i386/jquantf-sse2.asm new file mode 100644 index 0000000..8d1201c --- /dev/null +++ b/simd/i386/jquantf-sse2.asm @@ -0,0 +1,170 @@ +; +; jquantf.asm - sample data conversion and quantization (SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_sse2(JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +%define sample_data ebp + 8 ; JSAMPARRAY sample_data +%define start_col ebp + 12 ; JDIMENSION start_col +%define workspace ebp + 16 ; FAST_FLOAT *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_convsamp_float_sse2) + +EXTN(jsimd_convsamp_float_sse2): + push ebp + mov ebp, esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pcmpeqw xmm7, xmm7 + psllw xmm7, 7 + packsswb xmm7, xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) 
+ + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (FAST_FLOAT *) + mov ecx, DCTSIZE/2 + alignx 16, 7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] + + psubb xmm0, xmm7 ; xmm0=(01234567) + psubb xmm1, xmm7 ; xmm1=(89ABCDEF) + + punpcklbw xmm0, xmm0 ; xmm0=(*0*1*2*3*4*5*6*7) + punpcklbw xmm1, xmm1 ; xmm1=(*8*9*A*B*C*D*E*F) + + punpcklwd xmm2, xmm0 ; xmm2=(***0***1***2***3) + punpckhwd xmm0, xmm0 ; xmm0=(***4***5***6***7) + punpcklwd xmm3, xmm1 ; xmm3=(***8***9***A***B) + punpckhwd xmm1, xmm1 ; xmm1=(***C***D***E***F) + + psrad xmm2, (DWORD_BIT-BYTE_BIT) ; xmm2=(0123) + psrad xmm0, (DWORD_BIT-BYTE_BIT) ; xmm0=(4567) + cvtdq2ps xmm2, xmm2 ; xmm2=(0123) + cvtdq2ps xmm0, xmm0 ; xmm0=(4567) + psrad xmm3, (DWORD_BIT-BYTE_BIT) ; xmm3=(89AB) + psrad xmm1, (DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF) + cvtdq2ps xmm3, xmm3 ; xmm3=(89AB) + cvtdq2ps xmm1, xmm1 ; xmm1=(CDEF) + + movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 + + add esi, byte 2*SIZEOF_JSAMPROW + add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec ecx + jnz short .convloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_sse2(JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +%define coef_block ebp + 8 ; JCOEFPTR coef_block +%define divisors ebp + 12 ; FAST_FLOAT *divisors +%define workspace ebp + 16 ; FAST_FLOAT
*workspace + + align 32 + GLOBAL_FUNCTION(jsimd_quantize_float_sse2) + +EXTN(jsimd_quantize_float_sse2): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/16 + alignx 16, 7 +.quantloop: + movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] + mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] + mulps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] + + cvtps2dq xmm0, xmm0 + cvtps2dq xmm1, xmm1 + cvtps2dq xmm2, xmm2 + cvtps2dq xmm3, xmm3 + + packssdw xmm0, xmm1 + packssdw xmm2, xmm3 + + movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_JCOEF)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_JCOEF)], xmm2 + + add esi, byte 16*SIZEOF_FAST_FLOAT + add edx, byte 16*SIZEOF_FAST_FLOAT + add edi, byte 16*SIZEOF_JCOEF + dec eax + jnz short .quantloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jquanti-avx2.asm b/simd/i386/jquanti-avx2.asm new file mode 100644 index 0000000..ea8e1a1 --- /dev/null +++ b/simd/i386/jquanti-avx2.asm @@ -0,0 +1,190 @@ +; +; jquanti.asm - sample data conversion and quantization (AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, 2018, D. R. Commander. +; Copyright (C) 2016, Matthieu Darbois. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_avx2(JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +%define sample_data ebp + 8 ; JSAMPARRAY sample_data +%define start_col ebp + 12 ; JDIMENSION start_col +%define workspace ebp + 16 ; DCTELEM *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_convsamp_avx2) + +EXTN(jsimd_convsamp_avx2): + push ebp + mov ebp, esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) + mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] + + mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq xmm2, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq xmm3, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] + + mov ebx, JSAMPROW [esi+4*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+5*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq xmm4, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq xmm5, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] + + mov ebx, JSAMPROW [esi+6*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+7*SIZEOF_JSAMPROW] ; 
(JSAMPLE *) + movq xmm6, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] + movq xmm7, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] + + vinserti128 ymm0, ymm0, xmm1, 1 + vinserti128 ymm2, ymm2, xmm3, 1 + vinserti128 ymm4, ymm4, xmm5, 1 + vinserti128 ymm6, ymm6, xmm7, 1 + + vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's) + vpunpcklbw ymm0, ymm0, ymm1 + vpunpcklbw ymm2, ymm2, ymm1 + vpunpcklbw ymm4, ymm4, ymm1 + vpunpcklbw ymm6, ymm6, ymm1 + + vpcmpeqw ymm7, ymm7, ymm7 + vpsllw ymm7, ymm7, 7 ; ymm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + vpaddw ymm0, ymm0, ymm7 + vpaddw ymm2, ymm2, ymm7 + vpaddw ymm4, ymm4, ymm7 + vpaddw ymm6, ymm6, ymm7 + + vmovdqu YMMWORD [YMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], ymm0 + vmovdqu YMMWORD [YMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], ymm2 + vmovdqu YMMWORD [YMMBLOCK(4,0,edi,SIZEOF_DCTELEM)], ymm4 + vmovdqu YMMWORD [YMMBLOCK(6,0,edi,SIZEOF_DCTELEM)], ymm6 + + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). 
+; +; GLOBAL(void) +; jsimd_quantize_avx2(JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m, n, b) \ + YMMBLOCK(DCTSIZE * 0 + (m), (n), (b), SIZEOF_DCTELEM) +%define CORRECTION(m, n, b) \ + YMMBLOCK(DCTSIZE * 1 + (m), (n), (b), SIZEOF_DCTELEM) +%define SCALE(m, n, b) \ + YMMBLOCK(DCTSIZE * 2 + (m), (n), (b), SIZEOF_DCTELEM) + +%define coef_block ebp + 8 ; JCOEFPTR coef_block +%define divisors ebp + 12 ; DCTELEM *divisors +%define workspace ebp + 16 ; DCTELEM *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_quantize_avx2) + +EXTN(jsimd_quantize_avx2): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + + vmovdqu ymm4, [YMMBLOCK(0,0,esi,SIZEOF_DCTELEM)] + vmovdqu ymm5, [YMMBLOCK(2,0,esi,SIZEOF_DCTELEM)] + vmovdqu ymm6, [YMMBLOCK(4,0,esi,SIZEOF_DCTELEM)] + vmovdqu ymm7, [YMMBLOCK(6,0,esi,SIZEOF_DCTELEM)] + vpabsw ymm0, ymm4 + vpabsw ymm1, ymm5 + vpabsw ymm2, ymm6 + vpabsw ymm3, ymm7 + + vpaddw ymm0, YMMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor + vpaddw ymm1, YMMWORD [CORRECTION(2,0,edx)] + vpaddw ymm2, YMMWORD [CORRECTION(4,0,edx)] + vpaddw ymm3, YMMWORD [CORRECTION(6,0,edx)] + vpmulhuw ymm0, YMMWORD [RECIPROCAL(0,0,edx)] ; reciprocal + vpmulhuw ymm1, YMMWORD [RECIPROCAL(2,0,edx)] + vpmulhuw ymm2, YMMWORD [RECIPROCAL(4,0,edx)] + vpmulhuw ymm3, YMMWORD [RECIPROCAL(6,0,edx)] + vpmulhuw ymm0, YMMWORD [SCALE(0,0,edx)] ; scale + vpmulhuw ymm1, YMMWORD [SCALE(2,0,edx)] + vpmulhuw ymm2, YMMWORD [SCALE(4,0,edx)] + vpmulhuw ymm3, YMMWORD [SCALE(6,0,edx)] + + vpsignw ymm0, ymm0, ymm4 + vpsignw ymm1, ymm1, ymm5 + vpsignw ymm2, ymm2, ymm6 + vpsignw ymm3, ymm3, ymm7 + + vmovdqu [YMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], ymm0 + vmovdqu [YMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], ymm1 + vmovdqu [YMMBLOCK(4,0,edi,SIZEOF_DCTELEM)], ymm2 + vmovdqu 
[YMMBLOCK(6,0,edi,SIZEOF_DCTELEM)], ymm3 + + vzeroupper + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jquanti-sse2.asm b/simd/i386/jquanti-sse2.asm new file mode 100644 index 0000000..2a69494 --- /dev/null +++ b/simd/i386/jquanti-sse2.asm @@ -0,0 +1,203 @@ +; +; jquanti.asm - sample data conversion and quantization (SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_sse2(JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +%define sample_data ebp + 8 ; JSAMPARRAY sample_data +%define start_col ebp + 12 ; JDIMENSION start_col +%define workspace ebp + 16 ; DCTELEM *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_convsamp_sse2) + +EXTN(jsimd_convsamp_sse2): + push ebp + mov ebp, esp + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved + push esi + push edi + + pxor xmm6, xmm6 ; xmm6=(all 0's) + pcmpeqw xmm7, xmm7 + psllw xmm7, 7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) 
+ mov eax, JDIMENSION [start_col] + mov edi, POINTER [workspace] ; (DCTELEM *) + mov ecx, DCTSIZE/4 + alignx 16, 7 +.convloop: + mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm0=(01234567) + movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) + + mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm2, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN) + movq xmm3, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV) + + punpcklbw xmm0, xmm6 ; xmm0=(01234567) + punpcklbw xmm1, xmm6 ; xmm1=(89ABCDEF) + paddw xmm0, xmm7 + paddw xmm1, xmm7 + punpcklbw xmm2, xmm6 ; xmm2=(GHIJKLMN) + punpcklbw xmm3, xmm6 ; xmm3=(OPQRSTUV) + paddw xmm2, xmm7 + paddw xmm3, xmm7 + + movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3 + + add esi, byte 4*SIZEOF_JSAMPROW + add edi, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec ecx + jnz short .convloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + pop ebp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). 
+; +; GLOBAL(void) +; jsimd_quantize_sse2(JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m, n, b) \ + XMMBLOCK(DCTSIZE * 0 + (m), (n), (b), SIZEOF_DCTELEM) +%define CORRECTION(m, n, b) \ + XMMBLOCK(DCTSIZE * 1 + (m), (n), (b), SIZEOF_DCTELEM) +%define SCALE(m, n, b) \ + XMMBLOCK(DCTSIZE * 2 + (m), (n), (b), SIZEOF_DCTELEM) + +%define coef_block ebp + 8 ; JCOEFPTR coef_block +%define divisors ebp + 12 ; DCTELEM *divisors +%define workspace ebp + 16 ; DCTELEM *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_quantize_sse2) + +EXTN(jsimd_quantize_sse2): + push ebp + mov ebp, esp +; push ebx ; unused +; push ecx ; unused +; push edx ; need not be preserved + push esi + push edi + + mov esi, POINTER [workspace] + mov edx, POINTER [divisors] + mov edi, JCOEFPTR [coef_block] + mov eax, DCTSIZE2/32 + alignx 16, 7 +.quantloop: + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)] + movdqa xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)] + movdqa xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)] + movdqa xmm0, xmm4 + movdqa xmm1, xmm5 + movdqa xmm2, xmm6 + movdqa xmm3, xmm7 + psraw xmm4, (WORD_BIT-1) + psraw xmm5, (WORD_BIT-1) + psraw xmm6, (WORD_BIT-1) + psraw xmm7, (WORD_BIT-1) + pxor xmm0, xmm4 + pxor xmm1, xmm5 + pxor xmm2, xmm6 + pxor xmm3, xmm7 + psubw xmm0, xmm4 ; if (xmm0 < 0) xmm0 = -xmm0; + psubw xmm1, xmm5 ; if (xmm1 < 0) xmm1 = -xmm1; + psubw xmm2, xmm6 ; if (xmm2 < 0) xmm2 = -xmm2; + psubw xmm3, xmm7 ; if (xmm3 < 0) xmm3 = -xmm3; + + paddw xmm0, XMMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor + paddw xmm1, XMMWORD [CORRECTION(1,0,edx)] + paddw xmm2, XMMWORD [CORRECTION(2,0,edx)] + paddw xmm3, XMMWORD [CORRECTION(3,0,edx)] + pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,edx)] ; reciprocal + pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,edx)] + pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,edx)] + pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,edx)] + pmulhuw xmm0, XMMWORD 
[SCALE(0,0,edx)] ; scale + pmulhuw xmm1, XMMWORD [SCALE(1,0,edx)] + pmulhuw xmm2, XMMWORD [SCALE(2,0,edx)] + pmulhuw xmm3, XMMWORD [SCALE(3,0,edx)] + + pxor xmm0, xmm4 + pxor xmm1, xmm5 + pxor xmm2, xmm6 + pxor xmm3, xmm7 + psubw xmm0, xmm4 + psubw xmm1, xmm5 + psubw xmm2, xmm6 + psubw xmm3, xmm7 + movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3 + + add esi, byte 32*SIZEOF_DCTELEM + add edx, byte 32*SIZEOF_DCTELEM + add edi, byte 32*SIZEOF_JCOEF + dec eax + jnz near .quantloop + + pop edi + pop esi +; pop edx ; need not be preserved +; pop ecx ; unused +; pop ebx ; unused + pop ebp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/i386/jsimd.c b/simd/i386/jsimd.c new file mode 100644 index 0000000..563949a --- /dev/null +++ b/simd/i386/jsimd.c @@ -0,0 +1,1253 @@ +/* + * jsimd_i386.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander. + * Copyright (C) 2015-2016, 2018, Matthieu Darbois. + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * 32-bit x86 architecture. + */ + +#define JPEG_INTERNALS +#include "../../jinclude.h" +#include "../../jpeglib.h" +#include "../../jsimd.h" +#include "../../jdct.h" +#include "../../jsimddct.h" +#include "../jsimd.h" +#include "jconfigint.h" + +/* + * In the PIC cases, we have no guarantee that constants will keep + * their alignment. This macro allows us to verify it at runtime. 
+ */ +#define IS_ALIGNED(ptr, order) (((size_t)(ptr) & ((1 << (order)) - 1)) == 0) + +#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ +#define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */ + +static unsigned int simd_support = (unsigned int)(~0); +static unsigned int simd_huffman = 1; + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ +LOCAL(void) +init_simd(void) +{ +#ifndef NO_GETENV + char *env = NULL; +#endif + + if (simd_support != ~0U) + return; + + simd_support = jpeg_simd_cpu_support(); + +#ifndef NO_GETENV + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCEMMX"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_MMX; + env = getenv("JSIMD_FORCE3DNOW"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_3DNOW | JSIMD_MMX; + env = getenv("JSIMD_FORCESSE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_SSE | JSIMD_MMX; + env = getenv("JSIMD_FORCESSE2"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_SSE2; + env = getenv("JSIMD_FORCEAVX2"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_AVX2; + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; + env = getenv("JSIMD_NOHUFFENC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_huffman = 0; +#endif +} + +GLOBAL(int) +jsimd_can_rgb_ycc(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) + return 1; + if (simd_support & JSIMD_MMX) +
return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_extrgb_ycc_convert_avx2; + sse2fct = jsimd_extrgb_ycc_convert_sse2; + mmxfct = jsimd_extrgb_ycc_convert_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + avx2fct = jsimd_extrgbx_ycc_convert_avx2; + sse2fct = jsimd_extrgbx_ycc_convert_sse2; + mmxfct = jsimd_extrgbx_ycc_convert_mmx; + break; + case JCS_EXT_BGR: + avx2fct = jsimd_extbgr_ycc_convert_avx2; + sse2fct = jsimd_extbgr_ycc_convert_sse2; + mmxfct = 
jsimd_extbgr_ycc_convert_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + avx2fct = jsimd_extbgrx_ycc_convert_avx2; + sse2fct = jsimd_extbgrx_ycc_convert_sse2; + mmxfct = jsimd_extbgrx_ycc_convert_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + avx2fct = jsimd_extxbgr_ycc_convert_avx2; + sse2fct = jsimd_extxbgr_ycc_convert_sse2; + mmxfct = jsimd_extxbgr_ycc_convert_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + avx2fct = jsimd_extxrgb_ycc_convert_avx2; + sse2fct = jsimd_extxrgb_ycc_convert_sse2; + mmxfct = jsimd_extxrgb_ycc_convert_mmx; + break; + default: + avx2fct = jsimd_rgb_ycc_convert_avx2; + sse2fct = jsimd_rgb_ycc_convert_sse2; + mmxfct = jsimd_rgb_ycc_convert_mmx; + break; + } + + if (simd_support & JSIMD_AVX2) + avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); + else if (simd_support & JSIMD_SSE2) + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); + else + mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_extrgb_gray_convert_avx2; + sse2fct = jsimd_extrgb_gray_convert_sse2; + mmxfct = jsimd_extrgb_gray_convert_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + avx2fct = jsimd_extrgbx_gray_convert_avx2; + sse2fct = jsimd_extrgbx_gray_convert_sse2; + mmxfct = jsimd_extrgbx_gray_convert_mmx; + break; + case JCS_EXT_BGR: + avx2fct = jsimd_extbgr_gray_convert_avx2; + sse2fct = jsimd_extbgr_gray_convert_sse2; + mmxfct = jsimd_extbgr_gray_convert_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + avx2fct = 
jsimd_extbgrx_gray_convert_avx2; + sse2fct = jsimd_extbgrx_gray_convert_sse2; + mmxfct = jsimd_extbgrx_gray_convert_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + avx2fct = jsimd_extxbgr_gray_convert_avx2; + sse2fct = jsimd_extxbgr_gray_convert_sse2; + mmxfct = jsimd_extxbgr_gray_convert_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + avx2fct = jsimd_extxrgb_gray_convert_avx2; + sse2fct = jsimd_extxrgb_gray_convert_sse2; + mmxfct = jsimd_extxrgb_gray_convert_mmx; + break; + default: + avx2fct = jsimd_rgb_gray_convert_avx2; + sse2fct = jsimd_rgb_gray_convert_sse2; + mmxfct = jsimd_rgb_gray_convert_mmx; + break; + } + + if (simd_support & JSIMD_AVX2) + avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); + else if (simd_support & JSIMD_SSE2) + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); + else + mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ + void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_ycc_extrgb_convert_avx2; + sse2fct = jsimd_ycc_extrgb_convert_sse2; + mmxfct = jsimd_ycc_extrgb_convert_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + avx2fct = jsimd_ycc_extrgbx_convert_avx2; + sse2fct = jsimd_ycc_extrgbx_convert_sse2; + mmxfct = jsimd_ycc_extrgbx_convert_mmx; + break; + case JCS_EXT_BGR: + avx2fct = jsimd_ycc_extbgr_convert_avx2; + sse2fct = jsimd_ycc_extbgr_convert_sse2; + mmxfct = jsimd_ycc_extbgr_convert_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + avx2fct = jsimd_ycc_extbgrx_convert_avx2; + sse2fct = jsimd_ycc_extbgrx_convert_sse2; + mmxfct = 
jsimd_ycc_extbgrx_convert_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + avx2fct = jsimd_ycc_extxbgr_convert_avx2; + sse2fct = jsimd_ycc_extxbgr_convert_sse2; + mmxfct = jsimd_ycc_extxbgr_convert_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + avx2fct = jsimd_ycc_extxrgb_convert_avx2; + sse2fct = jsimd_ycc_extxrgb_convert_sse2; + mmxfct = jsimd_ycc_extxrgb_convert_mmx; + break; + default: + avx2fct = jsimd_ycc_rgb_convert_avx2; + sse2fct = jsimd_ycc_rgb_convert_sse2; + mmxfct = jsimd_ycc_rgb_convert_mmx; + break; + } + + if (simd_support & JSIMD_AVX2) + avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); + else if (simd_support & JSIMD_SSE2) + sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); + else + mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_AVX2) + jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, 
+ compptr->width_in_blocks, input_data, + output_data); + else if (simd_support & JSIMD_SSE2) + jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); + else + jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); + else if (simd_support & JSIMD_SSE2) + jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); + else + jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_AVX2) + 
jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); + else if (simd_support & JSIMD_SSE2) + jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); + else + jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); + else if (simd_support & JSIMD_SSE2) + jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); + else + jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_fancy_upsample_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_fancy_upsample_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + if 
(simd_support & JSIMD_AVX2) + jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); + else if (simd_support & JSIMD_SSE2) + jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); + else + jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); + else if (simd_support & JSIMD_SSE2) + jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); + else + jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_merged_upsample_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_merged_upsample_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) 
+jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) +{ + void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2; + mmxfct = jsimd_h2v2_extrgb_merged_upsample_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2; + mmxfct = jsimd_h2v2_extrgbx_merged_upsample_mmx; + break; + case JCS_EXT_BGR: + avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2; + mmxfct = jsimd_h2v2_extbgr_merged_upsample_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2; + mmxfct = jsimd_h2v2_extbgrx_merged_upsample_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2; + mmxfct = jsimd_h2v2_extxbgr_merged_upsample_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2; + mmxfct = jsimd_h2v2_extxrgb_merged_upsample_mmx; + break; + default: + avx2fct = jsimd_h2v2_merged_upsample_avx2; + sse2fct = jsimd_h2v2_merged_upsample_sse2; + mmxfct = jsimd_h2v2_merged_upsample_mmx; + break; + } + + if (simd_support & JSIMD_AVX2) + avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); + else if (simd_support & JSIMD_SSE2) + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); + else + mmxfct(cinfo->output_width, input_buf, 
in_row_group_ctr, output_buf); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) +{ + void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2; + mmxfct = jsimd_h2v1_extrgb_merged_upsample_mmx; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2; + mmxfct = jsimd_h2v1_extrgbx_merged_upsample_mmx; + break; + case JCS_EXT_BGR: + avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2; + mmxfct = jsimd_h2v1_extbgr_merged_upsample_mmx; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2; + mmxfct = jsimd_h2v1_extbgrx_merged_upsample_mmx; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2; + mmxfct = jsimd_h2v1_extxbgr_merged_upsample_mmx; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2; + mmxfct = jsimd_h2v1_extxrgb_merged_upsample_mmx; + break; + default: + avx2fct = jsimd_h2v1_merged_upsample_avx2; + sse2fct = jsimd_h2v1_merged_upsample_sse2; + mmxfct = jsimd_h2v1_merged_upsample_mmx; + break; + } + + if (simd_support & JSIMD_AVX2) + avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); + else if (simd_support & JSIMD_SSE2) + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); + 
else + mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(int) +jsimd_can_convsamp(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_SSE) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + if (simd_support & JSIMD_AVX2) + jsimd_convsamp_avx2(sample_data, start_col, workspace); + else if (simd_support & JSIMD_SSE2) + jsimd_convsamp_sse2(sample_data, start_col, workspace); + else + jsimd_convsamp_mmx(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ + if (simd_support & JSIMD_SSE2) + jsimd_convsamp_float_sse2(sample_data, start_col, workspace); + else if (simd_support & JSIMD_SSE) + jsimd_convsamp_float_sse(sample_data, start_col, workspace); + else + jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); +} + +GLOBAL(int) +jsimd_can_fdct_islow(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && 
IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow(DCTELEM *data) +{ + if (simd_support & JSIMD_AVX2) + jsimd_fdct_islow_avx2(data); + else if (simd_support & JSIMD_SSE2) + jsimd_fdct_islow_sse2(data); + else + jsimd_fdct_islow_mmx(data); +} + +GLOBAL(void) +jsimd_fdct_ifast(DCTELEM *data) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) + jsimd_fdct_ifast_sse2(data); + else + jsimd_fdct_ifast_mmx(data); +} + +GLOBAL(void) +jsimd_fdct_float(FAST_FLOAT *data) +{ + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) + jsimd_fdct_float_sse(data); + else if (simd_support & JSIMD_3DNOW) + jsimd_fdct_float_3dnow(data); +} + +GLOBAL(int) +jsimd_can_quantize(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF)
!= 2) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + if (simd_support & JSIMD_SSE) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) +{ + if (simd_support & JSIMD_AVX2) + jsimd_quantize_avx2(coef_block, divisors, workspace); + else if (simd_support & JSIMD_SSE2) + jsimd_quantize_sse2(coef_block, divisors, workspace); + else + jsimd_quantize_mmx(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ + if (simd_support & JSIMD_SSE2) + jsimd_quantize_float_sse2(coef_block, divisors, workspace); + else if (simd_support & JSIMD_SSE) + jsimd_quantize_float_sse(coef_block, divisors, workspace); + else + jsimd_quantize_float_3dnow(coef_block, divisors, workspace); +} + +GLOBAL(int) +jsimd_can_idct_2x2(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + 
JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else + jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(void) +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else + jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(int) +jsimd_can_idct_islow(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) + return 1; + if (simd_support & JSIMD_MMX) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float(void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; 
+ if (sizeof(FLOAT_MULT_TYPE) != 4) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) + return 1; + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) + return 1; + if (simd_support & JSIMD_3DNOW) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_AVX2) + jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf, + output_col); + else if (simd_support & JSIMD_SSE2) + jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else + jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) + jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else + jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) + jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, + output_col); + else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) + jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf, + output_col); + else + jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block(void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && simd_huffman && + 
IS_ALIGNED_SSE(jconst_huff_encode_one_block)) + return 1; + + return 0; +} + +GLOBAL(JOCTET *) +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, + dctbl, actbl); +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_first_prepare(void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (SIZEOF_SIZE_T != 4) + return 0; + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *zerobits) +{ + jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start, + Sl, Al, values, zerobits); +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_refine_prepare(void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (SIZEOF_SIZE_T != 4) + return 0; + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits) +{ + return jsimd_encode_mcu_AC_refine_prepare_sse2(block, + jpeg_natural_order_start, + Sl, Al, absvalues, bits); +} diff --git a/simd/i386/jsimdcpu.asm b/simd/i386/jsimdcpu.asm new file mode 100644 index 0000000..faddd38 --- /dev/null +++ b/simd/i386/jsimdcpu.asm @@ -0,0 +1,132 @@ +; +; jsimdcpu.asm - SIMD instruction support check +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 32 +; +; Check if the CPU supports SIMD instructions +; +; GLOBAL(unsigned int) +; jpeg_simd_cpu_support(void) +; + + align 32 + GLOBAL_FUNCTION(jpeg_simd_cpu_support) + +EXTN(jpeg_simd_cpu_support): + push ebx +; push ecx ; need not be preserved +; push edx ; need not be preserved +; push esi ; unused + push edi + + xor edi, edi ; simd support flag + + pushfd + pop eax + mov edx, eax + xor eax, 1<<21 ; flip ID bit in EFLAGS + push eax + popfd + pushfd + pop eax + xor eax, edx + jz near .return ; CPUID is not supported + + ; Check for MMX instruction support + xor eax, eax + cpuid + test eax, eax + jz near .return + + xor eax, eax + inc eax + cpuid + mov eax, edx ; eax = Standard feature flags + + test eax, 1<<23 ; bit23:MMX + jz short .no_mmx + or edi, byte JSIMD_MMX +.no_mmx: + test eax, 1<<25 ; bit25:SSE + jz short .no_sse + or edi, byte JSIMD_SSE +.no_sse: + test eax, 1<<26 ; bit26:SSE2 + jz short .no_sse2 + or edi, byte JSIMD_SSE2 +.no_sse2: + + ; Check for AVX2 instruction support + mov eax, 7 + xor ecx, ecx + cpuid + mov eax, ebx + test eax, 1<<5 ; bit5:AVX2 + jz short .no_avx2 + + ; Check for AVX2 O/S support + mov eax, 1 + xor ecx, ecx + cpuid + test ecx, 1<<27 + jz short .no_avx2 ; O/S does not support XSAVE + test ecx, 1<<28 + jz short .no_avx2 ; CPU does not support AVX2 + + xor ecx, ecx + xgetbv + and eax, 6 + cmp eax, 6 ; O/S does not manage XMM/YMM state + ; using XSAVE + jnz short .no_avx2 + + or edi, JSIMD_AVX2 +.no_avx2: + + ; 
Check for 3DNow! instruction support + mov eax, 0x80000000 + cpuid + cmp eax, 0x80000000 + jbe short .return + + mov eax, 0x80000001 + cpuid + mov eax, edx ; eax = Extended feature flags + + test eax, 1<<31 ; bit31:3DNow!(vendor independent) + jz short .no_3dnow + or edi, byte JSIMD_3DNOW +.no_3dnow: + +.return: + mov eax, edi + + pop edi +; pop esi ; unused +; pop edx ; need not be preserved +; pop ecx ; need not be preserved + pop ebx + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/jccolext-mmx.asm b/simd/jccolext-mmx.asm deleted file mode 100644 index 96a0372..0000000 --- a/simd/jccolext-mmx.asm +++ /dev/null @@ -1,476 +0,0 @@ -; -; jccolext.asm - colorspace conversion (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Convert some rows of samples to the output colorspace. 
-; -; GLOBAL(void) -; jsimd_rgb_ycc_convert_mmx (JDIMENSION img_width, -; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, -; JDIMENSION output_row, int num_rows); -; - -%define img_width(b) (b)+8 ; JDIMENSION img_width -%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf -%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf -%define output_row(b) (b)+20 ; JDIMENSION output_row -%define num_rows(b) (b)+24 ; int num_rows - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 8 -%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr - - align 16 - global EXTN(jsimd_rgb_ycc_convert_mmx) - -EXTN(jsimd_rgb_ycc_convert_mmx): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic eax ; make a room for GOT address - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - movpic POINTER [gotptr], ebx ; save GOT address - - mov ecx, JDIMENSION [img_width(eax)] ; num_cols - test ecx,ecx - jz near .return - - push ecx - - mov esi, JSAMPIMAGE [output_buf(eax)] - mov ecx, JDIMENSION [output_row(eax)] - mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] - mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] - mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY] - lea edi, [edi+ecx*SIZEOF_JSAMPROW] - lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] - lea edx, [edx+ecx*SIZEOF_JSAMPROW] - - pop ecx - - mov esi, JSAMPARRAY [input_buf(eax)] - mov eax, INT [num_rows(eax)] - test eax,eax - jle near .return - alignx 16,7 -.rowloop: - pushpic eax - push edx - push ebx - push edi - push esi - push ecx ; col - - mov esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr0 - mov ebx, JSAMPROW [ebx] ; outptr1 - mov edx, JSAMPROW [edx] ; outptr2 - movpic eax, POINTER [gotptr] ; load GOT address (eax) - - cmp ecx, byte SIZEOF_MMWORD - jae short 
.columnloop - alignx 16,7 - -%if RGB_PIXELSIZE == 3 ; --------------- - -.column_ld1: - push eax - push edx - lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE - test cl, SIZEOF_BYTE - jz short .column_ld2 - sub ecx, byte SIZEOF_BYTE - xor eax,eax - mov al, BYTE [esi+ecx] -.column_ld2: - test cl, SIZEOF_WORD - jz short .column_ld4 - sub ecx, byte SIZEOF_WORD - xor edx,edx - mov dx, WORD [esi+ecx] - shl eax, WORD_BIT - or eax,edx -.column_ld4: - movd mmA,eax - pop edx - pop eax - test cl, SIZEOF_DWORD - jz short .column_ld8 - sub ecx, byte SIZEOF_DWORD - movd mmG, DWORD [esi+ecx] - psllq mmA, DWORD_BIT - por mmA,mmG -.column_ld8: - test cl, SIZEOF_MMWORD - jz short .column_ld16 - movq mmG,mmA - movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] - mov ecx, SIZEOF_MMWORD - jmp short .rgb_ycc_cnv -.column_ld16: - test cl, 2*SIZEOF_MMWORD - mov ecx, SIZEOF_MMWORD - jz short .rgb_ycc_cnv - movq mmF,mmA - movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] - movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] - jmp short .rgb_ycc_cnv - alignx 16,7 - -.columnloop: - movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] - movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] - movq mmF, MMWORD [esi+2*SIZEOF_MMWORD] - -.rgb_ycc_cnv: - ; mmA=(00 10 20 01 11 21 02 12) - ; mmG=(22 03 13 23 04 14 24 05) - ; mmF=(15 25 06 16 26 07 17 27) - - movq mmD,mmA - psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01) - psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --) - - punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05) - psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23) - - punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16) - punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27) - - movq mmE,mmA - psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14) - psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --) - - punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) - psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25) - - punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07) - punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27) - - pxor mmH,mmH - - movq mmC,mmA - 
punpcklbw mmA,mmH ; mmA=(00 02 04 06) - punpckhbw mmC,mmH ; mmC=(10 12 14 16) - - movq mmB,mmE - punpcklbw mmE,mmH ; mmE=(20 22 24 26) - punpckhbw mmB,mmH ; mmB=(01 03 05 07) - - movq mmF,mmD - punpcklbw mmD,mmH ; mmD=(11 13 15 17) - punpckhbw mmF,mmH ; mmF=(21 23 25 27) - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -.column_ld1: - test cl, SIZEOF_MMWORD/8 - jz short .column_ld2 - sub ecx, byte SIZEOF_MMWORD/8 - movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] -.column_ld2: - test cl, SIZEOF_MMWORD/4 - jz short .column_ld4 - sub ecx, byte SIZEOF_MMWORD/4 - movq mmF,mmA - movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE] -.column_ld4: - test cl, SIZEOF_MMWORD/2 - mov ecx, SIZEOF_MMWORD - jz short .rgb_ycc_cnv - movq mmD,mmA - movq mmC,mmF - movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] - movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] - jmp short .rgb_ycc_cnv - alignx 16,7 - -.columnloop: - movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] - movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] - movq mmD, MMWORD [esi+2*SIZEOF_MMWORD] - movq mmC, MMWORD [esi+3*SIZEOF_MMWORD] - -.rgb_ycc_cnv: - ; mmA=(00 10 20 30 01 11 21 31) - ; mmF=(02 12 22 32 03 13 23 33) - ; mmD=(04 14 24 34 05 15 25 35) - ; mmC=(06 16 26 36 07 17 27 37) - - movq mmB,mmA - punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32) - punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33) - - movq mmG,mmD - punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36) - punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37) - - movq mmE,mmA - punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) - punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36) - - movq mmH,mmB - punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17) - punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37) - - pxor mmF,mmF - - movq mmC,mmA - punpcklbw mmA,mmF ; mmA=(00 02 04 06) - punpckhbw mmC,mmF ; mmC=(10 12 14 16) - - movq mmD,mmB - punpcklbw mmB,mmF ; mmB=(01 03 05 07) - punpckhbw mmD,mmF ; mmD=(11 13 15 17) - - movq mmG,mmE - punpcklbw mmE,mmF ; mmE=(20 22 24 26) - punpckhbw mmG,mmF ; mmG=(30 32 34 36) - - punpcklbw mmF,mmH - 
punpckhbw mmH,mmH - psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27) - psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37) - -%endif ; RGB_PIXELSIZE ; --------------- - - ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE - ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO - - ; (Original) - ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B - ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE - ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE - ; - ; (This implementation) - ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G - ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE - ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE - - movq MMWORD [wk(0)], mm0 ; wk(0)=RE - movq MMWORD [wk(1)], mm1 ; wk(1)=RO - movq MMWORD [wk(2)], mm4 ; wk(2)=BE - movq MMWORD [wk(3)], mm5 ; wk(3)=BO - - movq mm6,mm1 - punpcklwd mm1,mm3 - punpckhwd mm6,mm3 - movq mm7,mm1 - movq mm4,mm6 - pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337) - pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337) - pmaddwd mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) - pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) - - movq MMWORD [wk(4)], mm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) - movq MMWORD [wk(5)], mm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) - - pxor mm1,mm1 - pxor mm6,mm6 - punpcklwd mm1,mm5 ; mm1=BOL - punpckhwd mm6,mm5 ; mm6=BOH - psrld mm1,1 ; mm1=BOL*FIX(0.500) - psrld mm6,1 ; mm6=BOH*FIX(0.500) - - movq mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ] - - paddd mm7,mm1 - paddd mm4,mm6 - paddd mm7,mm5 - paddd mm4,mm5 - psrld mm7,SCALEBITS ; mm7=CbOL - psrld mm4,SCALEBITS ; mm4=CbOH - packssdw mm7,mm4 ; mm7=CbO - - movq mm1, MMWORD [wk(2)] ; mm1=BE - - movq mm6,mm0 - punpcklwd mm0,mm2 - punpckhwd mm6,mm2 - movq mm5,mm0 - movq mm4,mm6 - pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337) - pmaddwd 
mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337) - pmaddwd mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331) - pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331) - - movq MMWORD [wk(6)], mm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) - movq MMWORD [wk(7)], mm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) - - pxor mm0,mm0 - pxor mm6,mm6 - punpcklwd mm0,mm1 ; mm0=BEL - punpckhwd mm6,mm1 ; mm6=BEH - psrld mm0,1 ; mm0=BEL*FIX(0.500) - psrld mm6,1 ; mm6=BEH*FIX(0.500) - - movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ] - - paddd mm5,mm0 - paddd mm4,mm6 - paddd mm5,mm1 - paddd mm4,mm1 - psrld mm5,SCALEBITS ; mm5=CbEL - psrld mm4,SCALEBITS ; mm4=CbEH - packssdw mm5,mm4 ; mm5=CbE - - psllw mm7,BYTE_BIT - por mm5,mm7 ; mm5=Cb - movq MMWORD [ebx], mm5 ; Save Cb - - movq mm0, MMWORD [wk(3)] ; mm0=BO - movq mm6, MMWORD [wk(2)] ; mm6=BE - movq mm1, MMWORD [wk(1)] ; mm1=RO - - movq mm4,mm0 - punpcklwd mm0,mm3 - punpckhwd mm4,mm3 - movq mm7,mm0 - movq mm5,mm4 - pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250) - pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250) - pmaddwd mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) - pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) - - movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF] - - paddd mm0, MMWORD [wk(4)] - paddd mm4, MMWORD [wk(5)] - paddd mm0,mm3 - paddd mm4,mm3 - psrld mm0,SCALEBITS ; mm0=YOL - psrld mm4,SCALEBITS ; mm4=YOH - packssdw mm0,mm4 ; mm0=YO - - pxor mm3,mm3 - pxor mm4,mm4 - punpcklwd mm3,mm1 ; mm3=ROL - punpckhwd mm4,mm1 ; mm4=ROH - psrld mm3,1 ; mm3=ROL*FIX(0.500) - psrld mm4,1 ; mm4=ROH*FIX(0.500) - - movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ] - - paddd mm7,mm3 - paddd mm5,mm4 - paddd mm7,mm1 - paddd mm5,mm1 - psrld mm7,SCALEBITS ; mm7=CrOL - psrld mm5,SCALEBITS ; mm5=CrOH - packssdw mm7,mm5 ; mm7=CrO - - movq mm3, 
MMWORD [wk(0)] ; mm3=RE - - movq mm4,mm6 - punpcklwd mm6,mm2 - punpckhwd mm4,mm2 - movq mm1,mm6 - movq mm5,mm4 - pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250) - pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250) - pmaddwd mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) - pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) - - movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF] - - paddd mm6, MMWORD [wk(6)] - paddd mm4, MMWORD [wk(7)] - paddd mm6,mm2 - paddd mm4,mm2 - psrld mm6,SCALEBITS ; mm6=YEL - psrld mm4,SCALEBITS ; mm4=YEH - packssdw mm6,mm4 ; mm6=YE - - psllw mm0,BYTE_BIT - por mm6,mm0 ; mm6=Y - movq MMWORD [edi], mm6 ; Save Y - - pxor mm2,mm2 - pxor mm4,mm4 - punpcklwd mm2,mm3 ; mm2=REL - punpckhwd mm4,mm3 ; mm4=REH - psrld mm2,1 ; mm2=REL*FIX(0.500) - psrld mm4,1 ; mm4=REH*FIX(0.500) - - movq mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ] - - paddd mm1,mm2 - paddd mm5,mm4 - paddd mm1,mm0 - paddd mm5,mm0 - psrld mm1,SCALEBITS ; mm1=CrEL - psrld mm5,SCALEBITS ; mm5=CrEH - packssdw mm1,mm5 ; mm1=CrE - - psllw mm7,BYTE_BIT - por mm1,mm7 ; mm1=Cr - movq MMWORD [edx], mm1 ; Save Cr - - sub ecx, byte SIZEOF_MMWORD - add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr - add edi, byte SIZEOF_MMWORD ; outptr0 - add ebx, byte SIZEOF_MMWORD ; outptr1 - add edx, byte SIZEOF_MMWORD ; outptr2 - cmp ecx, byte SIZEOF_MMWORD - jae near .columnloop - test ecx,ecx - jnz near .column_ld1 - - pop ecx ; col - pop esi - pop edi - pop ebx - pop edx - poppic eax - - add esi, byte SIZEOF_JSAMPROW ; input_buf - add edi, byte SIZEOF_JSAMPROW - add ebx, byte SIZEOF_JSAMPROW - add edx, byte SIZEOF_JSAMPROW - dec eax ; num_rows - jg near .rowloop - - emms ; empty MMX state - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For 
some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jccolext-sse2-64.asm b/simd/jccolext-sse2-64.asm deleted file mode 100644 index 8e4642d..0000000 --- a/simd/jccolext-sse2-64.asm +++ /dev/null @@ -1,486 +0,0 @@ -; -; jccolext.asm - colorspace conversion (64-bit SSE2) -; -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Convert some rows of samples to the output colorspace. 
-; -; GLOBAL(void) -; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width, -; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, -; JDIMENSION output_row, int num_rows); -; - -; r10 = JDIMENSION img_width -; r11 = JSAMPARRAY input_buf -; r12 = JSAMPIMAGE output_buf -; r13 = JDIMENSION output_row -; r14 = int num_rows - -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 8 - - align 16 - - global EXTN(jsimd_rgb_ycc_convert_sse2) - -EXTN(jsimd_rgb_ycc_convert_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - push rbx - - mov ecx, r10d - test rcx,rcx - jz near .return - - push rcx - - mov rsi, r12 - mov ecx, r13d - mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] - mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY] - mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY] - lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] - lea rbx, [rbx+rcx*SIZEOF_JSAMPROW] - lea rdx, [rdx+rcx*SIZEOF_JSAMPROW] - - pop rcx - - mov rsi, r11 - mov eax, r14d - test rax,rax - jle near .return -.rowloop: - push rdx - push rbx - push rdi - push rsi - push rcx ; col - - mov rsi, JSAMPROW [rsi] ; inptr - mov rdi, JSAMPROW [rdi] ; outptr0 - mov rbx, JSAMPROW [rbx] ; outptr1 - mov rdx, JSAMPROW [rdx] ; outptr2 - - cmp rcx, byte SIZEOF_XMMWORD - jae near .columnloop - -%if RGB_PIXELSIZE == 3 ; --------------- - -.column_ld1: - push rax - push rdx - lea rcx,[rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE - test cl, SIZEOF_BYTE - jz short .column_ld2 - sub rcx, byte SIZEOF_BYTE - movzx rax, BYTE [rsi+rcx] -.column_ld2: - test cl, SIZEOF_WORD - jz short .column_ld4 - sub rcx, byte SIZEOF_WORD - movzx rdx, WORD [rsi+rcx] - shl rax, WORD_BIT - or rax,rdx -.column_ld4: - movd xmmA,eax - pop rdx - pop rax - test cl, SIZEOF_DWORD - jz short .column_ld8 - sub rcx, byte SIZEOF_DWORD - movd xmmF, XMM_DWORD [rsi+rcx] - pslldq xmmA, SIZEOF_DWORD - por xmmA,xmmF 
-.column_ld8: - test cl, SIZEOF_MMWORD - jz short .column_ld16 - sub rcx, byte SIZEOF_MMWORD - movq xmmB, XMM_MMWORD [rsi+rcx] - pslldq xmmA, SIZEOF_MMWORD - por xmmA,xmmB -.column_ld16: - test cl, SIZEOF_XMMWORD - jz short .column_ld32 - movdqa xmmF,xmmA - movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] - mov rcx, SIZEOF_XMMWORD - jmp short .rgb_ycc_cnv -.column_ld32: - test cl, 2*SIZEOF_XMMWORD - mov rcx, SIZEOF_XMMWORD - jz short .rgb_ycc_cnv - movdqa xmmB,xmmA - movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] - jmp short .rgb_ycc_cnv - -.columnloop: - movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] - movdqu xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD] - -.rgb_ycc_cnv: - ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) - ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) - ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) - - movdqa xmmG,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) - psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) - pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) - - punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) - punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) - - movdqa xmmD,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) - psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) - pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) - - punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) - punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) - - movdqa xmmE,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) - psrldq 
xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) - pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) - - punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) - punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) - - pxor xmmH,xmmH - - movdqa xmmC,xmmA - punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) - punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) - - movdqa xmmB,xmmE - punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) - punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) - - movdqa xmmF,xmmD - punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) - punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -.column_ld1: - test cl, SIZEOF_XMMWORD/16 - jz short .column_ld2 - sub rcx, byte SIZEOF_XMMWORD/16 - movd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] -.column_ld2: - test cl, SIZEOF_XMMWORD/8 - jz short .column_ld4 - sub rcx, byte SIZEOF_XMMWORD/8 - movq xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE] - pslldq xmmA, SIZEOF_MMWORD - por xmmA,xmmE -.column_ld4: - test cl, SIZEOF_XMMWORD/4 - jz short .column_ld8 - sub rcx, byte SIZEOF_XMMWORD/4 - movdqa xmmE,xmmA - movdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE] -.column_ld8: - test cl, SIZEOF_XMMWORD/2 - mov rcx, SIZEOF_XMMWORD - jz short .rgb_ycc_cnv - movdqa xmmF,xmmA - movdqa xmmH,xmmE - movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] - jmp short .rgb_ycc_cnv - -.columnloop: - movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] - movdqu xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD] - movdqu xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD] - -.rgb_ycc_cnv: - ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) - ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) - ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) - ; xmmH=(0C 1C 2C 
3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) - - movdqa xmmD,xmmA - punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) - punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) - - movdqa xmmC,xmmF - punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) - punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) - - movdqa xmmB,xmmA - punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) - punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) - - movdqa xmmG,xmmD - punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) - punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) - - movdqa xmmE,xmmA - punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) - punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) - - movdqa xmmH,xmmB - punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) - punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) - - pxor xmmF,xmmF - - movdqa xmmC,xmmA - punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) - punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) - - movdqa xmmD,xmmB - punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) - punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) - - movdqa xmmG,xmmE - punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) - punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) - - punpcklbw xmmF,xmmH - punpckhbw xmmH,xmmH - psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) - psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) - -%endif ; RGB_PIXELSIZE ; --------------- - - ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE - ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO - - ; (Original) - ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B - ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE - ; Cr = 
0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE - ; - ; (This implementation) - ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G - ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE - ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE - - movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE - movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO - movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE - movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO - - movdqa xmm6,xmm1 - punpcklwd xmm1,xmm3 - punpckhwd xmm6,xmm3 - movdqa xmm7,xmm1 - movdqa xmm4,xmm6 - pmaddwd xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) - pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) - pmaddwd xmm7,[rel PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) - pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) - - movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) - movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) - - pxor xmm1,xmm1 - pxor xmm6,xmm6 - punpcklwd xmm1,xmm5 ; xmm1=BOL - punpckhwd xmm6,xmm5 ; xmm6=BOH - psrld xmm1,1 ; xmm1=BOL*FIX(0.500) - psrld xmm6,1 ; xmm6=BOH*FIX(0.500) - - movdqa xmm5,[rel PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ] - - paddd xmm7,xmm1 - paddd xmm4,xmm6 - paddd xmm7,xmm5 - paddd xmm4,xmm5 - psrld xmm7,SCALEBITS ; xmm7=CbOL - psrld xmm4,SCALEBITS ; xmm4=CbOH - packssdw xmm7,xmm4 ; xmm7=CbO - - movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE - - movdqa xmm6,xmm0 - punpcklwd xmm0,xmm2 - punpckhwd xmm6,xmm2 - movdqa xmm5,xmm0 - movdqa xmm4,xmm6 - pmaddwd xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) - pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) - pmaddwd xmm5,[rel PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) - pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) - - movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) - movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) - - pxor 
xmm0,xmm0 - pxor xmm6,xmm6 - punpcklwd xmm0,xmm1 ; xmm0=BEL - punpckhwd xmm6,xmm1 ; xmm6=BEH - psrld xmm0,1 ; xmm0=BEL*FIX(0.500) - psrld xmm6,1 ; xmm6=BEH*FIX(0.500) - - movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] - - paddd xmm5,xmm0 - paddd xmm4,xmm6 - paddd xmm5,xmm1 - paddd xmm4,xmm1 - psrld xmm5,SCALEBITS ; xmm5=CbEL - psrld xmm4,SCALEBITS ; xmm4=CbEH - packssdw xmm5,xmm4 ; xmm5=CbE - - psllw xmm7,BYTE_BIT - por xmm5,xmm7 ; xmm5=Cb - movdqa XMMWORD [rbx], xmm5 ; Save Cb - - movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO - movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE - movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO - - movdqa xmm4,xmm0 - punpcklwd xmm0,xmm3 - punpckhwd xmm4,xmm3 - movdqa xmm7,xmm0 - movdqa xmm5,xmm4 - pmaddwd xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) - pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) - pmaddwd xmm7,[rel PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) - pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) - - movdqa xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF] - - paddd xmm0, XMMWORD [wk(4)] - paddd xmm4, XMMWORD [wk(5)] - paddd xmm0,xmm3 - paddd xmm4,xmm3 - psrld xmm0,SCALEBITS ; xmm0=YOL - psrld xmm4,SCALEBITS ; xmm4=YOH - packssdw xmm0,xmm4 ; xmm0=YO - - pxor xmm3,xmm3 - pxor xmm4,xmm4 - punpcklwd xmm3,xmm1 ; xmm3=ROL - punpckhwd xmm4,xmm1 ; xmm4=ROH - psrld xmm3,1 ; xmm3=ROL*FIX(0.500) - psrld xmm4,1 ; xmm4=ROH*FIX(0.500) - - movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] - - paddd xmm7,xmm3 - paddd xmm5,xmm4 - paddd xmm7,xmm1 - paddd xmm5,xmm1 - psrld xmm7,SCALEBITS ; xmm7=CrOL - psrld xmm5,SCALEBITS ; xmm5=CrOH - packssdw xmm7,xmm5 ; xmm7=CrO - - movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE - - movdqa xmm4,xmm6 - punpcklwd xmm6,xmm2 - punpckhwd xmm4,xmm2 - movdqa xmm1,xmm6 - movdqa xmm5,xmm4 - pmaddwd xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) - pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) - pmaddwd xmm1,[rel 
PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) - pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) - - movdqa xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF] - - paddd xmm6, XMMWORD [wk(6)] - paddd xmm4, XMMWORD [wk(7)] - paddd xmm6,xmm2 - paddd xmm4,xmm2 - psrld xmm6,SCALEBITS ; xmm6=YEL - psrld xmm4,SCALEBITS ; xmm4=YEH - packssdw xmm6,xmm4 ; xmm6=YE - - psllw xmm0,BYTE_BIT - por xmm6,xmm0 ; xmm6=Y - movdqa XMMWORD [rdi], xmm6 ; Save Y - - pxor xmm2,xmm2 - pxor xmm4,xmm4 - punpcklwd xmm2,xmm3 ; xmm2=REL - punpckhwd xmm4,xmm3 ; xmm4=REH - psrld xmm2,1 ; xmm2=REL*FIX(0.500) - psrld xmm4,1 ; xmm4=REH*FIX(0.500) - - movdqa xmm0,[rel PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ] - - paddd xmm1,xmm2 - paddd xmm5,xmm4 - paddd xmm1,xmm0 - paddd xmm5,xmm0 - psrld xmm1,SCALEBITS ; xmm1=CrEL - psrld xmm5,SCALEBITS ; xmm5=CrEH - packssdw xmm1,xmm5 ; xmm1=CrE - - psllw xmm7,BYTE_BIT - por xmm1,xmm7 ; xmm1=Cr - movdqa XMMWORD [rdx], xmm1 ; Save Cr - - sub rcx, byte SIZEOF_XMMWORD - add rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr - add rdi, byte SIZEOF_XMMWORD ; outptr0 - add rbx, byte SIZEOF_XMMWORD ; outptr1 - add rdx, byte SIZEOF_XMMWORD ; outptr2 - cmp rcx, byte SIZEOF_XMMWORD - jae near .columnloop - test rcx,rcx - jnz near .column_ld1 - - pop rcx ; col - pop rsi - pop rdi - pop rbx - pop rdx - - add rsi, byte SIZEOF_JSAMPROW ; input_buf - add rdi, byte SIZEOF_JSAMPROW - add rbx, byte SIZEOF_JSAMPROW - add rdx, byte SIZEOF_JSAMPROW - dec rax ; num_rows - jg near .rowloop - -.return: - pop rbx - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jccolext-sse2.asm b/simd/jccolext-sse2.asm deleted file mode 100644 index cc38e98..0000000 --- a/simd/jccolext-sse2.asm +++ /dev/null @@ -1,503 +0,0 @@ -; -; jccolext.asm - colorspace conversion (SSE2) -; -; x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Convert some rows of samples to the output colorspace. -; -; GLOBAL(void) -; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width, -; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, -; JDIMENSION output_row, int num_rows); -; - -%define img_width(b) (b)+8 ; JDIMENSION img_width -%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf -%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf -%define output_row(b) (b)+20 ; JDIMENSION output_row -%define num_rows(b) (b)+24 ; int num_rows - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 8 -%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr - - align 16 - - global EXTN(jsimd_rgb_ycc_convert_sse2) - -EXTN(jsimd_rgb_ycc_convert_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic eax ; make a room for GOT address - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - movpic POINTER [gotptr], ebx ; save GOT address - - mov 
ecx, JDIMENSION [img_width(eax)] - test ecx,ecx - jz near .return - - push ecx - - mov esi, JSAMPIMAGE [output_buf(eax)] - mov ecx, JDIMENSION [output_row(eax)] - mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] - mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY] - mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY] - lea edi, [edi+ecx*SIZEOF_JSAMPROW] - lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] - lea edx, [edx+ecx*SIZEOF_JSAMPROW] - - pop ecx - - mov esi, JSAMPARRAY [input_buf(eax)] - mov eax, INT [num_rows(eax)] - test eax,eax - jle near .return - alignx 16,7 -.rowloop: - pushpic eax - push edx - push ebx - push edi - push esi - push ecx ; col - - mov esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr0 - mov ebx, JSAMPROW [ebx] ; outptr1 - mov edx, JSAMPROW [edx] ; outptr2 - movpic eax, POINTER [gotptr] ; load GOT address (eax) - - cmp ecx, byte SIZEOF_XMMWORD - jae near .columnloop - alignx 16,7 - -%if RGB_PIXELSIZE == 3 ; --------------- - -.column_ld1: - push eax - push edx - lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE - test cl, SIZEOF_BYTE - jz short .column_ld2 - sub ecx, byte SIZEOF_BYTE - movzx eax, BYTE [esi+ecx] -.column_ld2: - test cl, SIZEOF_WORD - jz short .column_ld4 - sub ecx, byte SIZEOF_WORD - movzx edx, WORD [esi+ecx] - shl eax, WORD_BIT - or eax,edx -.column_ld4: - movd xmmA,eax - pop edx - pop eax - test cl, SIZEOF_DWORD - jz short .column_ld8 - sub ecx, byte SIZEOF_DWORD - movd xmmF, XMM_DWORD [esi+ecx] - pslldq xmmA, SIZEOF_DWORD - por xmmA,xmmF -.column_ld8: - test cl, SIZEOF_MMWORD - jz short .column_ld16 - sub ecx, byte SIZEOF_MMWORD - movq xmmB, XMM_MMWORD [esi+ecx] - pslldq xmmA, SIZEOF_MMWORD - por xmmA,xmmB -.column_ld16: - test cl, SIZEOF_XMMWORD - jz short .column_ld32 - movdqa xmmF,xmmA - movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] - mov ecx, SIZEOF_XMMWORD - jmp short .rgb_ycc_cnv -.column_ld32: - test cl, 2*SIZEOF_XMMWORD - mov ecx, SIZEOF_XMMWORD - jz short .rgb_ycc_cnv - movdqa xmmB,xmmA - movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] - 
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] - jmp short .rgb_ycc_cnv - alignx 16,7 - -.columnloop: - movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] - movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD] - -.rgb_ycc_cnv: - ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) - ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) - ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) - - movdqa xmmG,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) - psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) - pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) - - punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) - punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) - - movdqa xmmD,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) - psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) - pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) - - punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) - punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) - - movdqa xmmE,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) - psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) - pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) - - punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) - punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) - - pxor xmmH,xmmH - - movdqa xmmC,xmmA - punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) - punpckhbw xmmC,xmmH ; 
xmmC=(10 12 14 16 18 1A 1C 1E) - - movdqa xmmB,xmmE - punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) - punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) - - movdqa xmmF,xmmD - punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) - punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -.column_ld1: - test cl, SIZEOF_XMMWORD/16 - jz short .column_ld2 - sub ecx, byte SIZEOF_XMMWORD/16 - movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE] -.column_ld2: - test cl, SIZEOF_XMMWORD/8 - jz short .column_ld4 - sub ecx, byte SIZEOF_XMMWORD/8 - movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE] - pslldq xmmA, SIZEOF_MMWORD - por xmmA,xmmE -.column_ld4: - test cl, SIZEOF_XMMWORD/4 - jz short .column_ld8 - sub ecx, byte SIZEOF_XMMWORD/4 - movdqa xmmE,xmmA - movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE] -.column_ld8: - test cl, SIZEOF_XMMWORD/2 - mov ecx, SIZEOF_XMMWORD - jz short .rgb_ycc_cnv - movdqa xmmF,xmmA - movdqa xmmH,xmmE - movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] - jmp short .rgb_ycc_cnv - alignx 16,7 - -.columnloop: - movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] - movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD] - movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD] - -.rgb_ycc_cnv: - ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) - ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) - ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) - ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) - - movdqa xmmD,xmmA - punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) - punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) - - movdqa xmmC,xmmF - punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) - punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) - - movdqa xmmB,xmmA - punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 
30 34 38 3C) - punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) - - movdqa xmmG,xmmD - punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) - punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) - - movdqa xmmE,xmmA - punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) - punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) - - movdqa xmmH,xmmB - punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) - punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) - - pxor xmmF,xmmF - - movdqa xmmC,xmmA - punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) - punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) - - movdqa xmmD,xmmB - punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) - punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) - - movdqa xmmG,xmmE - punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) - punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) - - punpcklbw xmmF,xmmH - punpckhbw xmmH,xmmH - psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) - psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) - -%endif ; RGB_PIXELSIZE ; --------------- - - ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE - ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO - - ; (Original) - ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B - ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE - ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE - ; - ; (This implementation) - ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G - ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE - ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE - - movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE - movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO - movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE - movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO - - movdqa xmm6,xmm1 - punpcklwd 
xmm1,xmm3 - punpckhwd xmm6,xmm3 - movdqa xmm7,xmm1 - movdqa xmm4,xmm6 - pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) - pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) - pmaddwd xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) - pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) - - movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) - movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) - - pxor xmm1,xmm1 - pxor xmm6,xmm6 - punpcklwd xmm1,xmm5 ; xmm1=BOL - punpckhwd xmm6,xmm5 ; xmm6=BOH - psrld xmm1,1 ; xmm1=BOL*FIX(0.500) - psrld xmm6,1 ; xmm6=BOH*FIX(0.500) - - movdqa xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ] - - paddd xmm7,xmm1 - paddd xmm4,xmm6 - paddd xmm7,xmm5 - paddd xmm4,xmm5 - psrld xmm7,SCALEBITS ; xmm7=CbOL - psrld xmm4,SCALEBITS ; xmm4=CbOH - packssdw xmm7,xmm4 ; xmm7=CbO - - movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE - - movdqa xmm6,xmm0 - punpcklwd xmm0,xmm2 - punpckhwd xmm6,xmm2 - movdqa xmm5,xmm0 - movdqa xmm4,xmm6 - pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) - pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) - pmaddwd xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) - pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) - - movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) - movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) - - pxor xmm0,xmm0 - pxor xmm6,xmm6 - punpcklwd xmm0,xmm1 ; xmm0=BEL - punpckhwd xmm6,xmm1 ; xmm6=BEH - psrld xmm0,1 ; xmm0=BEL*FIX(0.500) - psrld xmm6,1 ; xmm6=BEH*FIX(0.500) - - movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ] - - paddd xmm5,xmm0 - paddd xmm4,xmm6 - paddd xmm5,xmm1 - paddd xmm4,xmm1 - psrld xmm5,SCALEBITS ; xmm5=CbEL - psrld xmm4,SCALEBITS ; xmm4=CbEH - packssdw xmm5,xmm4 ; xmm5=CbE - 
- psllw xmm7,BYTE_BIT - por xmm5,xmm7 ; xmm5=Cb - movdqa XMMWORD [ebx], xmm5 ; Save Cb - - movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO - movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE - movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO - - movdqa xmm4,xmm0 - punpcklwd xmm0,xmm3 - punpckhwd xmm4,xmm3 - movdqa xmm7,xmm0 - movdqa xmm5,xmm4 - pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) - pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) - pmaddwd xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) - pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) - - movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF] - - paddd xmm0, XMMWORD [wk(4)] - paddd xmm4, XMMWORD [wk(5)] - paddd xmm0,xmm3 - paddd xmm4,xmm3 - psrld xmm0,SCALEBITS ; xmm0=YOL - psrld xmm4,SCALEBITS ; xmm4=YOH - packssdw xmm0,xmm4 ; xmm0=YO - - pxor xmm3,xmm3 - pxor xmm4,xmm4 - punpcklwd xmm3,xmm1 ; xmm3=ROL - punpckhwd xmm4,xmm1 ; xmm4=ROH - psrld xmm3,1 ; xmm3=ROL*FIX(0.500) - psrld xmm4,1 ; xmm4=ROH*FIX(0.500) - - movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ] - - paddd xmm7,xmm3 - paddd xmm5,xmm4 - paddd xmm7,xmm1 - paddd xmm5,xmm1 - psrld xmm7,SCALEBITS ; xmm7=CrOL - psrld xmm5,SCALEBITS ; xmm5=CrOH - packssdw xmm7,xmm5 ; xmm7=CrO - - movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE - - movdqa xmm4,xmm6 - punpcklwd xmm6,xmm2 - punpckhwd xmm4,xmm2 - movdqa xmm1,xmm6 - movdqa xmm5,xmm4 - pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) - pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) - pmaddwd xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) - pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) - - movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF] - - paddd xmm6, XMMWORD [wk(6)] - paddd xmm4, XMMWORD [wk(7)] - paddd xmm6,xmm2 - paddd xmm4,xmm2 - psrld xmm6,SCALEBITS ; xmm6=YEL - psrld 
xmm4,SCALEBITS ; xmm4=YEH - packssdw xmm6,xmm4 ; xmm6=YE - - psllw xmm0,BYTE_BIT - por xmm6,xmm0 ; xmm6=Y - movdqa XMMWORD [edi], xmm6 ; Save Y - - pxor xmm2,xmm2 - pxor xmm4,xmm4 - punpcklwd xmm2,xmm3 ; xmm2=REL - punpckhwd xmm4,xmm3 ; xmm4=REH - psrld xmm2,1 ; xmm2=REL*FIX(0.500) - psrld xmm4,1 ; xmm4=REH*FIX(0.500) - - movdqa xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ] - - paddd xmm1,xmm2 - paddd xmm5,xmm4 - paddd xmm1,xmm0 - paddd xmm5,xmm0 - psrld xmm1,SCALEBITS ; xmm1=CrEL - psrld xmm5,SCALEBITS ; xmm5=CrEH - packssdw xmm1,xmm5 ; xmm1=CrE - - psllw xmm7,BYTE_BIT - por xmm1,xmm7 ; xmm1=Cr - movdqa XMMWORD [edx], xmm1 ; Save Cr - - sub ecx, byte SIZEOF_XMMWORD - add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr - add edi, byte SIZEOF_XMMWORD ; outptr0 - add ebx, byte SIZEOF_XMMWORD ; outptr1 - add edx, byte SIZEOF_XMMWORD ; outptr2 - cmp ecx, byte SIZEOF_XMMWORD - jae near .columnloop - test ecx,ecx - jnz near .column_ld1 - - pop ecx ; col - pop esi - pop edi - pop ebx - pop edx - poppic eax - - add esi, byte SIZEOF_JSAMPROW ; input_buf - add edi, byte SIZEOF_JSAMPROW - add ebx, byte SIZEOF_JSAMPROW - add edx, byte SIZEOF_JSAMPROW - dec eax ; num_rows - jg near .rowloop - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jccolor-altivec.c b/simd/jccolor-altivec.c deleted file mode 100644 index ec47332..0000000 --- a/simd/jccolor-altivec.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * AltiVec optimizations for libjpeg-turbo - * - * Copyright (C) 2014, D. R. Commander. All Rights Reserved. - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. 
- * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* RGB --> YCC CONVERSION */ - -#include "jsimd_altivec.h" - - -#define F_0_081 5329 /* FIX(0.08131) */ -#define F_0_114 7471 /* FIX(0.11400) */ -#define F_0_168 11059 /* FIX(0.16874) */ -#define F_0_250 16384 /* FIX(0.25000) */ -#define F_0_299 19595 /* FIX(0.29900) */ -#define F_0_331 21709 /* FIX(0.33126) */ -#define F_0_418 27439 /* FIX(0.41869) */ -#define F_0_500 32768 /* FIX(0.50000) */ -#define F_0_587 38470 /* FIX(0.58700) */ -#define F_0_337 (F_0_587 - F_0_250) /* FIX(0.58700) - FIX(0.25000) */ - -#define SCALEBITS 16 -#define ONE_HALF (1 << (SCALEBITS - 1)) - - -#define RGBG_INDEX0 {0,1,3,4,6,7,9,10,2,1,5,4,8,7,11,10} -#define RGBG_INDEX1 {12,13,15,16,18,19,21,22,14,13,17,16,20,19,23,22} -#define RGBG_INDEX2 {8,9,11,12,14,15,17,18,10,9,13,12,16,15,19,18} -#define RGBG_INDEX3 {4,5,7,8,10,11,13,14,6,5,9,8,12,11,15,14} -#include "jccolext-altivec.c" -#undef RGB_PIXELSIZE - -#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -#define jsimd_rgb_ycc_convert_altivec jsimd_extrgb_ycc_convert_altivec -#include "jccolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX0 -#undef RGBG_INDEX1 -#undef RGBG_INDEX2 -#undef RGBG_INDEX3 -#undef jsimd_rgb_ycc_convert_altivec - -#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -#define RGBG_INDEX {0,1,4,5,8,9,12,13,2,1,6,5,10,9,14,13} -#define 
jsimd_rgb_ycc_convert_altivec jsimd_extrgbx_ycc_convert_altivec -#include "jccolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX -#undef jsimd_rgb_ycc_convert_altivec - -#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -#define RGBG_INDEX0 {2,1,5,4,8,7,11,10,0,1,3,4,6,7,9,10} -#define RGBG_INDEX1 {14,13,17,16,20,19,23,22,12,13,15,16,18,19,21,22} -#define RGBG_INDEX2 {10,9,13,12,16,15,19,18,8,9,11,12,14,15,17,18} -#define RGBG_INDEX3 {6,5,9,8,12,11,15,14,4,5,7,8,10,11,13,14} -#define jsimd_rgb_ycc_convert_altivec jsimd_extbgr_ycc_convert_altivec -#include "jccolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX0 -#undef RGBG_INDEX1 -#undef RGBG_INDEX2 -#undef RGBG_INDEX3 -#undef jsimd_rgb_ycc_convert_altivec - -#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -#define RGBG_INDEX {2,1,6,5,10,9,14,13,0,1,4,5,8,9,12,13} -#define jsimd_rgb_ycc_convert_altivec jsimd_extbgrx_ycc_convert_altivec -#include "jccolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX -#undef jsimd_rgb_ycc_convert_altivec - -#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -#define RGBG_INDEX {3,2,7,6,11,10,15,14,1,2,5,6,9,10,13,14} -#define jsimd_rgb_ycc_convert_altivec jsimd_extxbgr_ycc_convert_altivec -#include "jccolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX -#undef jsimd_rgb_ycc_convert_altivec - -#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -#define RGBG_INDEX {1,2,5,6,9,10,13,14,3,2,7,6,11,10,15,14} -#define jsimd_rgb_ycc_convert_altivec jsimd_extxrgb_ycc_convert_altivec -#include "jccolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX -#undef jsimd_rgb_ycc_convert_altivec diff --git a/simd/jccolor-mmx.asm b/simd/jccolor-mmx.asm deleted file mode 100644 index c4e6d88..0000000 --- a/simd/jccolor-mmx.asm +++ /dev/null @@ -1,122 +0,0 @@ -; -; jccolor.asm - colorspace conversion (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_081 equ 5329 ; FIX(0.08131) -F_0_114 equ 7471 ; FIX(0.11400) -F_0_168 equ 11059 ; FIX(0.16874) -F_0_250 equ 16384 ; FIX(0.25000) -F_0_299 equ 19595 ; FIX(0.29900) -F_0_331 equ 21709 ; FIX(0.33126) -F_0_418 equ 27439 ; FIX(0.41869) -F_0_587 equ 38470 ; FIX(0.58700) -F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_rgb_ycc_convert_mmx) - -EXTN(jconst_rgb_ycc_convert_mmx): - -PW_F0299_F0337 times 2 dw F_0_299, F_0_337 -PW_F0114_F0250 times 2 dw F_0_114, F_0_250 -PW_MF016_MF033 times 2 dw -F_0_168,-F_0_331 -PW_MF008_MF041 times 2 dw -F_0_081,-F_0_418 -PD_ONEHALFM1_CJ times 2 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) -PD_ONEHALF times 2 dd (1 << (SCALEBITS-1)) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 - -%include "jccolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_rgb_ycc_convert_mmx jsimd_extrgb_ycc_convert_mmx -%include "jccolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE 
-%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_rgb_ycc_convert_mmx jsimd_extrgbx_ycc_convert_mmx -%include "jccolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_rgb_ycc_convert_mmx jsimd_extbgr_ycc_convert_mmx -%include "jccolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_rgb_ycc_convert_mmx jsimd_extbgrx_ycc_convert_mmx -%include "jccolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_rgb_ycc_convert_mmx jsimd_extxbgr_ycc_convert_mmx -%include "jccolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_rgb_ycc_convert_mmx jsimd_extxrgb_ycc_convert_mmx -%include "jccolext-mmx.asm" diff --git a/simd/jccolor-sse2-64.asm b/simd/jccolor-sse2-64.asm deleted file mode 100644 index bd2188b..0000000 --- a/simd/jccolor-sse2-64.asm +++ /dev/null @@ -1,121 +0,0 @@ -; -; jccolor.asm - colorspace conversion (64-bit SSE2) -; -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_081 equ 5329 ; FIX(0.08131) -F_0_114 equ 7471 ; FIX(0.11400) -F_0_168 equ 11059 ; FIX(0.16874) -F_0_250 equ 16384 ; FIX(0.25000) -F_0_299 equ 19595 ; FIX(0.29900) -F_0_331 equ 21709 ; FIX(0.33126) -F_0_418 equ 27439 ; FIX(0.41869) -F_0_587 equ 38470 ; FIX(0.58700) -F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_rgb_ycc_convert_sse2) - -EXTN(jconst_rgb_ycc_convert_sse2): - -PW_F0299_F0337 times 4 dw F_0_299, F_0_337 -PW_F0114_F0250 times 4 dw F_0_114, F_0_250 -PW_MF016_MF033 times 4 dw -F_0_168,-F_0_331 -PW_MF008_MF041 times 4 dw -F_0_081,-F_0_418 -PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) -PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 - -%include "jccolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2 -%include "jccolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE 
EXT_RGBX_BLUE -%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2 -%include "jccolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2 -%include "jccolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2 -%include "jccolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2 -%include "jccolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2 -%include "jccolext-sse2-64.asm" diff --git a/simd/jccolor-sse2.asm b/simd/jccolor-sse2.asm deleted file mode 100644 index 13124d1..0000000 --- a/simd/jccolor-sse2.asm +++ /dev/null @@ -1,121 +0,0 @@ -; -; jccolor.asm - colorspace conversion (SSE2) -; -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_081 equ 5329 ; FIX(0.08131) -F_0_114 equ 7471 ; FIX(0.11400) -F_0_168 equ 11059 ; FIX(0.16874) -F_0_250 equ 16384 ; FIX(0.25000) -F_0_299 equ 19595 ; FIX(0.29900) -F_0_331 equ 21709 ; FIX(0.33126) -F_0_418 equ 27439 ; FIX(0.41869) -F_0_587 equ 38470 ; FIX(0.58700) -F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_rgb_ycc_convert_sse2) - -EXTN(jconst_rgb_ycc_convert_sse2): - -PW_F0299_F0337 times 4 dw F_0_299, F_0_337 -PW_F0114_F0250 times 4 dw F_0_114, F_0_250 -PW_MF016_MF033 times 4 dw -F_0_168,-F_0_331 -PW_MF008_MF041 times 4 dw -F_0_081,-F_0_418 -PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS) -PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 - -%include "jccolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2 -%include "jccolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE 
-%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2 -%include "jccolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2 -%include "jccolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2 -%include "jccolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2 -%include "jccolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2 -%include "jccolext-sse2.asm" diff --git a/simd/jcgray-altivec.c b/simd/jcgray-altivec.c deleted file mode 100644 index 684df5e..0000000 --- a/simd/jcgray-altivec.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * AltiVec optimizations for libjpeg-turbo - * - * Copyright (C) 2014, D. R. Commander. All Rights Reserved. - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. 
- * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* RGB --> GRAYSCALE CONVERSION */ - -#include "jsimd_altivec.h" - - -#define F_0_114 7471 /* FIX(0.11400) */ -#define F_0_250 16384 /* FIX(0.25000) */ -#define F_0_299 19595 /* FIX(0.29900) */ -#define F_0_587 38470 /* FIX(0.58700) */ -#define F_0_337 (F_0_587 - F_0_250) /* FIX(0.58700) - FIX(0.25000) */ - -#define SCALEBITS 16 -#define ONE_HALF (1 << (SCALEBITS - 1)) - - -#define RGBG_INDEX0 {0,1,3,4,6,7,9,10,2,1,5,4,8,7,11,10} -#define RGBG_INDEX1 {12,13,15,16,18,19,21,22,14,13,17,16,20,19,23,22} -#define RGBG_INDEX2 {8,9,11,12,14,15,17,18,10,9,13,12,16,15,19,18} -#define RGBG_INDEX3 {4,5,7,8,10,11,13,14,6,5,9,8,12,11,15,14} -#include "jcgryext-altivec.c" -#undef RGB_PIXELSIZE - -#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -#define jsimd_rgb_gray_convert_altivec jsimd_extrgb_gray_convert_altivec -#include "jcgryext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX0 -#undef RGBG_INDEX1 -#undef RGBG_INDEX2 -#undef RGBG_INDEX3 -#undef jsimd_rgb_gray_convert_altivec - -#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -#define RGBG_INDEX {0,1,4,5,8,9,12,13,2,1,6,5,10,9,14,13} -#define jsimd_rgb_gray_convert_altivec jsimd_extrgbx_gray_convert_altivec -#include "jcgryext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX -#undef jsimd_rgb_gray_convert_altivec - -#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE 
-#define RGBG_INDEX0 {2,1,5,4,8,7,11,10,0,1,3,4,6,7,9,10} -#define RGBG_INDEX1 {14,13,17,16,20,19,23,22,12,13,15,16,18,19,21,22} -#define RGBG_INDEX2 {10,9,13,12,16,15,19,18,8,9,11,12,14,15,17,18} -#define RGBG_INDEX3 {6,5,9,8,12,11,15,14,4,5,7,8,10,11,13,14} -#define jsimd_rgb_gray_convert_altivec jsimd_extbgr_gray_convert_altivec -#include "jcgryext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX0 -#undef RGBG_INDEX1 -#undef RGBG_INDEX2 -#undef RGBG_INDEX3 -#undef jsimd_rgb_gray_convert_altivec - -#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -#define RGBG_INDEX {2,1,6,5,10,9,14,13,0,1,4,5,8,9,12,13} -#define jsimd_rgb_gray_convert_altivec jsimd_extbgrx_gray_convert_altivec -#include "jcgryext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX -#undef jsimd_rgb_gray_convert_altivec - -#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -#define RGBG_INDEX {3,2,7,6,11,10,15,14,1,2,5,6,9,10,13,14} -#define jsimd_rgb_gray_convert_altivec jsimd_extxbgr_gray_convert_altivec -#include "jcgryext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX -#undef jsimd_rgb_gray_convert_altivec - -#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -#define RGBG_INDEX {1,2,5,6,9,10,13,14,3,2,7,6,11,10,15,14} -#define jsimd_rgb_gray_convert_altivec jsimd_extxrgb_gray_convert_altivec -#include "jcgryext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGBG_INDEX -#undef jsimd_rgb_gray_convert_altivec diff --git a/simd/jcgray-mmx.asm b/simd/jcgray-mmx.asm deleted file mode 100644 index 0819b6c..0000000 --- a/simd/jcgray-mmx.asm +++ /dev/null @@ -1,115 +0,0 @@ -; -; jcgray.asm - grayscale colorspace conversion (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2011, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_114 equ 7471 ; FIX(0.11400) -F_0_250 equ 16384 ; FIX(0.25000) -F_0_299 equ 19595 ; FIX(0.29900) -F_0_587 equ 38470 ; FIX(0.58700) -F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_rgb_gray_convert_mmx) - -EXTN(jconst_rgb_gray_convert_mmx): - -PW_F0299_F0337 times 2 dw F_0_299, F_0_337 -PW_F0114_F0250 times 2 dw F_0_114, F_0_250 -PD_ONEHALF times 2 dd (1 << (SCALEBITS-1)) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 - -%include "jcgryext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_rgb_gray_convert_mmx jsimd_extrgb_gray_convert_mmx -%include "jcgryext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE -%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_rgb_gray_convert_mmx jsimd_extrgbx_gray_convert_mmx -%include "jcgryext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE 
-%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_rgb_gray_convert_mmx jsimd_extbgr_gray_convert_mmx -%include "jcgryext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_rgb_gray_convert_mmx jsimd_extbgrx_gray_convert_mmx -%include "jcgryext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_rgb_gray_convert_mmx jsimd_extxbgr_gray_convert_mmx -%include "jcgryext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_rgb_gray_convert_mmx jsimd_extxrgb_gray_convert_mmx -%include "jcgryext-mmx.asm" diff --git a/simd/jcgray-sse2-64.asm b/simd/jcgray-sse2-64.asm deleted file mode 100644 index bafd302..0000000 --- a/simd/jcgray-sse2-64.asm +++ /dev/null @@ -1,114 +0,0 @@ -; -; jcgray.asm - grayscale colorspace conversion (64-bit SSE2) -; -; Copyright (C) 2011, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_114 equ 7471 ; FIX(0.11400) -F_0_250 equ 16384 ; FIX(0.25000) -F_0_299 equ 19595 ; FIX(0.29900) -F_0_587 equ 38470 ; FIX(0.58700) -F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_rgb_gray_convert_sse2) - -EXTN(jconst_rgb_gray_convert_sse2): - -PW_F0299_F0337 times 4 dw F_0_299, F_0_337 -PW_F0114_F0250 times 4 dw F_0_114, F_0_250 -PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 - -%include "jcgryext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2 -%include "jcgryext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE -%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2 -%include "jcgryext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2 -%include "jcgryext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED 
-%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2 -%include "jcgryext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2 -%include "jcgryext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2 -%include "jcgryext-sse2-64.asm" diff --git a/simd/jcgray-sse2.asm b/simd/jcgray-sse2.asm deleted file mode 100644 index 5b0b466..0000000 --- a/simd/jcgray-sse2.asm +++ /dev/null @@ -1,114 +0,0 @@ -; -; jcgray.asm - grayscale colorspace conversion (SSE2) -; -; Copyright (C) 2011, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_114 equ 7471 ; FIX(0.11400) -F_0_250 equ 16384 ; FIX(0.25000) -F_0_299 equ 19595 ; FIX(0.29900) -F_0_587 equ 38470 ; FIX(0.58700) -F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_rgb_gray_convert_sse2) - -EXTN(jconst_rgb_gray_convert_sse2): - -PW_F0299_F0337 times 4 dw F_0_299, F_0_337 -PW_F0114_F0250 times 4 dw F_0_114, F_0_250 -PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 - -%include "jcgryext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2 -%include "jcgryext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE -%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2 -%include "jcgryext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2 -%include "jcgryext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN 
EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2 -%include "jcgryext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2 -%include "jcgryext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2 -%include "jcgryext-sse2.asm" diff --git a/simd/jcgryext-mmx.asm b/simd/jcgryext-mmx.asm deleted file mode 100644 index 1c1b8d8..0000000 --- a/simd/jcgryext-mmx.asm +++ /dev/null @@ -1,356 +0,0 @@ -; -; jcgryext.asm - grayscale colorspace conversion (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2011, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Convert some rows of samples to the output colorspace. 
-; -; GLOBAL(void) -; jsimd_rgb_gray_convert_mmx (JDIMENSION img_width, -; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, -; JDIMENSION output_row, int num_rows); -; - -%define img_width(b) (b)+8 ; JDIMENSION img_width -%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf -%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf -%define output_row(b) (b)+20 ; JDIMENSION output_row -%define num_rows(b) (b)+24 ; int num_rows - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 2 -%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr - - align 16 - global EXTN(jsimd_rgb_gray_convert_mmx) - -EXTN(jsimd_rgb_gray_convert_mmx): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic eax ; make a room for GOT address - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - movpic POINTER [gotptr], ebx ; save GOT address - - mov ecx, JDIMENSION [img_width(eax)] ; num_cols - test ecx,ecx - jz near .return - - push ecx - - mov esi, JSAMPIMAGE [output_buf(eax)] - mov ecx, JDIMENSION [output_row(eax)] - mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] - lea edi, [edi+ecx*SIZEOF_JSAMPROW] - - pop ecx - - mov esi, JSAMPARRAY [input_buf(eax)] - mov eax, INT [num_rows(eax)] - test eax,eax - jle near .return - alignx 16,7 -.rowloop: - pushpic eax - push edi - push esi - push ecx ; col - - mov esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr0 - movpic eax, POINTER [gotptr] ; load GOT address (eax) - - cmp ecx, byte SIZEOF_MMWORD - jae short .columnloop - alignx 16,7 - -%if RGB_PIXELSIZE == 3 ; --------------- - -.column_ld1: - push eax - push edx - lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE - test cl, SIZEOF_BYTE - jz short .column_ld2 - sub ecx, byte SIZEOF_BYTE - xor eax,eax - mov al, BYTE [esi+ecx] 
-.column_ld2: - test cl, SIZEOF_WORD - jz short .column_ld4 - sub ecx, byte SIZEOF_WORD - xor edx,edx - mov dx, WORD [esi+ecx] - shl eax, WORD_BIT - or eax,edx -.column_ld4: - movd mmA,eax - pop edx - pop eax - test cl, SIZEOF_DWORD - jz short .column_ld8 - sub ecx, byte SIZEOF_DWORD - movd mmG, DWORD [esi+ecx] - psllq mmA, DWORD_BIT - por mmA,mmG -.column_ld8: - test cl, SIZEOF_MMWORD - jz short .column_ld16 - movq mmG,mmA - movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] - mov ecx, SIZEOF_MMWORD - jmp short .rgb_gray_cnv -.column_ld16: - test cl, 2*SIZEOF_MMWORD - mov ecx, SIZEOF_MMWORD - jz short .rgb_gray_cnv - movq mmF,mmA - movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] - movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] - jmp short .rgb_gray_cnv - alignx 16,7 - -.columnloop: - movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] - movq mmG, MMWORD [esi+1*SIZEOF_MMWORD] - movq mmF, MMWORD [esi+2*SIZEOF_MMWORD] - -.rgb_gray_cnv: - ; mmA=(00 10 20 01 11 21 02 12) - ; mmG=(22 03 13 23 04 14 24 05) - ; mmF=(15 25 06 16 26 07 17 27) - - movq mmD,mmA - psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01) - psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --) - - punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05) - psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23) - - punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16) - punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27) - - movq mmE,mmA - psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14) - psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --) - - punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) - psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25) - - punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07) - punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27) - - pxor mmH,mmH - - movq mmC,mmA - punpcklbw mmA,mmH ; mmA=(00 02 04 06) - punpckhbw mmC,mmH ; mmC=(10 12 14 16) - - movq mmB,mmE - punpcklbw mmE,mmH ; mmE=(20 22 24 26) - punpckhbw mmB,mmH ; mmB=(01 03 05 07) - - movq mmF,mmD - punpcklbw mmD,mmH ; mmD=(11 13 15 17) - punpckhbw mmF,mmH ; mmF=(21 23 25 27) 
- -%else ; RGB_PIXELSIZE == 4 ; ----------- - -.column_ld1: - test cl, SIZEOF_MMWORD/8 - jz short .column_ld2 - sub ecx, byte SIZEOF_MMWORD/8 - movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE] -.column_ld2: - test cl, SIZEOF_MMWORD/4 - jz short .column_ld4 - sub ecx, byte SIZEOF_MMWORD/4 - movq mmF,mmA - movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE] -.column_ld4: - test cl, SIZEOF_MMWORD/2 - mov ecx, SIZEOF_MMWORD - jz short .rgb_gray_cnv - movq mmD,mmA - movq mmC,mmF - movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] - movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] - jmp short .rgb_gray_cnv - alignx 16,7 - -.columnloop: - movq mmA, MMWORD [esi+0*SIZEOF_MMWORD] - movq mmF, MMWORD [esi+1*SIZEOF_MMWORD] - movq mmD, MMWORD [esi+2*SIZEOF_MMWORD] - movq mmC, MMWORD [esi+3*SIZEOF_MMWORD] - -.rgb_gray_cnv: - ; mmA=(00 10 20 30 01 11 21 31) - ; mmF=(02 12 22 32 03 13 23 33) - ; mmD=(04 14 24 34 05 15 25 35) - ; mmC=(06 16 26 36 07 17 27 37) - - movq mmB,mmA - punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32) - punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33) - - movq mmG,mmD - punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36) - punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37) - - movq mmE,mmA - punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16) - punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36) - - movq mmH,mmB - punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17) - punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37) - - pxor mmF,mmF - - movq mmC,mmA - punpcklbw mmA,mmF ; mmA=(00 02 04 06) - punpckhbw mmC,mmF ; mmC=(10 12 14 16) - - movq mmD,mmB - punpcklbw mmB,mmF ; mmB=(01 03 05 07) - punpckhbw mmD,mmF ; mmD=(11 13 15 17) - - movq mmG,mmE - punpcklbw mmE,mmF ; mmE=(20 22 24 26) - punpckhbw mmG,mmF ; mmG=(30 32 34 36) - - punpcklbw mmF,mmH - punpckhbw mmH,mmH - psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27) - psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37) - -%endif ; RGB_PIXELSIZE ; --------------- - - ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE - ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, 
mm5=(B1 B3 B5 B7)=BO - - ; (Original) - ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B - ; - ; (This implementation) - ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G - - movq mm6,mm1 - punpcklwd mm1,mm3 - punpckhwd mm6,mm3 - pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337) - pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337) - - movq mm7, mm6 ; mm7=ROH*FIX(0.299)+GOH*FIX(0.337) - - movq mm6,mm0 - punpcklwd mm0,mm2 - punpckhwd mm6,mm2 - pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337) - pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337) - - movq MMWORD [wk(0)], mm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) - movq MMWORD [wk(1)], mm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) - - movq mm0, mm5 ; mm0=BO - movq mm6, mm4 ; mm6=BE - - movq mm4,mm0 - punpcklwd mm0,mm3 - punpckhwd mm4,mm3 - pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250) - pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250) - - movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF] - - paddd mm0, mm1 - paddd mm4, mm7 - paddd mm0,mm3 - paddd mm4,mm3 - psrld mm0,SCALEBITS ; mm0=YOL - psrld mm4,SCALEBITS ; mm4=YOH - packssdw mm0,mm4 ; mm0=YO - - movq mm4,mm6 - punpcklwd mm6,mm2 - punpckhwd mm4,mm2 - pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250) - pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250) - - movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF] - - paddd mm6, MMWORD [wk(0)] - paddd mm4, MMWORD [wk(1)] - paddd mm6,mm2 - paddd mm4,mm2 - psrld mm6,SCALEBITS ; mm6=YEL - psrld mm4,SCALEBITS ; mm4=YEH - packssdw mm6,mm4 ; mm6=YE - - psllw mm0,BYTE_BIT - por mm6,mm0 ; mm6=Y - movq MMWORD [edi], mm6 ; Save Y - - sub ecx, byte SIZEOF_MMWORD - add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr - add edi, byte SIZEOF_MMWORD ; outptr0 - cmp ecx, byte SIZEOF_MMWORD - jae near .columnloop - test 
ecx,ecx - jnz near .column_ld1 - - pop ecx ; col - pop esi - pop edi - poppic eax - - add esi, byte SIZEOF_JSAMPROW ; input_buf - add edi, byte SIZEOF_JSAMPROW - dec eax ; num_rows - jg near .rowloop - - emms ; empty MMX state - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jcgryext-sse2-64.asm b/simd/jcgryext-sse2-64.asm deleted file mode 100644 index 541355a..0000000 --- a/simd/jcgryext-sse2-64.asm +++ /dev/null @@ -1,365 +0,0 @@ -; -; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2) -; -; Copyright (C) 2011, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Convert some rows of samples to the output colorspace. 
-; -; GLOBAL(void) -; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width, -; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, -; JDIMENSION output_row, int num_rows); -; - -; r10 = JDIMENSION img_width -; r11 = JSAMPARRAY input_buf -; r12 = JSAMPIMAGE output_buf -; r13 = JDIMENSION output_row -; r14 = int num_rows - -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - - global EXTN(jsimd_rgb_gray_convert_sse2) - -EXTN(jsimd_rgb_gray_convert_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - push rbx - - mov ecx, r10d - test rcx,rcx - jz near .return - - push rcx - - mov rsi, r12 - mov ecx, r13d - mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] - lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] - - pop rcx - - mov rsi, r11 - mov eax, r14d - test rax,rax - jle near .return -.rowloop: - push rdi - push rsi - push rcx ; col - - mov rsi, JSAMPROW [rsi] ; inptr - mov rdi, JSAMPROW [rdi] ; outptr0 - - cmp rcx, byte SIZEOF_XMMWORD - jae near .columnloop - -%if RGB_PIXELSIZE == 3 ; --------------- - -.column_ld1: - push rax - push rdx - lea rcx,[rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE - test cl, SIZEOF_BYTE - jz short .column_ld2 - sub rcx, byte SIZEOF_BYTE - movzx rax, BYTE [rsi+rcx] -.column_ld2: - test cl, SIZEOF_WORD - jz short .column_ld4 - sub rcx, byte SIZEOF_WORD - movzx rdx, WORD [rsi+rcx] - shl rax, WORD_BIT - or rax,rdx -.column_ld4: - movd xmmA,eax - pop rdx - pop rax - test cl, SIZEOF_DWORD - jz short .column_ld8 - sub rcx, byte SIZEOF_DWORD - movd xmmF, XMM_DWORD [rsi+rcx] - pslldq xmmA, SIZEOF_DWORD - por xmmA,xmmF -.column_ld8: - test cl, SIZEOF_MMWORD - jz short .column_ld16 - sub rcx, byte SIZEOF_MMWORD - movq xmmB, XMM_MMWORD [rsi+rcx] - pslldq xmmA, SIZEOF_MMWORD - por xmmA,xmmB -.column_ld16: - test cl, SIZEOF_XMMWORD - jz short .column_ld32 - movdqa xmmF,xmmA - movdqu 
xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] - mov rcx, SIZEOF_XMMWORD - jmp short .rgb_gray_cnv -.column_ld32: - test cl, 2*SIZEOF_XMMWORD - mov rcx, SIZEOF_XMMWORD - jz short .rgb_gray_cnv - movdqa xmmB,xmmA - movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] - jmp short .rgb_gray_cnv - -.columnloop: - movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] - movdqu xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD] - -.rgb_gray_cnv: - ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) - ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) - ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) - - movdqa xmmG,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) - psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) - pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) - - punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) - punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) - - movdqa xmmD,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) - psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) - pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) - - punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) - punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) - - movdqa xmmE,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) - psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) - pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) - - punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 
2A 2C 2E 01 03 05 07 09 0B 0D 0F) - punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) - - pxor xmmH,xmmH - - movdqa xmmC,xmmA - punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) - punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) - - movdqa xmmB,xmmE - punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) - punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) - - movdqa xmmF,xmmD - punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) - punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -.column_ld1: - test cl, SIZEOF_XMMWORD/16 - jz short .column_ld2 - sub rcx, byte SIZEOF_XMMWORD/16 - movd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] -.column_ld2: - test cl, SIZEOF_XMMWORD/8 - jz short .column_ld4 - sub rcx, byte SIZEOF_XMMWORD/8 - movq xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE] - pslldq xmmA, SIZEOF_MMWORD - por xmmA,xmmE -.column_ld4: - test cl, SIZEOF_XMMWORD/4 - jz short .column_ld8 - sub rcx, byte SIZEOF_XMMWORD/4 - movdqa xmmE,xmmA - movdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE] -.column_ld8: - test cl, SIZEOF_XMMWORD/2 - mov rcx, SIZEOF_XMMWORD - jz short .rgb_gray_cnv - movdqa xmmF,xmmA - movdqa xmmH,xmmE - movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] - jmp short .rgb_gray_cnv - -.columnloop: - movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] - movdqu xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD] - movdqu xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD] - -.rgb_gray_cnv: - ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) - ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) - ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) - ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) - - movdqa xmmD,xmmA - punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) - punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) - - movdqa xmmC,xmmF - punpcklbw xmmF,xmmH ; 
xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) - punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) - - movdqa xmmB,xmmA - punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) - punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) - - movdqa xmmG,xmmD - punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) - punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) - - movdqa xmmE,xmmA - punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) - punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) - - movdqa xmmH,xmmB - punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) - punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) - - pxor xmmF,xmmF - - movdqa xmmC,xmmA - punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) - punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) - - movdqa xmmD,xmmB - punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) - punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) - - movdqa xmmG,xmmE - punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) - punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) - - punpcklbw xmmF,xmmH - punpckhbw xmmH,xmmH - psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) - psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) - -%endif ; RGB_PIXELSIZE ; --------------- - - ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE - ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO - - ; (Original) - ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B - ; - ; (This implementation) - ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G - - movdqa xmm6,xmm1 - punpcklwd xmm1,xmm3 - punpckhwd xmm6,xmm3 - pmaddwd xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) - pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) - - movdqa xmm7, xmm6 ; 
xmm7=ROH*FIX(0.299)+GOH*FIX(0.337) - - movdqa xmm6,xmm0 - punpcklwd xmm0,xmm2 - punpckhwd xmm6,xmm2 - pmaddwd xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) - pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) - - movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) - movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) - - movdqa xmm0, xmm5 ; xmm0=BO - movdqa xmm6, xmm4 ; xmm6=BE - - movdqa xmm4,xmm0 - punpcklwd xmm0,xmm3 - punpckhwd xmm4,xmm3 - pmaddwd xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) - pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) - - movdqa xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF] - - paddd xmm0, xmm1 - paddd xmm4, xmm7 - paddd xmm0,xmm3 - paddd xmm4,xmm3 - psrld xmm0,SCALEBITS ; xmm0=YOL - psrld xmm4,SCALEBITS ; xmm4=YOH - packssdw xmm0,xmm4 ; xmm0=YO - - movdqa xmm4,xmm6 - punpcklwd xmm6,xmm2 - punpckhwd xmm4,xmm2 - pmaddwd xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) - pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) - - movdqa xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF] - - paddd xmm6, XMMWORD [wk(0)] - paddd xmm4, XMMWORD [wk(1)] - paddd xmm6,xmm2 - paddd xmm4,xmm2 - psrld xmm6,SCALEBITS ; xmm6=YEL - psrld xmm4,SCALEBITS ; xmm4=YEH - packssdw xmm6,xmm4 ; xmm6=YE - - psllw xmm0,BYTE_BIT - por xmm6,xmm0 ; xmm6=Y - movdqa XMMWORD [rdi], xmm6 ; Save Y - - sub rcx, byte SIZEOF_XMMWORD - add rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr - add rdi, byte SIZEOF_XMMWORD ; outptr0 - cmp rcx, byte SIZEOF_XMMWORD - jae near .columnloop - test rcx,rcx - jnz near .column_ld1 - - pop rcx ; col - pop rsi - pop rdi - - add rsi, byte SIZEOF_JSAMPROW ; input_buf - add rdi, byte SIZEOF_JSAMPROW - dec rax ; num_rows - jg near .rowloop - -.return: - pop rbx - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to 
align the -; segment unless we do this. - align 16 diff --git a/simd/jcgryext-sse2.asm b/simd/jcgryext-sse2.asm deleted file mode 100644 index cd16dd1..0000000 --- a/simd/jcgryext-sse2.asm +++ /dev/null @@ -1,384 +0,0 @@ -; -; jcgryext.asm - grayscale colorspace conversion (SSE2) -; -; Copyright (C) 2011, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Convert some rows of samples to the output colorspace. -; -; GLOBAL(void) -; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width, -; JSAMPARRAY input_buf, JSAMPIMAGE output_buf, -; JDIMENSION output_row, int num_rows); -; - -%define img_width(b) (b)+8 ; JDIMENSION img_width -%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf -%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf -%define output_row(b) (b)+20 ; JDIMENSION output_row -%define num_rows(b) (b)+24 ; int num_rows - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 -%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr - - align 16 - - global EXTN(jsimd_rgb_gray_convert_sse2) - -EXTN(jsimd_rgb_gray_convert_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic eax ; make a room for GOT address - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - 
push esi - push edi - - get_GOT ebx ; get GOT address - movpic POINTER [gotptr], ebx ; save GOT address - - mov ecx, JDIMENSION [img_width(eax)] - test ecx,ecx - jz near .return - - push ecx - - mov esi, JSAMPIMAGE [output_buf(eax)] - mov ecx, JDIMENSION [output_row(eax)] - mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] - lea edi, [edi+ecx*SIZEOF_JSAMPROW] - - pop ecx - - mov esi, JSAMPARRAY [input_buf(eax)] - mov eax, INT [num_rows(eax)] - test eax,eax - jle near .return - alignx 16,7 -.rowloop: - pushpic eax - push edi - push esi - push ecx ; col - - mov esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr0 - movpic eax, POINTER [gotptr] ; load GOT address (eax) - - cmp ecx, byte SIZEOF_XMMWORD - jae near .columnloop - alignx 16,7 - -%if RGB_PIXELSIZE == 3 ; --------------- - -.column_ld1: - push eax - push edx - lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE - test cl, SIZEOF_BYTE - jz short .column_ld2 - sub ecx, byte SIZEOF_BYTE - movzx eax, BYTE [esi+ecx] -.column_ld2: - test cl, SIZEOF_WORD - jz short .column_ld4 - sub ecx, byte SIZEOF_WORD - movzx edx, WORD [esi+ecx] - shl eax, WORD_BIT - or eax,edx -.column_ld4: - movd xmmA,eax - pop edx - pop eax - test cl, SIZEOF_DWORD - jz short .column_ld8 - sub ecx, byte SIZEOF_DWORD - movd xmmF, XMM_DWORD [esi+ecx] - pslldq xmmA, SIZEOF_DWORD - por xmmA,xmmF -.column_ld8: - test cl, SIZEOF_MMWORD - jz short .column_ld16 - sub ecx, byte SIZEOF_MMWORD - movq xmmB, XMM_MMWORD [esi+ecx] - pslldq xmmA, SIZEOF_MMWORD - por xmmA,xmmB -.column_ld16: - test cl, SIZEOF_XMMWORD - jz short .column_ld32 - movdqa xmmF,xmmA - movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] - mov ecx, SIZEOF_XMMWORD - jmp short .rgb_gray_cnv -.column_ld32: - test cl, 2*SIZEOF_XMMWORD - mov ecx, SIZEOF_XMMWORD - jz short .rgb_gray_cnv - movdqa xmmB,xmmA - movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] - jmp short .rgb_gray_cnv - alignx 16,7 - -.columnloop: - movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] - 
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD] - movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD] - -.rgb_gray_cnv: - ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) - ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) - ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) - - movdqa xmmG,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) - psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) - pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) - - punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) - punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) - - movdqa xmmD,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) - psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) - pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) - - punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) - punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) - - movdqa xmmE,xmmA - pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) - psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) - - punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) - pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) - - punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) - punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) - - pxor xmmH,xmmH - - movdqa xmmC,xmmA - punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) - punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) - - movdqa xmmB,xmmE - punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) - punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 
0B 0D 0F) - - movdqa xmmF,xmmD - punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) - punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -.column_ld1: - test cl, SIZEOF_XMMWORD/16 - jz short .column_ld2 - sub ecx, byte SIZEOF_XMMWORD/16 - movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE] -.column_ld2: - test cl, SIZEOF_XMMWORD/8 - jz short .column_ld4 - sub ecx, byte SIZEOF_XMMWORD/8 - movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE] - pslldq xmmA, SIZEOF_MMWORD - por xmmA,xmmE -.column_ld4: - test cl, SIZEOF_XMMWORD/4 - jz short .column_ld8 - sub ecx, byte SIZEOF_XMMWORD/4 - movdqa xmmE,xmmA - movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE] -.column_ld8: - test cl, SIZEOF_XMMWORD/2 - mov ecx, SIZEOF_XMMWORD - jz short .rgb_gray_cnv - movdqa xmmF,xmmA - movdqa xmmH,xmmE - movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] - jmp short .rgb_gray_cnv - alignx 16,7 - -.columnloop: - movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD] - movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD] - movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD] - -.rgb_gray_cnv: - ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) - ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) - ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) - ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) - - movdqa xmmD,xmmA - punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) - punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) - - movdqa xmmC,xmmF - punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) - punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) - - movdqa xmmB,xmmA - punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) - punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) - - movdqa xmmG,xmmD - punpcklwd xmmD,xmmC ; xmmD=(02 06 
0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) - punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) - - movdqa xmmE,xmmA - punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) - punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) - - movdqa xmmH,xmmB - punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) - punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) - - pxor xmmF,xmmF - - movdqa xmmC,xmmA - punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) - punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) - - movdqa xmmD,xmmB - punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) - punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) - - movdqa xmmG,xmmE - punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) - punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) - - punpcklbw xmmF,xmmH - punpckhbw xmmH,xmmH - psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) - psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) - -%endif ; RGB_PIXELSIZE ; --------------- - - ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE - ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO - - ; (Original) - ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B - ; - ; (This implementation) - ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G - - movdqa xmm6,xmm1 - punpcklwd xmm1,xmm3 - punpckhwd xmm6,xmm3 - pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) - pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) - - movdqa xmm7, xmm6 ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337) - - movdqa xmm6,xmm0 - punpcklwd xmm0,xmm2 - punpckhwd xmm6,xmm2 - pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) - pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) - - movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) - movdqa XMMWORD [wk(1)], xmm6 ; 
wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) - - movdqa xmm0, xmm5 ; xmm0=BO - movdqa xmm6, xmm4 ; xmm6=BE - - movdqa xmm4,xmm0 - punpcklwd xmm0,xmm3 - punpckhwd xmm4,xmm3 - pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) - pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) - - movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF] - - paddd xmm0, xmm1 - paddd xmm4, xmm7 - paddd xmm0,xmm3 - paddd xmm4,xmm3 - psrld xmm0,SCALEBITS ; xmm0=YOL - psrld xmm4,SCALEBITS ; xmm4=YOH - packssdw xmm0,xmm4 ; xmm0=YO - - movdqa xmm4,xmm6 - punpcklwd xmm6,xmm2 - punpckhwd xmm4,xmm2 - pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) - pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) - - movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF] - - paddd xmm6, XMMWORD [wk(0)] - paddd xmm4, XMMWORD [wk(1)] - paddd xmm6,xmm2 - paddd xmm4,xmm2 - psrld xmm6,SCALEBITS ; xmm6=YEL - psrld xmm4,SCALEBITS ; xmm4=YEH - packssdw xmm6,xmm4 ; xmm6=YE - - psllw xmm0,BYTE_BIT - por xmm6,xmm0 ; xmm6=Y - movdqa XMMWORD [edi], xmm6 ; Save Y - - sub ecx, byte SIZEOF_XMMWORD - add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr - add edi, byte SIZEOF_XMMWORD ; outptr0 - cmp ecx, byte SIZEOF_XMMWORD - jae near .columnloop - test ecx,ecx - jnz near .column_ld1 - - pop ecx ; col - pop esi - pop edi - poppic eax - - add esi, byte SIZEOF_JSAMPROW ; input_buf - add edi, byte SIZEOF_JSAMPROW - dec eax ; num_rows - jg near .rowloop - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jchuff-sse2-64.asm b/simd/jchuff-sse2-64.asm deleted file mode 100644 index b1144d1..0000000 --- a/simd/jchuff-sse2-64.asm +++ /dev/null @@ -1,360 +0,0 @@ -; -; jchuff-sse2-64.asm - Huffman entropy encoding (64-bit SSE2) -; -; Copyright (C) 2009-2011, 2014-2016, D. R. Commander. -; Copyright (C) 2015, Matthieu Darbois. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains an SSE2 implementation for Huffman coding of one block. -; The following code is based directly on jchuff.c; see jchuff.c for more -; details. -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_huff_encode_one_block) - -EXTN(jconst_huff_encode_one_block): - -%include "jpeg_nbits_table.inc" - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 - -; These macros perform the same task as the emit_bits() function in the -; original libjpeg code. In addition to reducing overhead by explicitly -; inlining the code, additional performance is achieved by taking into -; account the size of the bit buffer and waiting until it is almost full -; before emptying it. This mostly benefits 64-bit platforms, since 6 -; bytes can be stored in a 64-bit bit buffer before it has to be emptied. 
- -%macro EMIT_BYTE 0 - sub put_bits, 8 ; put_bits -= 8; - mov rdx, put_buffer - mov ecx, put_bits - shr rdx, cl ; c = (JOCTET)GETJOCTET(put_buffer >> put_bits); - mov byte [buffer], dl ; *buffer++ = c; - add buffer, 1 - cmp dl, 0xFF ; need to stuff a zero byte? - jne %%.EMIT_BYTE_END - mov byte [buffer], 0 ; *buffer++ = 0; - add buffer, 1 -%%.EMIT_BYTE_END: -%endmacro - -%macro PUT_BITS 1 - add put_bits, ecx ; put_bits += size; - shl put_buffer, cl ; put_buffer = (put_buffer << size); - or put_buffer, %1 -%endmacro - -%macro CHECKBUF31 0 - cmp put_bits, 32 ; if (put_bits > 31) { - jl %%.CHECKBUF31_END - EMIT_BYTE - EMIT_BYTE - EMIT_BYTE - EMIT_BYTE -%%.CHECKBUF31_END: -%endmacro - -%macro CHECKBUF47 0 - cmp put_bits, 48 ; if (put_bits > 47) { - jl %%.CHECKBUF47_END - EMIT_BYTE - EMIT_BYTE - EMIT_BYTE - EMIT_BYTE - EMIT_BYTE - EMIT_BYTE -%%.CHECKBUF47_END: -%endmacro - -%macro EMIT_BITS 2 - CHECKBUF47 - mov ecx, %2 - PUT_BITS %1 -%endmacro - -%macro kloop_prepare 37 ;(ko, jno0, ..., jno31, xmm0, xmm1, xmm2, xmm3) - pxor xmm8, xmm8 ; __m128i neg = _mm_setzero_si128(); - pxor xmm9, xmm9 ; __m128i neg = _mm_setzero_si128(); - pxor xmm10, xmm10 ; __m128i neg = _mm_setzero_si128(); - pxor xmm11, xmm11 ; __m128i neg = _mm_setzero_si128(); - pinsrw %34, word [r12 + %2 * SIZEOF_WORD], 0 ; xmm_shadow[0] = block[jno0]; - pinsrw %35, word [r12 + %10 * SIZEOF_WORD], 0 ; xmm_shadow[8] = block[jno8]; - pinsrw %36, word [r12 + %18 * SIZEOF_WORD], 0 ; xmm_shadow[16] = block[jno16]; - pinsrw %37, word [r12 + %26 * SIZEOF_WORD], 0 ; xmm_shadow[24] = block[jno24]; - pinsrw %34, word [r12 + %3 * SIZEOF_WORD], 1 ; xmm_shadow[1] = block[jno1]; - pinsrw %35, word [r12 + %11 * SIZEOF_WORD], 1 ; xmm_shadow[9] = block[jno9]; - pinsrw %36, word [r12 + %19 * SIZEOF_WORD], 1 ; xmm_shadow[17] = block[jno17]; - pinsrw %37, word [r12 + %27 * SIZEOF_WORD], 1 ; xmm_shadow[25] = block[jno25]; - pinsrw %34, word [r12 + %4 * SIZEOF_WORD], 2 ; xmm_shadow[2] = block[jno2]; - pinsrw %35, word [r12 + %12 
* SIZEOF_WORD], 2 ; xmm_shadow[10] = block[jno10]; - pinsrw %36, word [r12 + %20 * SIZEOF_WORD], 2 ; xmm_shadow[18] = block[jno18]; - pinsrw %37, word [r12 + %28 * SIZEOF_WORD], 2 ; xmm_shadow[26] = block[jno26]; - pinsrw %34, word [r12 + %5 * SIZEOF_WORD], 3 ; xmm_shadow[3] = block[jno3]; - pinsrw %35, word [r12 + %13 * SIZEOF_WORD], 3 ; xmm_shadow[11] = block[jno11]; - pinsrw %36, word [r12 + %21 * SIZEOF_WORD], 3 ; xmm_shadow[19] = block[jno19]; - pinsrw %37, word [r12 + %29 * SIZEOF_WORD], 3 ; xmm_shadow[27] = block[jno27]; - pinsrw %34, word [r12 + %6 * SIZEOF_WORD], 4 ; xmm_shadow[4] = block[jno4]; - pinsrw %35, word [r12 + %14 * SIZEOF_WORD], 4 ; xmm_shadow[12] = block[jno12]; - pinsrw %36, word [r12 + %22 * SIZEOF_WORD], 4 ; xmm_shadow[20] = block[jno20]; - pinsrw %37, word [r12 + %30 * SIZEOF_WORD], 4 ; xmm_shadow[28] = block[jno28]; - pinsrw %34, word [r12 + %7 * SIZEOF_WORD], 5 ; xmm_shadow[5] = block[jno5]; - pinsrw %35, word [r12 + %15 * SIZEOF_WORD], 5 ; xmm_shadow[13] = block[jno13]; - pinsrw %36, word [r12 + %23 * SIZEOF_WORD], 5 ; xmm_shadow[21] = block[jno21]; - pinsrw %37, word [r12 + %31 * SIZEOF_WORD], 5 ; xmm_shadow[29] = block[jno29]; - pinsrw %34, word [r12 + %8 * SIZEOF_WORD], 6 ; xmm_shadow[6] = block[jno6]; - pinsrw %35, word [r12 + %16 * SIZEOF_WORD], 6 ; xmm_shadow[14] = block[jno14]; - pinsrw %36, word [r12 + %24 * SIZEOF_WORD], 6 ; xmm_shadow[22] = block[jno22]; - pinsrw %37, word [r12 + %32 * SIZEOF_WORD], 6 ; xmm_shadow[30] = block[jno30]; - pinsrw %34, word [r12 + %9 * SIZEOF_WORD], 7 ; xmm_shadow[7] = block[jno7]; - pinsrw %35, word [r12 + %17 * SIZEOF_WORD], 7 ; xmm_shadow[15] = block[jno15]; - pinsrw %36, word [r12 + %25 * SIZEOF_WORD], 7 ; xmm_shadow[23] = block[jno23]; -%if %1 != 32 - pinsrw %37, word [r12 + %33 * SIZEOF_WORD], 7 ; xmm_shadow[31] = block[jno31]; -%else - pinsrw %37, ebx, 7 ; xmm_shadow[31] = block[jno31]; -%endif - pcmpgtw xmm8, %34 ; neg = _mm_cmpgt_epi16(neg, x1); - pcmpgtw xmm9, %35 ; neg = 
_mm_cmpgt_epi16(neg, x1); - pcmpgtw xmm10, %36 ; neg = _mm_cmpgt_epi16(neg, x1); - pcmpgtw xmm11, %37 ; neg = _mm_cmpgt_epi16(neg, x1); - paddw %34, xmm8 ; x1 = _mm_add_epi16(x1, neg); - paddw %35, xmm9 ; x1 = _mm_add_epi16(x1, neg); - paddw %36, xmm10 ; x1 = _mm_add_epi16(x1, neg); - paddw %37, xmm11 ; x1 = _mm_add_epi16(x1, neg); - pxor %34, xmm8 ; x1 = _mm_xor_si128(x1, neg); - pxor %35, xmm9 ; x1 = _mm_xor_si128(x1, neg); - pxor %36, xmm10 ; x1 = _mm_xor_si128(x1, neg); - pxor %37, xmm11 ; x1 = _mm_xor_si128(x1, neg); - pxor xmm8, %34 ; neg = _mm_xor_si128(neg, x1); - pxor xmm9, %35 ; neg = _mm_xor_si128(neg, x1); - pxor xmm10, %36 ; neg = _mm_xor_si128(neg, x1); - pxor xmm11, %37 ; neg = _mm_xor_si128(neg, x1); - movdqa XMMWORD [t1 + %1 * SIZEOF_WORD], %34 ; _mm_storeu_si128((__m128i *)(t1 + ko), x1); - movdqa XMMWORD [t1 + (%1 + 8) * SIZEOF_WORD], %35 ; _mm_storeu_si128((__m128i *)(t1 + ko + 8), x1); - movdqa XMMWORD [t1 + (%1 + 16) * SIZEOF_WORD], %36 ; _mm_storeu_si128((__m128i *)(t1 + ko + 16), x1); - movdqa XMMWORD [t1 + (%1 + 24) * SIZEOF_WORD], %37 ; _mm_storeu_si128((__m128i *)(t1 + ko + 24), x1); - movdqa XMMWORD [t2 + %1 * SIZEOF_WORD], xmm8 ; _mm_storeu_si128((__m128i *)(t2 + ko), neg); - movdqa XMMWORD [t2 + (%1 + 8) * SIZEOF_WORD], xmm9 ; _mm_storeu_si128((__m128i *)(t2 + ko + 8), neg); - movdqa XMMWORD [t2 + (%1 + 16) * SIZEOF_WORD], xmm10 ; _mm_storeu_si128((__m128i *)(t2 + ko + 16), neg); - movdqa XMMWORD [t2 + (%1 + 24) * SIZEOF_WORD], xmm11 ; _mm_storeu_si128((__m128i *)(t2 + ko + 24), neg); -%endmacro - -; -; Encode a single block's worth of coefficients. 
-; -; GLOBAL(JOCTET*) -; jsimd_huff_encode_one_block_sse2 (working_state *state, JOCTET *buffer, -; JCOEFPTR block, int last_dc_val, -; c_derived_tbl *dctbl, c_derived_tbl *actbl) -; - -; r10 = working_state *state -; r11 = JOCTET *buffer -; r12 = JCOEFPTR block -; r13 = int last_dc_val -; r14 = c_derived_tbl *dctbl -; r15 = c_derived_tbl *actbl - -%define t1 rbp-(DCTSIZE2*SIZEOF_WORD) -%define t2 t1-(DCTSIZE2*SIZEOF_WORD) -%define put_buffer r8 -%define put_bits r9d -%define buffer rax - - align 16 - global EXTN(jsimd_huff_encode_one_block_sse2) - -EXTN(jsimd_huff_encode_one_block_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [t2] - collect_args -%ifdef WIN64 - movaps XMMWORD [rsp-1*SIZEOF_XMMWORD], xmm8 - movaps XMMWORD [rsp-2*SIZEOF_XMMWORD], xmm9 - movaps XMMWORD [rsp-3*SIZEOF_XMMWORD], xmm10 - movaps XMMWORD [rsp-4*SIZEOF_XMMWORD], xmm11 - sub rsp, 4*SIZEOF_XMMWORD -%endif - push rbx - - mov buffer, r11 ; r11 is now sratch - - mov put_buffer, MMWORD [r10+16] ; put_buffer = state->cur.put_buffer; - mov put_bits, DWORD [r10+24] ; put_bits = state->cur.put_bits; - push r10 ; r10 is now scratch - - ; Encode the DC coefficient difference per section F.1.2.1 - movsx edi, word [r12] ; temp = temp2 = block[0] - last_dc_val; - sub edi, r13d ; r13 is not used anymore - mov ebx, edi - - ; This is a well-known technique for obtaining the absolute value - ; without a branch. It is derived from an assembly language technique - ; presented in "How to Optimize for the Pentium Processors", - ; Copyright (c) 1996, 1997 by Agner Fog. 
- mov esi, edi - sar esi, 31 ; temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); - xor edi, esi ; temp ^= temp3; - sub edi, esi ; temp -= temp3; - - ; For a negative input, want temp2 = bitwise complement of abs(input) - ; This code assumes we are on a two's complement machine - add ebx, esi ; temp2 += temp3; - - ; Find the number of bits needed for the magnitude of the coefficient - lea r11, [rel jpeg_nbits_table] - movzx rdi, byte [r11 + rdi] ; nbits = JPEG_NBITS(temp); - ; Emit the Huffman-coded symbol for the number of bits - mov r11d, INT [r14 + rdi * 4] ; code = dctbl->ehufco[nbits]; - movzx esi, byte [r14 + rdi + 1024] ; size = dctbl->ehufsi[nbits]; - EMIT_BITS r11, esi ; EMIT_BITS(code, size) - - ; Mask off any extra bits in code - mov esi, 1 - mov ecx, edi - shl esi, cl - dec esi - and ebx, esi ; temp2 &= (((JLONG) 1)<ehufco[0xf0]; - movzx r14d, byte [r15 + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; - lea rsi, [t1] -.BLOOP: - bsf r12, r11 ; r = __builtin_ctzl(index); - jz .ELOOP - mov rcx, r12 - lea rsi, [rsi+r12*2] ; k += r; - shr r11, cl ; index >>= r; - movzx rdi, word [rsi] ; temp = t1[k]; - lea rbx, [rel jpeg_nbits_table] - movzx rdi, byte [rbx + rdi] ; nbits = JPEG_NBITS(temp); -.BRLOOP: - cmp r12, 16 ; while (r > 15) { - jl .ERLOOP - EMIT_BITS r13, r14d ; EMIT_BITS(code_0xf0, size_0xf0) - sub r12, 16 ; r -= 16; - jmp .BRLOOP -.ERLOOP: - ; Emit Huffman symbol for run length / number of bits - CHECKBUF31 ; uses rcx, rdx - - shl r12, 4 ; temp3 = (r << 4) + nbits; - add r12, rdi - mov ebx, INT [r15 + r12 * 4] ; code = actbl->ehufco[temp3]; - movzx ecx, byte [r15 + r12 + 1024] ; size = actbl->ehufsi[temp3]; - PUT_BITS rbx - - ;EMIT_CODE(code, size) - - movsx ebx, word [rsi-DCTSIZE2*2] ; temp2 = t2[k]; - ; Mask off any extra bits in code - mov rcx, rdi - mov rdx, 1 - shl rdx, cl - dec rdx - and rbx, rdx ; temp2 &= (((JLONG) 1)<>= 1; - add rsi, 2 ; ++k; - jmp .BLOOP -.ELOOP: - ; If the last coef(s) were zero, emit an end-of-block code - lea rdi, [t1 + 
(DCTSIZE2-1) * 2] ; r = DCTSIZE2-1-k; - cmp rdi, rsi ; if (r > 0) { - je .EFN - mov ebx, INT [r15] ; code = actbl->ehufco[0]; - movzx r12d, byte [r15 + 1024] ; size = actbl->ehufsi[0]; - EMIT_BITS rbx, r12d -.EFN: - pop r10 - ; Save put_buffer & put_bits - mov MMWORD [r10+16], put_buffer ; state->cur.put_buffer = put_buffer; - mov DWORD [r10+24], put_bits ; state->cur.put_bits = put_bits; - - pop rbx -%ifdef WIN64 - movaps xmm11, XMMWORD [rsp+0*SIZEOF_XMMWORD] - movaps xmm10, XMMWORD [rsp+1*SIZEOF_XMMWORD] - movaps xmm9, XMMWORD [rsp+2*SIZEOF_XMMWORD] - movaps xmm8, XMMWORD [rsp+3*SIZEOF_XMMWORD] - add rsp, 4*SIZEOF_XMMWORD -%endif - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jchuff-sse2.asm b/simd/jchuff-sse2.asm deleted file mode 100644 index b81db75..0000000 --- a/simd/jchuff-sse2.asm +++ /dev/null @@ -1,426 +0,0 @@ -; -; jchuff-sse2.asm - Huffman entropy encoding (SSE2) -; -; Copyright (C) 2009-2011, 2014-2017, D. R. Commander. -; Copyright (C) 2015, Matthieu Darbois. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains an SSE2 implementation for Huffman coding of one block. -; The following code is based directly on jchuff.c; see jchuff.c for more -; details. 
-; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_huff_encode_one_block) - -EXTN(jconst_huff_encode_one_block): - -%include "jpeg_nbits_table.inc" - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 - -; These macros perform the same task as the emit_bits() function in the -; original libjpeg code. In addition to reducing overhead by explicitly -; inlining the code, additional performance is achieved by taking into -; account the size of the bit buffer and waiting until it is almost full -; before emptying it. This mostly benefits 64-bit platforms, since 6 -; bytes can be stored in a 64-bit bit buffer before it has to be emptied. - -%macro EMIT_BYTE 0 - sub put_bits, 8 ; put_bits -= 8; - mov edx, put_buffer - mov ecx, put_bits - shr edx, cl ; c = (JOCTET)GETJOCTET(put_buffer >> put_bits); - mov byte [eax], dl ; *buffer++ = c; - add eax, 1 - cmp dl, 0xFF ; need to stuff a zero byte? 
- jne %%.EMIT_BYTE_END - mov byte [eax], 0 ; *buffer++ = 0; - add eax, 1 -%%.EMIT_BYTE_END: -%endmacro - -%macro PUT_BITS 1 - add put_bits, ecx ; put_bits += size; - shl put_buffer, cl ; put_buffer = (put_buffer << size); - or put_buffer, %1 -%endmacro - -%macro CHECKBUF15 0 - cmp put_bits, 16 ; if (put_bits > 31) { - jl %%.CHECKBUF15_END - mov eax, POINTER [esp+buffer] - EMIT_BYTE - EMIT_BYTE - mov POINTER [esp+buffer], eax -%%.CHECKBUF15_END: -%endmacro - -%macro EMIT_BITS 1 - PUT_BITS %1 - CHECKBUF15 -%endmacro - -%macro kloop_prepare 37 ;(ko, jno0, ..., jno31, xmm0, xmm1, xmm2, xmm3) - pxor xmm4, xmm4 ; __m128i neg = _mm_setzero_si128(); - pxor xmm5, xmm5 ; __m128i neg = _mm_setzero_si128(); - pxor xmm6, xmm6 ; __m128i neg = _mm_setzero_si128(); - pxor xmm7, xmm7 ; __m128i neg = _mm_setzero_si128(); - pinsrw %34, word [esi + %2 * SIZEOF_WORD], 0 ; xmm_shadow[0] = block[jno0]; - pinsrw %35, word [esi + %10 * SIZEOF_WORD], 0 ; xmm_shadow[8] = block[jno8]; - pinsrw %36, word [esi + %18 * SIZEOF_WORD], 0 ; xmm_shadow[16] = block[jno16]; - pinsrw %37, word [esi + %26 * SIZEOF_WORD], 0 ; xmm_shadow[24] = block[jno24]; - pinsrw %34, word [esi + %3 * SIZEOF_WORD], 1 ; xmm_shadow[1] = block[jno1]; - pinsrw %35, word [esi + %11 * SIZEOF_WORD], 1 ; xmm_shadow[9] = block[jno9]; - pinsrw %36, word [esi + %19 * SIZEOF_WORD], 1 ; xmm_shadow[17] = block[jno17]; - pinsrw %37, word [esi + %27 * SIZEOF_WORD], 1 ; xmm_shadow[25] = block[jno25]; - pinsrw %34, word [esi + %4 * SIZEOF_WORD], 2 ; xmm_shadow[2] = block[jno2]; - pinsrw %35, word [esi + %12 * SIZEOF_WORD], 2 ; xmm_shadow[10] = block[jno10]; - pinsrw %36, word [esi + %20 * SIZEOF_WORD], 2 ; xmm_shadow[18] = block[jno18]; - pinsrw %37, word [esi + %28 * SIZEOF_WORD], 2 ; xmm_shadow[26] = block[jno26]; - pinsrw %34, word [esi + %5 * SIZEOF_WORD], 3 ; xmm_shadow[3] = block[jno3]; - pinsrw %35, word [esi + %13 * SIZEOF_WORD], 3 ; xmm_shadow[11] = block[jno11]; - pinsrw %36, word [esi + %21 * SIZEOF_WORD], 3 ; xmm_shadow[19] = 
block[jno19]; - pinsrw %37, word [esi + %29 * SIZEOF_WORD], 3 ; xmm_shadow[27] = block[jno27]; - pinsrw %34, word [esi + %6 * SIZEOF_WORD], 4 ; xmm_shadow[4] = block[jno4]; - pinsrw %35, word [esi + %14 * SIZEOF_WORD], 4 ; xmm_shadow[12] = block[jno12]; - pinsrw %36, word [esi + %22 * SIZEOF_WORD], 4 ; xmm_shadow[20] = block[jno20]; - pinsrw %37, word [esi + %30 * SIZEOF_WORD], 4 ; xmm_shadow[28] = block[jno28]; - pinsrw %34, word [esi + %7 * SIZEOF_WORD], 5 ; xmm_shadow[5] = block[jno5]; - pinsrw %35, word [esi + %15 * SIZEOF_WORD], 5 ; xmm_shadow[13] = block[jno13]; - pinsrw %36, word [esi + %23 * SIZEOF_WORD], 5 ; xmm_shadow[21] = block[jno21]; - pinsrw %37, word [esi + %31 * SIZEOF_WORD], 5 ; xmm_shadow[29] = block[jno29]; - pinsrw %34, word [esi + %8 * SIZEOF_WORD], 6 ; xmm_shadow[6] = block[jno6]; - pinsrw %35, word [esi + %16 * SIZEOF_WORD], 6 ; xmm_shadow[14] = block[jno14]; - pinsrw %36, word [esi + %24 * SIZEOF_WORD], 6 ; xmm_shadow[22] = block[jno22]; - pinsrw %37, word [esi + %32 * SIZEOF_WORD], 6 ; xmm_shadow[30] = block[jno30]; - pinsrw %34, word [esi + %9 * SIZEOF_WORD], 7 ; xmm_shadow[7] = block[jno7]; - pinsrw %35, word [esi + %17 * SIZEOF_WORD], 7 ; xmm_shadow[15] = block[jno15]; - pinsrw %36, word [esi + %25 * SIZEOF_WORD], 7 ; xmm_shadow[23] = block[jno23]; -%if %1 != 32 - pinsrw %37, word [esi + %33 * SIZEOF_WORD], 7 ; xmm_shadow[31] = block[jno31]; -%else - pinsrw %37, ecx, 7 ; xmm_shadow[31] = block[jno31]; -%endif - pcmpgtw xmm4, %34 ; neg = _mm_cmpgt_epi16(neg, x1); - pcmpgtw xmm5, %35 ; neg = _mm_cmpgt_epi16(neg, x1); - pcmpgtw xmm6, %36 ; neg = _mm_cmpgt_epi16(neg, x1); - pcmpgtw xmm7, %37 ; neg = _mm_cmpgt_epi16(neg, x1); - paddw %34, xmm4 ; x1 = _mm_add_epi16(x1, neg); - paddw %35, xmm5 ; x1 = _mm_add_epi16(x1, neg); - paddw %36, xmm6 ; x1 = _mm_add_epi16(x1, neg); - paddw %37, xmm7 ; x1 = _mm_add_epi16(x1, neg); - pxor %34, xmm4 ; x1 = _mm_xor_si128(x1, neg); - pxor %35, xmm5 ; x1 = _mm_xor_si128(x1, neg); - pxor %36, xmm6 ; x1 = 
_mm_xor_si128(x1, neg); - pxor %37, xmm7 ; x1 = _mm_xor_si128(x1, neg); - pxor xmm4, %34 ; neg = _mm_xor_si128(neg, x1); - pxor xmm5, %35 ; neg = _mm_xor_si128(neg, x1); - pxor xmm6, %36 ; neg = _mm_xor_si128(neg, x1); - pxor xmm7, %37 ; neg = _mm_xor_si128(neg, x1); - movdqa XMMWORD [esp + t1 + %1 * SIZEOF_WORD], %34 ; _mm_storeu_si128((__m128i *)(t1 + ko), x1); - movdqa XMMWORD [esp + t1 + (%1 + 8) * SIZEOF_WORD], %35 ; _mm_storeu_si128((__m128i *)(t1 + ko + 8), x1); - movdqa XMMWORD [esp + t1 + (%1 + 16) * SIZEOF_WORD], %36 ; _mm_storeu_si128((__m128i *)(t1 + ko + 16), x1); - movdqa XMMWORD [esp + t1 + (%1 + 24) * SIZEOF_WORD], %37 ; _mm_storeu_si128((__m128i *)(t1 + ko + 24), x1); - movdqa XMMWORD [esp + t2 + %1 * SIZEOF_WORD], xmm4 ; _mm_storeu_si128((__m128i *)(t2 + ko), neg); - movdqa XMMWORD [esp + t2 + (%1 + 8) * SIZEOF_WORD], xmm5 ; _mm_storeu_si128((__m128i *)(t2 + ko + 8), neg); - movdqa XMMWORD [esp + t2 + (%1 + 16) * SIZEOF_WORD], xmm6 ; _mm_storeu_si128((__m128i *)(t2 + ko + 16), neg); - movdqa XMMWORD [esp + t2 + (%1 + 24) * SIZEOF_WORD], xmm7 ; _mm_storeu_si128((__m128i *)(t2 + ko + 24), neg); -%endmacro - -; -; Encode a single block's worth of coefficients. 
-; -; GLOBAL(JOCTET*) -; jsimd_huff_encode_one_block_sse2 (working_state *state, JOCTET *buffer, -; JCOEFPTR block, int last_dc_val, -; c_derived_tbl *dctbl, c_derived_tbl *actbl) -; - -; eax + 8 = working_state *state -; eax + 12 = JOCTET *buffer -; eax + 16 = JCOEFPTR block -; eax + 20 = int last_dc_val -; eax + 24 = c_derived_tbl *dctbl -; eax + 28 = c_derived_tbl *actbl - -%define pad 6*SIZEOF_DWORD ; Align to 16 bytes -%define t1 pad -%define t2 t1+(DCTSIZE2*SIZEOF_WORD) -%define block t2+(DCTSIZE2*SIZEOF_WORD) -%define actbl block+SIZEOF_DWORD -%define buffer actbl+SIZEOF_DWORD -%define temp buffer+SIZEOF_DWORD -%define temp2 temp+SIZEOF_DWORD -%define temp3 temp2+SIZEOF_DWORD -%define temp4 temp3+SIZEOF_DWORD -%define temp5 temp4+SIZEOF_DWORD -%define gotptr temp5+SIZEOF_DWORD ; void *gotptr -%define put_buffer ebx -%define put_bits edi - - align 16 - global EXTN(jsimd_huff_encode_one_block_sse2) - -EXTN(jsimd_huff_encode_one_block_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - sub esp, temp5+9*SIZEOF_DWORD-pad - push ebx - push ecx -; push edx ; need not be preserved - push esi - push edi - push ebp - - mov esi, POINTER [eax+8] ; (working_state *state) - mov put_buffer, DWORD [esi+8] ; put_buffer = state->cur.put_buffer; - mov put_bits, DWORD [esi+12] ; put_bits = state->cur.put_bits; - push esi ; esi is now scratch - - get_GOT edx ; get GOT address - movpic POINTER [esp+gotptr], edx ; save GOT address - - mov ecx, POINTER [eax+28] - mov edx, POINTER [eax+16] - mov esi, POINTER [eax+12] - mov POINTER [esp+actbl], ecx - mov POINTER [esp+block], edx - mov POINTER [esp+buffer], esi - - ; Encode the DC coefficient difference per section F.1.2.1 - mov esi, POINTER [esp+block] ; block - movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val; - sub ecx, DWORD [eax+20] - mov esi, ecx - - ; This is a well-known technique for 
obtaining the absolute value - ; without a branch. It is derived from an assembly language technique - ; presented in "How to Optimize for the Pentium Processors", - ; Copyright (c) 1996, 1997 by Agner Fog. - mov edx, ecx - sar edx, 31 ; temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); - xor ecx, edx ; temp ^= temp3; - sub ecx, edx ; temp -= temp3; - - ; For a negative input, want temp2 = bitwise complement of abs(input) - ; This code assumes we are on a two's complement machine - add esi, edx ; temp2 += temp3; - mov DWORD [esp+temp], esi ; backup temp2 in temp - - ; Find the number of bits needed for the magnitude of the coefficient - movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp) - movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp); - mov DWORD [esp+temp2], edx ; backup nbits in temp2 - - ; Emit the Huffman-coded symbol for the number of bits - mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore - mov eax, INT [ebp + edx * 4] ; code = dctbl->ehufco[nbits]; - movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits]; - EMIT_BITS eax ; EMIT_BITS(code, size) - - mov ecx, DWORD [esp+temp2] ; restore nbits - - ; Mask off any extra bits in code - mov eax, 1 - shl eax, cl - dec eax - and eax, DWORD [esp+temp] ; temp2 &= (((JLONG) 1)<>= r; - mov DWORD [esp+temp3], edx -.BRLOOP: - cmp ecx, 16 ; while (r > 15) { - jl near .ERLOOP - sub ecx, 16 ; r -= 16; - mov DWORD [esp+temp], ecx - mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0]; - movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; - EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0) - mov ecx, DWORD [esp+temp] - jmp .BRLOOP -.ERLOOP: - movsx eax, word [esi] ; temp = t1[k]; - movpic edx, POINTER [esp+gotptr] ; load GOT address (edx) - movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp); - mov DWORD [esp+temp2], eax - ; Emit Huffman symbol for run length / number of bits - shl ecx, 4 ; 
temp3 = (r << 4) + nbits; - add ecx, eax - mov eax, INT [ebp + ecx * 4] ; code = actbl->ehufco[temp3]; - movzx ecx, byte [ebp + ecx + 1024] ; size = actbl->ehufsi[temp3]; - EMIT_BITS eax - - movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k]; - ; Mask off any extra bits in code - mov ecx, DWORD [esp+temp2] - mov eax, 1 - shl eax, cl - dec eax - and eax, edx ; temp2 &= (((JLONG) 1)<>= 1; - - jmp .BLOOP -.ELOOP: - movdqa xmm0, XMMWORD [esp + t1 + 32 * SIZEOF_WORD] ; __m128i tmp0 = _mm_loadu_si128((__m128i *)(t1 + 0)); - movdqa xmm1, XMMWORD [esp + t1 + 40 * SIZEOF_WORD] ; __m128i tmp1 = _mm_loadu_si128((__m128i *)(t1 + 8)); - movdqa xmm2, XMMWORD [esp + t1 + 48 * SIZEOF_WORD] ; __m128i tmp2 = _mm_loadu_si128((__m128i *)(t1 + 16)); - movdqa xmm3, XMMWORD [esp + t1 + 56 * SIZEOF_WORD] ; __m128i tmp3 = _mm_loadu_si128((__m128i *)(t1 + 24)); - pcmpeqw xmm0, xmm7 ; tmp0 = _mm_cmpeq_epi16(tmp0, zero); - pcmpeqw xmm1, xmm7 ; tmp1 = _mm_cmpeq_epi16(tmp1, zero); - pcmpeqw xmm2, xmm7 ; tmp2 = _mm_cmpeq_epi16(tmp2, zero); - pcmpeqw xmm3, xmm7 ; tmp3 = _mm_cmpeq_epi16(tmp3, zero); - packsswb xmm0, xmm1 ; tmp0 = _mm_packs_epi16(tmp0, tmp1); - packsswb xmm2, xmm3 ; tmp2 = _mm_packs_epi16(tmp2, tmp3); - pmovmskb edx, xmm0 ; index = ((uint64_t)_mm_movemask_epi8(tmp0)) << 0; - pmovmskb ecx, xmm2 ; index = ((uint64_t)_mm_movemask_epi8(tmp2)) << 16; - shl ecx, 16 - or edx, ecx - not edx ; index = ~index; - - lea eax, [esp + t1 + (DCTSIZE2/2) * 2] - sub eax, esi - shr eax, 1 - bsf ecx, edx ; r = __builtin_ctzl(index); - jz near .ELOOP2 - shr edx, cl ; index >>= r; - add ecx, eax - lea esi, [esi+ecx*2] ; k += r; - mov DWORD [esp+temp3], edx - jmp .BRLOOP2 -.BLOOP2: - bsf ecx, edx ; r = __builtin_ctzl(index); - jz near .ELOOP2 - lea esi, [esi+ecx*2] ; k += r; - shr edx, cl ; index >>= r; - mov DWORD [esp+temp3], edx -.BRLOOP2: - cmp ecx, 16 ; while (r > 15) { - jl near .ERLOOP2 - sub ecx, 16 ; r -= 16; - mov DWORD [esp+temp], ecx - mov eax, INT [ebp + 240 * 4] ; code_0xf0 = 
actbl->ehufco[0xf0]; - movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; - EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0) - mov ecx, DWORD [esp+temp] - jmp .BRLOOP2 -.ERLOOP2: - movsx eax, word [esi] ; temp = t1[k]; - bsr eax, eax ; nbits = 32 - __builtin_clz(temp); - inc eax - mov DWORD [esp+temp2], eax - ; Emit Huffman symbol for run length / number of bits - shl ecx, 4 ; temp3 = (r << 4) + nbits; - add ecx, eax - mov eax, INT [ebp + ecx * 4] ; code = actbl->ehufco[temp3]; - movzx ecx, byte [ebp + ecx + 1024] ; size = actbl->ehufsi[temp3]; - EMIT_BITS eax - - movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k]; - ; Mask off any extra bits in code - mov ecx, DWORD [esp+temp2] - mov eax, 1 - shl eax, cl - dec eax - and eax, edx ; temp2 &= (((JLONG) 1)<>= 1; - - jmp .BLOOP2 -.ELOOP2: - ; If the last coef(s) were zero, emit an end-of-block code - lea edx, [esp + t1 + (DCTSIZE2-1) * 2] ; r = DCTSIZE2-1-k; - cmp edx, esi ; if (r > 0) { - je .EFN - mov eax, INT [ebp] ; code = actbl->ehufco[0]; - movzx ecx, byte [ebp + 1024] ; size = actbl->ehufsi[0]; - EMIT_BITS eax -.EFN: - mov eax, [esp+buffer] - pop esi - ; Save put_buffer & put_bits - mov DWORD [esi+8], put_buffer ; state->cur.put_buffer = put_buffer; - mov DWORD [esi+12], put_bits ; state->cur.put_bits = put_bits; - - pop ebp - pop edi - pop esi -; pop edx ; need not be preserved - pop ecx - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jcolsamp.inc b/simd/jcolsamp.inc deleted file mode 100644 index 3be446e..0000000 --- a/simd/jcolsamp.inc +++ /dev/null @@ -1,104 +0,0 @@ -; -; jcolsamp.inc - private declarations for color conversion & up/downsampling -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; [TAB8] - -; -------------------------------------------------------------------------- - -; pseudo-resisters to make ordering of RGB configurable -; -%if RGB_RED == 0 -%define mmA mm0 -%define mmB mm1 -%define xmmA xmm0 -%define xmmB xmm1 -%elif RGB_GREEN == 0 -%define mmA mm2 -%define mmB mm3 -%define xmmA xmm2 -%define xmmB xmm3 -%elif RGB_BLUE == 0 -%define mmA mm4 -%define mmB mm5 -%define xmmA xmm4 -%define xmmB xmm5 -%else -%define mmA mm6 -%define mmB mm7 -%define xmmA xmm6 -%define xmmB xmm7 -%endif - -%if RGB_RED == 1 -%define mmC mm0 -%define mmD mm1 -%define xmmC xmm0 -%define xmmD xmm1 -%elif RGB_GREEN == 1 -%define mmC mm2 -%define mmD mm3 -%define xmmC xmm2 -%define xmmD xmm3 -%elif RGB_BLUE == 1 -%define mmC mm4 -%define mmD mm5 -%define xmmC xmm4 -%define xmmD xmm5 -%else -%define mmC mm6 -%define mmD mm7 -%define xmmC xmm6 -%define xmmD xmm7 -%endif - -%if RGB_RED == 2 -%define mmE mm0 -%define mmF mm1 -%define xmmE xmm0 -%define xmmF xmm1 -%elif RGB_GREEN == 2 -%define mmE mm2 -%define mmF mm3 -%define xmmE xmm2 -%define xmmF xmm3 -%elif RGB_BLUE == 2 -%define mmE mm4 -%define mmF mm5 -%define xmmE xmm4 -%define xmmF xmm5 -%else -%define mmE mm6 -%define mmF mm7 -%define xmmE xmm6 -%define xmmF xmm7 -%endif - -%if RGB_RED == 3 -%define mmG mm0 -%define mmH mm1 -%define xmmG xmm0 -%define xmmH xmm1 -%elif RGB_GREEN == 3 -%define mmG mm2 -%define mmH mm3 -%define xmmG xmm2 -%define xmmH xmm3 -%elif RGB_BLUE == 3 -%define mmG mm4 -%define mmH mm5 -%define xmmG xmm4 -%define xmmH xmm5 -%else -%define mmG mm6 -%define mmH mm7 -%define xmmG xmm6 -%define xmmH xmm7 -%endif - -; -------------------------------------------------------------------------- diff --git a/simd/jcsample-mmx.asm b/simd/jcsample-mmx.asm deleted file mode 100644 index 6cd544e..0000000 --- a/simd/jcsample-mmx.asm +++ /dev/null @@ -1,323 +0,0 @@ -; -; jcsample.asm - downsampling (MMX) -; -; Copyright 
2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Downsample pixel values of a single component. -; This version handles the common case of 2:1 horizontal and 1:1 vertical, -; without smoothing. -; -; GLOBAL(void) -; jsimd_h2v1_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor, -; JDIMENSION v_samp_factor, JDIMENSION width_blocks, -; JSAMPARRAY input_data, JSAMPARRAY output_data); -; - -%define img_width(b) (b)+8 ; JDIMENSION image_width -%define max_v_samp(b) (b)+12 ; int max_v_samp_factor -%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor -%define width_blks(b) (b)+20 ; JDIMENSION width_blocks -%define input_data(b) (b)+24 ; JSAMPARRAY input_data -%define output_data(b) (b)+28 ; JSAMPARRAY output_data - - align 16 - global EXTN(jsimd_h2v1_downsample_mmx) - -EXTN(jsimd_h2v1_downsample_mmx): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov ecx, JDIMENSION [width_blks(ebp)] - shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) - jz near .return - - mov edx, JDIMENSION [img_width(ebp)] - - ; -- expand_right_edge - - push ecx - shl ecx,1 ; output_cols * 2 - sub ecx,edx - jle short .expand_end - - mov eax, INT [max_v_samp(ebp)] - test eax,eax - jle short .expand_end - - cld - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - alignx 16,7 
-.expandloop: - push eax - push ecx - - mov edi, JSAMPROW [esi] - add edi,edx - mov al, JSAMPLE [edi-1] - - rep stosb - - pop ecx - pop eax - - add esi, byte SIZEOF_JSAMPROW - dec eax - jg short .expandloop - -.expand_end: - pop ecx ; output_cols - - ; -- h2v1_downsample - - mov eax, JDIMENSION [v_samp(ebp)] ; rowctr - test eax,eax - jle near .return - - mov edx, 0x00010000 ; bias pattern - movd mm7,edx - pcmpeqw mm6,mm6 - punpckldq mm7,mm7 ; mm7={0, 1, 0, 1} - psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, JSAMPARRAY [output_data(ebp)] ; output_data - alignx 16,7 -.rowloop: - push ecx - push edi - push esi - - mov esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr - alignx 16,7 -.columnloop: - - movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] - movq mm1, MMWORD [esi+1*SIZEOF_MMWORD] - movq mm2,mm0 - movq mm3,mm1 - - pand mm0,mm6 - psrlw mm2,BYTE_BIT - pand mm1,mm6 - psrlw mm3,BYTE_BIT - - paddw mm0,mm2 - paddw mm1,mm3 - paddw mm0,mm7 - paddw mm1,mm7 - psrlw mm0,1 - psrlw mm1,1 - - packuswb mm0,mm1 - - movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 - - add esi, byte 2*SIZEOF_MMWORD ; inptr - add edi, byte 1*SIZEOF_MMWORD ; outptr - sub ecx, byte SIZEOF_MMWORD ; outcol - jnz short .columnloop - - pop esi - pop edi - pop ecx - - add esi, byte SIZEOF_JSAMPROW ; input_data - add edi, byte SIZEOF_JSAMPROW ; output_data - dec eax ; rowctr - jg short .rowloop - - emms ; empty MMX state - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved -; pop ebx ; unused - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Downsample pixel values of a single component. -; This version handles the standard case of 2:1 horizontal and 2:1 vertical, -; without smoothing. 
-; -; GLOBAL(void) -; jsimd_h2v2_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor, -; JDIMENSION v_samp_factor, JDIMENSION width_blocks, -; JSAMPARRAY input_data, JSAMPARRAY output_data); -; - -%define img_width(b) (b)+8 ; JDIMENSION image_width -%define max_v_samp(b) (b)+12 ; int max_v_samp_factor -%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor -%define width_blks(b) (b)+20 ; JDIMENSION width_blocks -%define input_data(b) (b)+24 ; JSAMPARRAY input_data -%define output_data(b) (b)+28 ; JSAMPARRAY output_data - - align 16 - global EXTN(jsimd_h2v2_downsample_mmx) - -EXTN(jsimd_h2v2_downsample_mmx): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov ecx, JDIMENSION [width_blks(ebp)] - shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) - jz near .return - - mov edx, JDIMENSION [img_width(ebp)] - - ; -- expand_right_edge - - push ecx - shl ecx,1 ; output_cols * 2 - sub ecx,edx - jle short .expand_end - - mov eax, INT [max_v_samp(ebp)] - test eax,eax - jle short .expand_end - - cld - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - alignx 16,7 -.expandloop: - push eax - push ecx - - mov edi, JSAMPROW [esi] - add edi,edx - mov al, JSAMPLE [edi-1] - - rep stosb - - pop ecx - pop eax - - add esi, byte SIZEOF_JSAMPROW - dec eax - jg short .expandloop - -.expand_end: - pop ecx ; output_cols - - ; -- h2v2_downsample - - mov eax, JDIMENSION [v_samp(ebp)] ; rowctr - test eax,eax - jle near .return - - mov edx, 0x00020001 ; bias pattern - movd mm7,edx - pcmpeqw mm6,mm6 - punpckldq mm7,mm7 ; mm7={1, 2, 1, 2} - psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, JSAMPARRAY [output_data(ebp)] ; output_data - alignx 16,7 -.rowloop: - push ecx - push edi - push esi - - mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 - mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1 - mov edi, JSAMPROW [edi] ; outptr - 
alignx 16,7 -.columnloop: - - movq mm0, MMWORD [edx+0*SIZEOF_MMWORD] - movq mm1, MMWORD [esi+0*SIZEOF_MMWORD] - movq mm2, MMWORD [edx+1*SIZEOF_MMWORD] - movq mm3, MMWORD [esi+1*SIZEOF_MMWORD] - - movq mm4,mm0 - movq mm5,mm1 - pand mm0,mm6 - psrlw mm4,BYTE_BIT - pand mm1,mm6 - psrlw mm5,BYTE_BIT - paddw mm0,mm4 - paddw mm1,mm5 - - movq mm4,mm2 - movq mm5,mm3 - pand mm2,mm6 - psrlw mm4,BYTE_BIT - pand mm3,mm6 - psrlw mm5,BYTE_BIT - paddw mm2,mm4 - paddw mm3,mm5 - - paddw mm0,mm1 - paddw mm2,mm3 - paddw mm0,mm7 - paddw mm2,mm7 - psrlw mm0,2 - psrlw mm2,2 - - packuswb mm0,mm2 - - movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 - - add edx, byte 2*SIZEOF_MMWORD ; inptr0 - add esi, byte 2*SIZEOF_MMWORD ; inptr1 - add edi, byte 1*SIZEOF_MMWORD ; outptr - sub ecx, byte SIZEOF_MMWORD ; outcol - jnz near .columnloop - - pop esi - pop edi - pop ecx - - add esi, byte 2*SIZEOF_JSAMPROW ; input_data - add edi, byte 1*SIZEOF_JSAMPROW ; output_data - dec eax ; rowctr - jg near .rowloop - - emms ; empty MMX state - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved -; pop ebx ; unused - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jcsample-sse2-64.asm b/simd/jcsample-sse2-64.asm deleted file mode 100644 index 40ee15f..0000000 --- a/simd/jcsample-sse2-64.asm +++ /dev/null @@ -1,329 +0,0 @@ -; -; jcsample.asm - downsampling (64-bit SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Downsample pixel values of a single component. -; This version handles the common case of 2:1 horizontal and 1:1 vertical, -; without smoothing. -; -; GLOBAL(void) -; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, -; JDIMENSION v_samp_factor, JDIMENSION width_blocks, -; JSAMPARRAY input_data, JSAMPARRAY output_data); -; - -; r10 = JDIMENSION image_width -; r11 = int max_v_samp_factor -; r12 = JDIMENSION v_samp_factor -; r13 = JDIMENSION width_blocks -; r14 = JSAMPARRAY input_data -; r15 = JSAMPARRAY output_data - - align 16 - global EXTN(jsimd_h2v1_downsample_sse2) - -EXTN(jsimd_h2v1_downsample_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - - mov ecx, r13d - shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols) - jz near .return - - mov edx, r10d - - ; -- expand_right_edge - - push rcx - shl rcx,1 ; output_cols * 2 - sub rcx,rdx - jle short .expand_end - - mov rax, r11 - test rax,rax - jle short .expand_end - - cld - mov rsi, r14 ; input_data -.expandloop: - push rax - push rcx - - mov rdi, JSAMPROW [rsi] - add rdi,rdx - mov al, JSAMPLE [rdi-1] - - rep stosb - - pop rcx - pop rax - - add rsi, byte SIZEOF_JSAMPROW - dec rax - jg short .expandloop - -.expand_end: - pop rcx ; output_cols - - ; -- h2v1_downsample - - mov eax, r12d ; rowctr - test eax,eax - jle near .return - - mov rdx, 0x00010000 ; bias pattern - movd xmm7,edx - pcmpeqw xmm6,xmm6 - pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1} - psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} - - mov rsi, r14 ; input_data - mov rdi, r15 ; output_data -.rowloop: - push rcx - push rdi - push rsi - - mov rsi, JSAMPROW [rsi] ; inptr - mov rdi, JSAMPROW [rdi] ; outptr - - cmp rcx, byte SIZEOF_XMMWORD - 
jae short .columnloop - -.columnloop_r8: - movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] - pxor xmm1,xmm1 - mov rcx, SIZEOF_XMMWORD - jmp short .downsample - -.columnloop: - movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqa xmm1, XMMWORD [rsi+1*SIZEOF_XMMWORD] - -.downsample: - movdqa xmm2,xmm0 - movdqa xmm3,xmm1 - - pand xmm0,xmm6 - psrlw xmm2,BYTE_BIT - pand xmm1,xmm6 - psrlw xmm3,BYTE_BIT - - paddw xmm0,xmm2 - paddw xmm1,xmm3 - paddw xmm0,xmm7 - paddw xmm1,xmm7 - psrlw xmm0,1 - psrlw xmm1,1 - - packuswb xmm0,xmm1 - - movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 - - sub rcx, byte SIZEOF_XMMWORD ; outcol - add rsi, byte 2*SIZEOF_XMMWORD ; inptr - add rdi, byte 1*SIZEOF_XMMWORD ; outptr - cmp rcx, byte SIZEOF_XMMWORD - jae short .columnloop - test rcx,rcx - jnz short .columnloop_r8 - - pop rsi - pop rdi - pop rcx - - add rsi, byte SIZEOF_JSAMPROW ; input_data - add rdi, byte SIZEOF_JSAMPROW ; output_data - dec rax ; rowctr - jg near .rowloop - -.return: - uncollect_args - pop rbp - ret - -; -------------------------------------------------------------------------- -; -; Downsample pixel values of a single component. -; This version handles the standard case of 2:1 horizontal and 2:1 vertical, -; without smoothing. 
-; -; GLOBAL(void) -; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, -; JDIMENSION v_samp_factor, JDIMENSION width_blocks, -; JSAMPARRAY input_data, JSAMPARRAY output_data); -; - -; r10 = JDIMENSION image_width -; r11 = int max_v_samp_factor -; r12 = JDIMENSION v_samp_factor -; r13 = JDIMENSION width_blocks -; r14 = JSAMPARRAY input_data -; r15 = JSAMPARRAY output_data - - align 16 - global EXTN(jsimd_h2v2_downsample_sse2) - -EXTN(jsimd_h2v2_downsample_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - - mov ecx, r13d - shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols) - jz near .return - - mov edx, r10d - - ; -- expand_right_edge - - push rcx - shl rcx,1 ; output_cols * 2 - sub rcx,rdx - jle short .expand_end - - mov rax, r11 - test rax,rax - jle short .expand_end - - cld - mov rsi, r14 ; input_data -.expandloop: - push rax - push rcx - - mov rdi, JSAMPROW [rsi] - add rdi,rdx - mov al, JSAMPLE [rdi-1] - - rep stosb - - pop rcx - pop rax - - add rsi, byte SIZEOF_JSAMPROW - dec rax - jg short .expandloop - -.expand_end: - pop rcx ; output_cols - - ; -- h2v2_downsample - - mov eax, r12d ; rowctr - test rax,rax - jle near .return - - mov rdx, 0x00020001 ; bias pattern - movd xmm7,edx - pcmpeqw xmm6,xmm6 - pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2} - psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} - - mov rsi, r14 ; input_data - mov rdi, r15 ; output_data -.rowloop: - push rcx - push rdi - push rsi - - mov rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0 - mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1 - mov rdi, JSAMPROW [rdi] ; outptr - - cmp rcx, byte SIZEOF_XMMWORD - jae short .columnloop - -.columnloop_r8: - movdqa xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD] - movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] - pxor xmm2,xmm2 - pxor xmm3,xmm3 - mov rcx, SIZEOF_XMMWORD - jmp short .downsample - -.columnloop: - movdqa xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD] - movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqa xmm2, XMMWORD 
[rdx+1*SIZEOF_XMMWORD] - movdqa xmm3, XMMWORD [rsi+1*SIZEOF_XMMWORD] - -.downsample: - movdqa xmm4,xmm0 - movdqa xmm5,xmm1 - pand xmm0,xmm6 - psrlw xmm4,BYTE_BIT - pand xmm1,xmm6 - psrlw xmm5,BYTE_BIT - paddw xmm0,xmm4 - paddw xmm1,xmm5 - - movdqa xmm4,xmm2 - movdqa xmm5,xmm3 - pand xmm2,xmm6 - psrlw xmm4,BYTE_BIT - pand xmm3,xmm6 - psrlw xmm5,BYTE_BIT - paddw xmm2,xmm4 - paddw xmm3,xmm5 - - paddw xmm0,xmm1 - paddw xmm2,xmm3 - paddw xmm0,xmm7 - paddw xmm2,xmm7 - psrlw xmm0,2 - psrlw xmm2,2 - - packuswb xmm0,xmm2 - - movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 - - sub rcx, byte SIZEOF_XMMWORD ; outcol - add rdx, byte 2*SIZEOF_XMMWORD ; inptr0 - add rsi, byte 2*SIZEOF_XMMWORD ; inptr1 - add rdi, byte 1*SIZEOF_XMMWORD ; outptr - cmp rcx, byte SIZEOF_XMMWORD - jae near .columnloop - test rcx,rcx - jnz near .columnloop_r8 - - pop rsi - pop rdi - pop rcx - - add rsi, byte 2*SIZEOF_JSAMPROW ; input_data - add rdi, byte 1*SIZEOF_JSAMPROW ; output_data - dec rax ; rowctr - jg near .rowloop - -.return: - uncollect_args - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jcsample-sse2.asm b/simd/jcsample-sse2.asm deleted file mode 100644 index 83c9d15..0000000 --- a/simd/jcsample-sse2.asm +++ /dev/null @@ -1,350 +0,0 @@ -; -; jcsample.asm - downsampling (SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Downsample pixel values of a single component. -; This version handles the common case of 2:1 horizontal and 1:1 vertical, -; without smoothing. -; -; GLOBAL(void) -; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, -; JDIMENSION v_samp_factor, JDIMENSION width_blocks, -; JSAMPARRAY input_data, JSAMPARRAY output_data); -; - -%define img_width(b) (b)+8 ; JDIMENSION image_width -%define max_v_samp(b) (b)+12 ; int max_v_samp_factor -%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor -%define width_blks(b) (b)+20 ; JDIMENSION width_blocks -%define input_data(b) (b)+24 ; JSAMPARRAY input_data -%define output_data(b) (b)+28 ; JSAMPARRAY output_data - - align 16 - global EXTN(jsimd_h2v1_downsample_sse2) - -EXTN(jsimd_h2v1_downsample_sse2): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov ecx, JDIMENSION [width_blks(ebp)] - shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) - jz near .return - - mov edx, JDIMENSION [img_width(ebp)] - - ; -- expand_right_edge - - push ecx - shl ecx,1 ; output_cols * 2 - sub ecx,edx - jle short .expand_end - - mov eax, INT [max_v_samp(ebp)] - test eax,eax - jle short .expand_end - - cld - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - alignx 16,7 -.expandloop: - push eax - push ecx - - mov edi, JSAMPROW [esi] - add edi,edx - mov al, JSAMPLE [edi-1] - - rep stosb - - pop ecx - pop eax - - add esi, byte SIZEOF_JSAMPROW - dec eax - jg short .expandloop - -.expand_end: - pop ecx ; output_cols - - ; -- h2v1_downsample - - mov eax, JDIMENSION [v_samp(ebp)] ; rowctr - test eax,eax - jle near .return - - mov edx, 0x00010000 ; bias pattern - movd 
xmm7,edx - pcmpeqw xmm6,xmm6 - pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1} - psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, JSAMPARRAY [output_data(ebp)] ; output_data - alignx 16,7 -.rowloop: - push ecx - push edi - push esi - - mov esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr - - cmp ecx, byte SIZEOF_XMMWORD - jae short .columnloop - alignx 16,7 - -.columnloop_r8: - movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] - pxor xmm1,xmm1 - mov ecx, SIZEOF_XMMWORD - jmp short .downsample - alignx 16,7 - -.columnloop: - movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqa xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD] - -.downsample: - movdqa xmm2,xmm0 - movdqa xmm3,xmm1 - - pand xmm0,xmm6 - psrlw xmm2,BYTE_BIT - pand xmm1,xmm6 - psrlw xmm3,BYTE_BIT - - paddw xmm0,xmm2 - paddw xmm1,xmm3 - paddw xmm0,xmm7 - paddw xmm1,xmm7 - psrlw xmm0,1 - psrlw xmm1,1 - - packuswb xmm0,xmm1 - - movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 - - sub ecx, byte SIZEOF_XMMWORD ; outcol - add esi, byte 2*SIZEOF_XMMWORD ; inptr - add edi, byte 1*SIZEOF_XMMWORD ; outptr - cmp ecx, byte SIZEOF_XMMWORD - jae short .columnloop - test ecx,ecx - jnz short .columnloop_r8 - - pop esi - pop edi - pop ecx - - add esi, byte SIZEOF_JSAMPROW ; input_data - add edi, byte SIZEOF_JSAMPROW ; output_data - dec eax ; rowctr - jg near .rowloop - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved -; pop ebx ; unused - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Downsample pixel values of a single component. -; This version handles the standard case of 2:1 horizontal and 2:1 vertical, -; without smoothing. 
-; -; GLOBAL(void) -; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor, -; JDIMENSION v_samp_factor, JDIMENSION width_blocks, -; JSAMPARRAY input_data, JSAMPARRAY output_data); -; - -%define img_width(b) (b)+8 ; JDIMENSION image_width -%define max_v_samp(b) (b)+12 ; int max_v_samp_factor -%define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor -%define width_blks(b) (b)+20 ; JDIMENSION width_blocks -%define input_data(b) (b)+24 ; JSAMPARRAY input_data -%define output_data(b) (b)+28 ; JSAMPARRAY output_data - - align 16 - global EXTN(jsimd_h2v2_downsample_sse2) - -EXTN(jsimd_h2v2_downsample_sse2): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov ecx, JDIMENSION [width_blks(ebp)] - shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols) - jz near .return - - mov edx, JDIMENSION [img_width(ebp)] - - ; -- expand_right_edge - - push ecx - shl ecx,1 ; output_cols * 2 - sub ecx,edx - jle short .expand_end - - mov eax, INT [max_v_samp(ebp)] - test eax,eax - jle short .expand_end - - cld - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - alignx 16,7 -.expandloop: - push eax - push ecx - - mov edi, JSAMPROW [esi] - add edi,edx - mov al, JSAMPLE [edi-1] - - rep stosb - - pop ecx - pop eax - - add esi, byte SIZEOF_JSAMPROW - dec eax - jg short .expandloop - -.expand_end: - pop ecx ; output_cols - - ; -- h2v2_downsample - - mov eax, JDIMENSION [v_samp(ebp)] ; rowctr - test eax,eax - jle near .return - - mov edx, 0x00020001 ; bias pattern - movd xmm7,edx - pcmpeqw xmm6,xmm6 - pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2} - psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, JSAMPARRAY [output_data(ebp)] ; output_data - alignx 16,7 -.rowloop: - push ecx - push edi - push esi - - mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 - mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1 - mov edi, 
JSAMPROW [edi] ; outptr - - cmp ecx, byte SIZEOF_XMMWORD - jae short .columnloop - alignx 16,7 - -.columnloop_r8: - movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] - movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] - pxor xmm2,xmm2 - pxor xmm3,xmm3 - mov ecx, SIZEOF_XMMWORD - jmp short .downsample - alignx 16,7 - -.columnloop: - movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD] - movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqa xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD] - movdqa xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD] - -.downsample: - movdqa xmm4,xmm0 - movdqa xmm5,xmm1 - pand xmm0,xmm6 - psrlw xmm4,BYTE_BIT - pand xmm1,xmm6 - psrlw xmm5,BYTE_BIT - paddw xmm0,xmm4 - paddw xmm1,xmm5 - - movdqa xmm4,xmm2 - movdqa xmm5,xmm3 - pand xmm2,xmm6 - psrlw xmm4,BYTE_BIT - pand xmm3,xmm6 - psrlw xmm5,BYTE_BIT - paddw xmm2,xmm4 - paddw xmm3,xmm5 - - paddw xmm0,xmm1 - paddw xmm2,xmm3 - paddw xmm0,xmm7 - paddw xmm2,xmm7 - psrlw xmm0,2 - psrlw xmm2,2 - - packuswb xmm0,xmm2 - - movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 - - sub ecx, byte SIZEOF_XMMWORD ; outcol - add edx, byte 2*SIZEOF_XMMWORD ; inptr0 - add esi, byte 2*SIZEOF_XMMWORD ; inptr1 - add edi, byte 1*SIZEOF_XMMWORD ; outptr - cmp ecx, byte SIZEOF_XMMWORD - jae near .columnloop - test ecx,ecx - jnz near .columnloop_r8 - - pop esi - pop edi - pop ecx - - add esi, byte 2*SIZEOF_JSAMPROW ; input_data - add edi, byte 1*SIZEOF_JSAMPROW ; output_data - dec eax ; rowctr - jg near .rowloop - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved -; pop ebx ; unused - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jdcolext-mmx.asm b/simd/jdcolext-mmx.asm deleted file mode 100644 index 21e34f6..0000000 --- a/simd/jdcolext-mmx.asm +++ /dev/null @@ -1,404 +0,0 @@ -; -; jdcolext.asm - colorspace conversion (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Convert some rows of samples to the output colorspace. -; -; GLOBAL(void) -; jsimd_ycc_rgb_convert_mmx (JDIMENSION out_width, -; JSAMPIMAGE input_buf, JDIMENSION input_row, -; JSAMPARRAY output_buf, int num_rows) -; - -%define out_width(b) (b)+8 ; JDIMENSION out_width -%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf -%define input_row(b) (b)+16 ; JDIMENSION input_row -%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf -%define num_rows(b) (b)+24 ; int num_rows - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 2 -%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr - - align 16 - global EXTN(jsimd_ycc_rgb_convert_mmx) - -EXTN(jsimd_ycc_rgb_convert_mmx): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic eax ; make a room for GOT address - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - movpic 
POINTER [gotptr], ebx ; save GOT address - - mov ecx, JDIMENSION [out_width(eax)] ; num_cols - test ecx,ecx - jz near .return - - push ecx - - mov edi, JSAMPIMAGE [input_buf(eax)] - mov ecx, JDIMENSION [input_row(eax)] - mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] - mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] - mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] - lea esi, [esi+ecx*SIZEOF_JSAMPROW] - lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] - lea edx, [edx+ecx*SIZEOF_JSAMPROW] - - pop ecx - - mov edi, JSAMPARRAY [output_buf(eax)] - mov eax, INT [num_rows(eax)] - test eax,eax - jle near .return - alignx 16,7 -.rowloop: - push eax - push edi - push edx - push ebx - push esi - push ecx ; col - - mov esi, JSAMPROW [esi] ; inptr0 - mov ebx, JSAMPROW [ebx] ; inptr1 - mov edx, JSAMPROW [edx] ; inptr2 - mov edi, JSAMPROW [edi] ; outptr - movpic eax, POINTER [gotptr] ; load GOT address (eax) - alignx 16,7 -.columnloop: - - movq mm5, MMWORD [ebx] ; mm5=Cb(01234567) - movq mm1, MMWORD [edx] ; mm1=Cr(01234567) - - pcmpeqw mm4,mm4 - pcmpeqw mm7,mm7 - psrlw mm4,BYTE_BIT - psllw mm7,7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80} - movq mm0,mm4 ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..} - - pand mm4,mm5 ; mm4=Cb(0246)=CbE - psrlw mm5,BYTE_BIT ; mm5=Cb(1357)=CbO - pand mm0,mm1 ; mm0=Cr(0246)=CrE - psrlw mm1,BYTE_BIT ; mm1=Cr(1357)=CrO - - paddw mm4,mm7 - paddw mm5,mm7 - paddw mm0,mm7 - paddw mm1,mm7 - - ; (Original) - ; R = Y + 1.40200 * Cr - ; G = Y - 0.34414 * Cb - 0.71414 * Cr - ; B = Y + 1.77200 * Cb - ; - ; (This implementation) - ; R = Y + 0.40200 * Cr + Cr - ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr - ; B = Y - 0.22800 * Cb + Cb + Cb - - movq mm2,mm4 ; mm2=CbE - movq mm3,mm5 ; mm3=CbO - paddw mm4,mm4 ; mm4=2*CbE - paddw mm5,mm5 ; mm5=2*CbO - movq mm6,mm0 ; mm6=CrE - movq mm7,mm1 ; mm7=CrO - paddw mm0,mm0 ; mm0=2*CrE - paddw mm1,mm1 ; mm1=2*CrO - - pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbE * -FIX(0.22800)) - pmulhw mm5,[GOTOFF(eax,PW_MF0228)] ; mm5=(2*CbO * -FIX(0.22800)) - pmulhw 
mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrE * FIX(0.40200)) - pmulhw mm1,[GOTOFF(eax,PW_F0402)] ; mm1=(2*CrO * FIX(0.40200)) - - paddw mm4,[GOTOFF(eax,PW_ONE)] - paddw mm5,[GOTOFF(eax,PW_ONE)] - psraw mm4,1 ; mm4=(CbE * -FIX(0.22800)) - psraw mm5,1 ; mm5=(CbO * -FIX(0.22800)) - paddw mm0,[GOTOFF(eax,PW_ONE)] - paddw mm1,[GOTOFF(eax,PW_ONE)] - psraw mm0,1 ; mm0=(CrE * FIX(0.40200)) - psraw mm1,1 ; mm1=(CrO * FIX(0.40200)) - - paddw mm4,mm2 - paddw mm5,mm3 - paddw mm4,mm2 ; mm4=(CbE * FIX(1.77200))=(B-Y)E - paddw mm5,mm3 ; mm5=(CbO * FIX(1.77200))=(B-Y)O - paddw mm0,mm6 ; mm0=(CrE * FIX(1.40200))=(R-Y)E - paddw mm1,mm7 ; mm1=(CrO * FIX(1.40200))=(R-Y)O - - movq MMWORD [wk(0)], mm4 ; wk(0)=(B-Y)E - movq MMWORD [wk(1)], mm5 ; wk(1)=(B-Y)O - - movq mm4,mm2 - movq mm5,mm3 - punpcklwd mm2,mm6 - punpckhwd mm4,mm6 - pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)] - pmaddwd mm4,[GOTOFF(eax,PW_MF0344_F0285)] - punpcklwd mm3,mm7 - punpckhwd mm5,mm7 - pmaddwd mm3,[GOTOFF(eax,PW_MF0344_F0285)] - pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)] - - paddd mm2,[GOTOFF(eax,PD_ONEHALF)] - paddd mm4,[GOTOFF(eax,PD_ONEHALF)] - psrad mm2,SCALEBITS - psrad mm4,SCALEBITS - paddd mm3,[GOTOFF(eax,PD_ONEHALF)] - paddd mm5,[GOTOFF(eax,PD_ONEHALF)] - psrad mm3,SCALEBITS - psrad mm5,SCALEBITS - - packssdw mm2,mm4 ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285) - packssdw mm3,mm5 ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285) - psubw mm2,mm6 ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E - psubw mm3,mm7 ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O - - movq mm5, MMWORD [esi] ; mm5=Y(01234567) - - pcmpeqw mm4,mm4 - psrlw mm4,BYTE_BIT ; mm4={0xFF 0x00 0xFF 0x00 ..} - pand mm4,mm5 ; mm4=Y(0246)=YE - psrlw mm5,BYTE_BIT ; mm5=Y(1357)=YO - - paddw mm0,mm4 ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6) - paddw mm1,mm5 ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7) - packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **) - packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **) - - paddw mm2,mm4 ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6) - paddw mm3,mm5 ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7) 
- packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **) - packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **) - - paddw mm4, MMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6) - paddw mm5, MMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7) - packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **) - packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **) - -%if RGB_PIXELSIZE == 3 ; --------------- - - ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) - ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) - ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) - ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **) - - punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) - punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07) - punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27) - - movq mmG,mmA - movq mmH,mmA - punpcklwd mmA,mmE ; mmA=(00 10 20 01 02 12 22 03) - punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07) - - psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --) - psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --) - - movq mmC,mmD - movq mmB,mmD - punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14) - punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --) - - psrlq mmB,2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --) - - movq mmF,mmE - punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25) - punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --) - - punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12) - punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05) - punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27) - - cmp ecx, byte SIZEOF_MMWORD - jb short .column_st16 - - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq MMWORD [edi+1*SIZEOF_MMWORD], mmE - movq MMWORD [edi+2*SIZEOF_MMWORD], mmC - - sub ecx, byte SIZEOF_MMWORD - jz short .nextrow - - add esi, byte SIZEOF_MMWORD ; inptr0 - add ebx, byte SIZEOF_MMWORD ; inptr1 - add edx, byte SIZEOF_MMWORD ; inptr2 - add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr - jmp near .columnloop - alignx 16,7 - -.column_st16: - lea ecx, [ecx+ecx*2] ; 
imul ecx, RGB_PIXELSIZE - cmp ecx, byte 2*SIZEOF_MMWORD - jb short .column_st8 - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq MMWORD [edi+1*SIZEOF_MMWORD], mmE - movq mmA,mmC - sub ecx, byte 2*SIZEOF_MMWORD - add edi, byte 2*SIZEOF_MMWORD - jmp short .column_st4 -.column_st8: - cmp ecx, byte SIZEOF_MMWORD - jb short .column_st4 - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq mmA,mmE - sub ecx, byte SIZEOF_MMWORD - add edi, byte SIZEOF_MMWORD -.column_st4: - movd eax,mmA - cmp ecx, byte SIZEOF_DWORD - jb short .column_st2 - mov DWORD [edi+0*SIZEOF_DWORD], eax - psrlq mmA,DWORD_BIT - movd eax,mmA - sub ecx, byte SIZEOF_DWORD - add edi, byte SIZEOF_DWORD -.column_st2: - cmp ecx, byte SIZEOF_WORD - jb short .column_st1 - mov WORD [edi+0*SIZEOF_WORD], ax - shr eax,WORD_BIT - sub ecx, byte SIZEOF_WORD - add edi, byte SIZEOF_WORD -.column_st1: - cmp ecx, byte SIZEOF_BYTE - jb short .nextrow - mov BYTE [edi+0*SIZEOF_BYTE], al - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -%ifdef RGBX_FILLER_0XFF - pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) - pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) -%else - pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) - pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) -%endif - ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) - ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) - ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) - ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) - - punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) - punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36) - punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17) - punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37) - - movq mmC,mmA - punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32) - punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36) - movq mmG,mmB - punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33) - punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37) - - movq mmD,mmA - punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31) - 
punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33) - movq mmH,mmC - punpckldq mmC,mmG ; mmC=(04 14 24 34 05 15 25 35) - punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37) - - cmp ecx, byte SIZEOF_MMWORD - jb short .column_st16 - - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq MMWORD [edi+1*SIZEOF_MMWORD], mmD - movq MMWORD [edi+2*SIZEOF_MMWORD], mmC - movq MMWORD [edi+3*SIZEOF_MMWORD], mmH - - sub ecx, byte SIZEOF_MMWORD - jz short .nextrow - - add esi, byte SIZEOF_MMWORD ; inptr0 - add ebx, byte SIZEOF_MMWORD ; inptr1 - add edx, byte SIZEOF_MMWORD ; inptr2 - add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr - jmp near .columnloop - alignx 16,7 - -.column_st16: - cmp ecx, byte SIZEOF_MMWORD/2 - jb short .column_st8 - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq MMWORD [edi+1*SIZEOF_MMWORD], mmD - movq mmA,mmC - movq mmD,mmH - sub ecx, byte SIZEOF_MMWORD/2 - add edi, byte 2*SIZEOF_MMWORD -.column_st8: - cmp ecx, byte SIZEOF_MMWORD/4 - jb short .column_st4 - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq mmA,mmD - sub ecx, byte SIZEOF_MMWORD/4 - add edi, byte 1*SIZEOF_MMWORD -.column_st4: - cmp ecx, byte SIZEOF_MMWORD/8 - jb short .nextrow - movd DWORD [edi+0*SIZEOF_DWORD], mmA - -%endif ; RGB_PIXELSIZE ; --------------- - - alignx 16,7 - -.nextrow: - pop ecx - pop esi - pop ebx - pop edx - pop edi - pop eax - - add esi, byte SIZEOF_JSAMPROW - add ebx, byte SIZEOF_JSAMPROW - add edx, byte SIZEOF_JSAMPROW - add edi, byte SIZEOF_JSAMPROW ; output_buf - dec eax ; num_rows - jg near .rowloop - - emms ; empty MMX state - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jdcolext-sse2-64.asm b/simd/jdcolext-sse2-64.asm deleted file mode 100644 index 4634066..0000000 --- a/simd/jdcolext-sse2-64.asm +++ /dev/null @@ -1,440 +0,0 @@ -; -; jdcolext.asm - colorspace conversion (64-bit SSE2) -; -; Copyright 2009, 2012 Pierre Ossman for Cendio AB -; Copyright (C) 2009, 2012, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Convert some rows of samples to the output colorspace. 
-; -; GLOBAL(void) -; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width, -; JSAMPIMAGE input_buf, JDIMENSION input_row, -; JSAMPARRAY output_buf, int num_rows) -; - -; r10 = JDIMENSION out_width -; r11 = JSAMPIMAGE input_buf -; r12 = JDIMENSION input_row -; r13 = JSAMPARRAY output_buf -; r14 = int num_rows - -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_ycc_rgb_convert_sse2) - -EXTN(jsimd_ycc_rgb_convert_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - push rbx - - mov ecx, r10d ; num_cols - test rcx,rcx - jz near .return - - push rcx - - mov rdi, r11 - mov ecx, r12d - mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] - mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] - mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] - lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] - lea rbx, [rbx+rcx*SIZEOF_JSAMPROW] - lea rdx, [rdx+rcx*SIZEOF_JSAMPROW] - - pop rcx - - mov rdi, r13 - mov eax, r14d - test rax,rax - jle near .return -.rowloop: - push rax - push rdi - push rdx - push rbx - push rsi - push rcx ; col - - mov rsi, JSAMPROW [rsi] ; inptr0 - mov rbx, JSAMPROW [rbx] ; inptr1 - mov rdx, JSAMPROW [rdx] ; inptr2 - mov rdi, JSAMPROW [rdi] ; outptr -.columnloop: - - movdqa xmm5, XMMWORD [rbx] ; xmm5=Cb(0123456789ABCDEF) - movdqa xmm1, XMMWORD [rdx] ; xmm1=Cr(0123456789ABCDEF) - - pcmpeqw xmm4,xmm4 - pcmpeqw xmm7,xmm7 - psrlw xmm4,BYTE_BIT - psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} - movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..} - - pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE - psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO - pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE - psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO - - paddw xmm4,xmm7 - paddw xmm5,xmm7 - paddw xmm0,xmm7 - paddw xmm1,xmm7 - - ; (Original) - ; R = Y + 1.40200 * Cr - ; G = Y - 0.34414 * Cb - 0.71414 
* Cr - ; B = Y + 1.77200 * Cb - ; - ; (This implementation) - ; R = Y + 0.40200 * Cr + Cr - ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr - ; B = Y - 0.22800 * Cb + Cb + Cb - - movdqa xmm2,xmm4 ; xmm2=CbE - movdqa xmm3,xmm5 ; xmm3=CbO - paddw xmm4,xmm4 ; xmm4=2*CbE - paddw xmm5,xmm5 ; xmm5=2*CbO - movdqa xmm6,xmm0 ; xmm6=CrE - movdqa xmm7,xmm1 ; xmm7=CrO - paddw xmm0,xmm0 ; xmm0=2*CrE - paddw xmm1,xmm1 ; xmm1=2*CrO - - pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbE * -FIX(0.22800)) - pmulhw xmm5,[rel PW_MF0228] ; xmm5=(2*CbO * -FIX(0.22800)) - pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrE * FIX(0.40200)) - pmulhw xmm1,[rel PW_F0402] ; xmm1=(2*CrO * FIX(0.40200)) - - paddw xmm4,[rel PW_ONE] - paddw xmm5,[rel PW_ONE] - psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800)) - psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800)) - paddw xmm0,[rel PW_ONE] - paddw xmm1,[rel PW_ONE] - psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200)) - psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200)) - - paddw xmm4,xmm2 - paddw xmm5,xmm3 - paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E - paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O - paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E - paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O - - movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E - movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O - - movdqa xmm4,xmm2 - movdqa xmm5,xmm3 - punpcklwd xmm2,xmm6 - punpckhwd xmm4,xmm6 - pmaddwd xmm2,[rel PW_MF0344_F0285] - pmaddwd xmm4,[rel PW_MF0344_F0285] - punpcklwd xmm3,xmm7 - punpckhwd xmm5,xmm7 - pmaddwd xmm3,[rel PW_MF0344_F0285] - pmaddwd xmm5,[rel PW_MF0344_F0285] - - paddd xmm2,[rel PD_ONEHALF] - paddd xmm4,[rel PD_ONEHALF] - psrad xmm2,SCALEBITS - psrad xmm4,SCALEBITS - paddd xmm3,[rel PD_ONEHALF] - paddd xmm5,[rel PD_ONEHALF] - psrad xmm3,SCALEBITS - psrad xmm5,SCALEBITS - - packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285) - packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285) - psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E - psubw xmm3,xmm7 ; 
xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O - - movdqa xmm5, XMMWORD [rsi] ; xmm5=Y(0123456789ABCDEF) - - pcmpeqw xmm4,xmm4 - psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..} - pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE - psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO - - paddw xmm0,xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE) - paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF) - packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) - packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) - - paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE) - paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF) - packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) - packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) - - paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE) - paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF) - packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) - packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) - -%if RGB_PIXELSIZE == 3 ; --------------- - - ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) - ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) - ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) - ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) - - punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) - punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) - punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) - - movdqa xmmG,xmmA - movdqa xmmH,xmmA - punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) - punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) - - psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) - psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) - - movdqa xmmC,xmmD - movdqa xmmB,xmmD - punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) - punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 
1F 2F -- --) - - psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) - - movdqa xmmF,xmmE - punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) - punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) - - pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) - movdqa xmmB,xmmE - punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) - punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) - punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) - - pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) - movdqa xmmB,xmmF - punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) - punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) - punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) - - punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) - punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) - punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) - - cmp rcx, byte SIZEOF_XMMWORD - jb short .column_st32 - - test rdi, SIZEOF_XMMWORD-1 - jnz short .out1 - ; --(aligned)------------------- - movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF - jmp short .out0 -.out1: ; --(unaligned)----------------- - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF -.out0: - add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr - sub rcx, byte SIZEOF_XMMWORD - jz near .nextrow - - add rsi, byte SIZEOF_XMMWORD ; inptr0 - add rbx, byte SIZEOF_XMMWORD ; inptr1 - add rdx, byte SIZEOF_XMMWORD ; inptr2 - jmp near .columnloop - -.column_st32: - lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE - cmp rcx, 
byte 2*SIZEOF_XMMWORD - jb short .column_st16 - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - add rdi, byte 2*SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmF - sub rcx, byte 2*SIZEOF_XMMWORD - jmp short .column_st15 -.column_st16: - cmp rcx, byte SIZEOF_XMMWORD - jb short .column_st15 - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmD - sub rcx, byte SIZEOF_XMMWORD -.column_st15: - ; Store the lower 8 bytes of xmmA to the output when it has enough - ; space. - cmp rcx, byte SIZEOF_MMWORD - jb short .column_st7 - movq XMM_MMWORD [rdi], xmmA - add rdi, byte SIZEOF_MMWORD - sub rcx, byte SIZEOF_MMWORD - psrldq xmmA, SIZEOF_MMWORD -.column_st7: - ; Store the lower 4 bytes of xmmA to the output when it has enough - ; space. - cmp rcx, byte SIZEOF_DWORD - jb short .column_st3 - movd XMM_DWORD [rdi], xmmA - add rdi, byte SIZEOF_DWORD - sub rcx, byte SIZEOF_DWORD - psrldq xmmA, SIZEOF_DWORD -.column_st3: - ; Store the lower 2 bytes of rax to the output when it has enough - ; space. - movd eax, xmmA - cmp rcx, byte SIZEOF_WORD - jb short .column_st1 - mov WORD [rdi], ax - add rdi, byte SIZEOF_WORD - sub rcx, byte SIZEOF_WORD - shr rax, 16 -.column_st1: - ; Store the lower 1 byte of rax to the output when it has enough - ; space. 
- test rcx, rcx - jz short .nextrow - mov BYTE [rdi], al - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -%ifdef RGBX_FILLER_0XFF - pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) - pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) -%else - pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) - pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) -%endif - ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) - ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) - ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) - ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) - - punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) - punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) - punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) - punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) - - movdqa xmmC,xmmA - punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) - punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) - movdqa xmmG,xmmB - punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) - punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) - - movdqa xmmD,xmmA - punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) - punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) - movdqa xmmH,xmmC - punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) - punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) - - cmp rcx, byte SIZEOF_XMMWORD - jb short .column_st32 - - test rdi, SIZEOF_XMMWORD-1 - jnz short .out1 - ; --(aligned)------------------- - movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC - movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH - jmp short 
.out0 -.out1: ; --(unaligned)----------------- - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC - movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH -.out0: - add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr - sub rcx, byte SIZEOF_XMMWORD - jz near .nextrow - - add rsi, byte SIZEOF_XMMWORD ; inptr0 - add rbx, byte SIZEOF_XMMWORD ; inptr1 - add rdx, byte SIZEOF_XMMWORD ; inptr2 - jmp near .columnloop - -.column_st32: - cmp rcx, byte SIZEOF_XMMWORD/2 - jb short .column_st16 - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - add rdi, byte 2*SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmC - movdqa xmmD,xmmH - sub rcx, byte SIZEOF_XMMWORD/2 -.column_st16: - cmp rcx, byte SIZEOF_XMMWORD/4 - jb short .column_st15 - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmD - sub rcx, byte SIZEOF_XMMWORD/4 -.column_st15: - ; Store two pixels (8 bytes) of xmmA to the output when it has enough - ; space. - cmp rcx, byte SIZEOF_XMMWORD/8 - jb short .column_st7 - movq MMWORD [rdi], xmmA - add rdi, byte SIZEOF_XMMWORD/8*4 - sub rcx, byte SIZEOF_XMMWORD/8 - psrldq xmmA, SIZEOF_XMMWORD/8*4 -.column_st7: - ; Store one pixel (4 bytes) of xmmA to the output when it has enough - ; space. - test rcx, rcx - jz short .nextrow - movd XMM_DWORD [rdi], xmmA - -%endif ; RGB_PIXELSIZE ; --------------- - -.nextrow: - pop rcx - pop rsi - pop rbx - pop rdx - pop rdi - pop rax - - add rsi, byte SIZEOF_JSAMPROW - add rbx, byte SIZEOF_JSAMPROW - add rdx, byte SIZEOF_JSAMPROW - add rdi, byte SIZEOF_JSAMPROW ; output_buf - dec rax ; num_rows - jg near .rowloop - - sfence ; flush the write buffer - -.return: - pop rbx - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jdcolext-sse2.asm b/simd/jdcolext-sse2.asm deleted file mode 100644 index 682aef3..0000000 --- a/simd/jdcolext-sse2.asm +++ /dev/null @@ -1,459 +0,0 @@ -; -; jdcolext.asm - colorspace conversion (SSE2) -; -; Copyright 2009, 2012 Pierre Ossman for Cendio AB -; Copyright (C) 2012, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Convert some rows of samples to the output colorspace. -; -; GLOBAL(void) -; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width, -; JSAMPIMAGE input_buf, JDIMENSION input_row, -; JSAMPARRAY output_buf, int num_rows) -; - -%define out_width(b) (b)+8 ; JDIMENSION out_width -%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf -%define input_row(b) (b)+16 ; JDIMENSION input_row -%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf -%define num_rows(b) (b)+24 ; int num_rows - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 -%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr - - align 16 - global EXTN(jsimd_ycc_rgb_convert_sse2) - -EXTN(jsimd_ycc_rgb_convert_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic eax ; make a room for GOT address - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - 
push edi - - get_GOT ebx ; get GOT address - movpic POINTER [gotptr], ebx ; save GOT address - - mov ecx, JDIMENSION [out_width(eax)] ; num_cols - test ecx,ecx - jz near .return - - push ecx - - mov edi, JSAMPIMAGE [input_buf(eax)] - mov ecx, JDIMENSION [input_row(eax)] - mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] - mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] - mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] - lea esi, [esi+ecx*SIZEOF_JSAMPROW] - lea ebx, [ebx+ecx*SIZEOF_JSAMPROW] - lea edx, [edx+ecx*SIZEOF_JSAMPROW] - - pop ecx - - mov edi, JSAMPARRAY [output_buf(eax)] - mov eax, INT [num_rows(eax)] - test eax,eax - jle near .return - alignx 16,7 -.rowloop: - push eax - push edi - push edx - push ebx - push esi - push ecx ; col - - mov esi, JSAMPROW [esi] ; inptr0 - mov ebx, JSAMPROW [ebx] ; inptr1 - mov edx, JSAMPROW [edx] ; inptr2 - mov edi, JSAMPROW [edi] ; outptr - movpic eax, POINTER [gotptr] ; load GOT address (eax) - alignx 16,7 -.columnloop: - - movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF) - movdqa xmm1, XMMWORD [edx] ; xmm1=Cr(0123456789ABCDEF) - - pcmpeqw xmm4,xmm4 - pcmpeqw xmm7,xmm7 - psrlw xmm4,BYTE_BIT - psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} - movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..} - - pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE - psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO - pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE - psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO - - paddw xmm4,xmm7 - paddw xmm5,xmm7 - paddw xmm0,xmm7 - paddw xmm1,xmm7 - - ; (Original) - ; R = Y + 1.40200 * Cr - ; G = Y - 0.34414 * Cb - 0.71414 * Cr - ; B = Y + 1.77200 * Cb - ; - ; (This implementation) - ; R = Y + 0.40200 * Cr + Cr - ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr - ; B = Y - 0.22800 * Cb + Cb + Cb - - movdqa xmm2,xmm4 ; xmm2=CbE - movdqa xmm3,xmm5 ; xmm3=CbO - paddw xmm4,xmm4 ; xmm4=2*CbE - paddw xmm5,xmm5 ; xmm5=2*CbO - movdqa xmm6,xmm0 ; xmm6=CrE - movdqa xmm7,xmm1 ; xmm7=CrO - paddw xmm0,xmm0 ; xmm0=2*CrE - paddw xmm1,xmm1 ; 
xmm1=2*CrO - - pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbE * -FIX(0.22800)) - pmulhw xmm5,[GOTOFF(eax,PW_MF0228)] ; xmm5=(2*CbO * -FIX(0.22800)) - pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrE * FIX(0.40200)) - pmulhw xmm1,[GOTOFF(eax,PW_F0402)] ; xmm1=(2*CrO * FIX(0.40200)) - - paddw xmm4,[GOTOFF(eax,PW_ONE)] - paddw xmm5,[GOTOFF(eax,PW_ONE)] - psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800)) - psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800)) - paddw xmm0,[GOTOFF(eax,PW_ONE)] - paddw xmm1,[GOTOFF(eax,PW_ONE)] - psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200)) - psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200)) - - paddw xmm4,xmm2 - paddw xmm5,xmm3 - paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E - paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O - paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E - paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O - - movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E - movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O - - movdqa xmm4,xmm2 - movdqa xmm5,xmm3 - punpcklwd xmm2,xmm6 - punpckhwd xmm4,xmm6 - pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)] - pmaddwd xmm4,[GOTOFF(eax,PW_MF0344_F0285)] - punpcklwd xmm3,xmm7 - punpckhwd xmm5,xmm7 - pmaddwd xmm3,[GOTOFF(eax,PW_MF0344_F0285)] - pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)] - - paddd xmm2,[GOTOFF(eax,PD_ONEHALF)] - paddd xmm4,[GOTOFF(eax,PD_ONEHALF)] - psrad xmm2,SCALEBITS - psrad xmm4,SCALEBITS - paddd xmm3,[GOTOFF(eax,PD_ONEHALF)] - paddd xmm5,[GOTOFF(eax,PD_ONEHALF)] - psrad xmm3,SCALEBITS - psrad xmm5,SCALEBITS - - packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285) - packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285) - psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E - psubw xmm3,xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O - - movdqa xmm5, XMMWORD [esi] ; xmm5=Y(0123456789ABCDEF) - - pcmpeqw xmm4,xmm4 - psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..} - pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE - psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO - - paddw xmm0,xmm4 ; 
xmm0=((R-Y)E+YE)=RE=R(02468ACE) - paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF) - packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) - packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) - - paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE) - paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF) - packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) - packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) - - paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE) - paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF) - packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) - packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) - -%if RGB_PIXELSIZE == 3 ; --------------- - - ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) - ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) - ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) - ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) - - punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) - punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) - punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) - - movdqa xmmG,xmmA - movdqa xmmH,xmmA - punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) - punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) - - psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) - psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) - - movdqa xmmC,xmmD - movdqa xmmB,xmmD - punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) - punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) - - psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) - - movdqa xmmF,xmmE - punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) - punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) - - pshufd 
xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) - movdqa xmmB,xmmE - punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) - punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) - punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) - - pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) - movdqa xmmB,xmmF - punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) - punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) - punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) - - punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) - punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) - punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) - - cmp ecx, byte SIZEOF_XMMWORD - jb short .column_st32 - - test edi, SIZEOF_XMMWORD-1 - jnz short .out1 - ; --(aligned)------------------- - movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF - jmp short .out0 -.out1: ; --(unaligned)----------------- - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF -.out0: - add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr - sub ecx, byte SIZEOF_XMMWORD - jz near .nextrow - - add esi, byte SIZEOF_XMMWORD ; inptr0 - add ebx, byte SIZEOF_XMMWORD ; inptr1 - add edx, byte SIZEOF_XMMWORD ; inptr2 - jmp near .columnloop - alignx 16,7 - -.column_st32: - lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE - cmp ecx, byte 2*SIZEOF_XMMWORD - jb short .column_st16 - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - add edi, byte 2*SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmF - sub ecx, byte 2*SIZEOF_XMMWORD - jmp short .column_st15 
-.column_st16: - cmp ecx, byte SIZEOF_XMMWORD - jb short .column_st15 - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmD - sub ecx, byte SIZEOF_XMMWORD -.column_st15: - ; Store the lower 8 bytes of xmmA to the output when it has enough - ; space. - cmp ecx, byte SIZEOF_MMWORD - jb short .column_st7 - movq XMM_MMWORD [edi], xmmA - add edi, byte SIZEOF_MMWORD - sub ecx, byte SIZEOF_MMWORD - psrldq xmmA, SIZEOF_MMWORD -.column_st7: - ; Store the lower 4 bytes of xmmA to the output when it has enough - ; space. - cmp ecx, byte SIZEOF_DWORD - jb short .column_st3 - movd XMM_DWORD [edi], xmmA - add edi, byte SIZEOF_DWORD - sub ecx, byte SIZEOF_DWORD - psrldq xmmA, SIZEOF_DWORD -.column_st3: - ; Store the lower 2 bytes of eax to the output when it has enough - ; space. - movd eax, xmmA - cmp ecx, byte SIZEOF_WORD - jb short .column_st1 - mov WORD [edi], ax - add edi, byte SIZEOF_WORD - sub ecx, byte SIZEOF_WORD - shr eax, 16 -.column_st1: - ; Store the lower 1 byte of eax to the output when it has enough - ; space. 
- test ecx, ecx - jz short .nextrow - mov BYTE [edi], al - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -%ifdef RGBX_FILLER_0XFF - pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) - pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) -%else - pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) - pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) -%endif - ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) - ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) - ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) - ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) - - punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) - punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) - punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) - punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) - - movdqa xmmC,xmmA - punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) - punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) - movdqa xmmG,xmmB - punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) - punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) - - movdqa xmmD,xmmA - punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) - punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) - movdqa xmmH,xmmC - punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) - punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) - - cmp ecx, byte SIZEOF_XMMWORD - jb short .column_st32 - - test edi, SIZEOF_XMMWORD-1 - jnz short .out1 - ; --(aligned)------------------- - movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC - movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH - jmp short 
.out0 -.out1: ; --(unaligned)----------------- - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC - movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH -.out0: - add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr - sub ecx, byte SIZEOF_XMMWORD - jz near .nextrow - - add esi, byte SIZEOF_XMMWORD ; inptr0 - add ebx, byte SIZEOF_XMMWORD ; inptr1 - add edx, byte SIZEOF_XMMWORD ; inptr2 - jmp near .columnloop - alignx 16,7 - -.column_st32: - cmp ecx, byte SIZEOF_XMMWORD/2 - jb short .column_st16 - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - add edi, byte 2*SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmC - movdqa xmmD,xmmH - sub ecx, byte SIZEOF_XMMWORD/2 -.column_st16: - cmp ecx, byte SIZEOF_XMMWORD/4 - jb short .column_st15 - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmD - sub ecx, byte SIZEOF_XMMWORD/4 -.column_st15: - ; Store two pixels (8 bytes) of xmmA to the output when it has enough - ; space. - cmp ecx, byte SIZEOF_XMMWORD/8 - jb short .column_st7 - movq XMM_MMWORD [edi], xmmA - add edi, byte SIZEOF_XMMWORD/8*4 - sub ecx, byte SIZEOF_XMMWORD/8 - psrldq xmmA, SIZEOF_XMMWORD/8*4 -.column_st7: - ; Store one pixel (4 bytes) of xmmA to the output when it has enough - ; space. 
- test ecx, ecx - jz short .nextrow - movd XMM_DWORD [edi], xmmA - -%endif ; RGB_PIXELSIZE ; --------------- - - alignx 16,7 - -.nextrow: - pop ecx - pop esi - pop ebx - pop edx - pop edi - pop eax - - add esi, byte SIZEOF_JSAMPROW - add ebx, byte SIZEOF_JSAMPROW - add edx, byte SIZEOF_JSAMPROW - add edi, byte SIZEOF_JSAMPROW ; output_buf - dec eax ; num_rows - jg near .rowloop - - sfence ; flush the write buffer - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jdcolor-altivec.c b/simd/jdcolor-altivec.c deleted file mode 100644 index 0dc4c42..0000000 --- a/simd/jdcolor-altivec.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * AltiVec optimizations for libjpeg-turbo - * - * Copyright (C) 2015, D. R. Commander. All Rights Reserved. - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. 
- */ - -/* YCC --> RGB CONVERSION */ - -#include "jsimd_altivec.h" - - -#define F_0_344 22554 /* FIX(0.34414) */ -#define F_0_714 46802 /* FIX(0.71414) */ -#define F_1_402 91881 /* FIX(1.40200) */ -#define F_1_772 116130 /* FIX(1.77200) */ -#define F_0_402 (F_1_402 - 65536) /* FIX(1.40200) - FIX(1) */ -#define F_0_285 (65536 - F_0_714) /* FIX(1) - FIX(0.71414) */ -#define F_0_228 (131072 - F_1_772) /* FIX(2) - FIX(1.77200) */ - -#define SCALEBITS 16 -#define ONE_HALF (1 << (SCALEBITS - 1)) - -#define RGB_INDEX0 {0,1,8,2,3,10,4,5,12,6,7,14,16,17,24,18} -#define RGB_INDEX1 {3,10,4,5,12,6,7,14,16,17,24,18,19,26,20,21} -#define RGB_INDEX2 {12,6,7,14,16,17,24,18,19,26,20,21,28,22,23,30} -#include "jdcolext-altivec.c" -#undef RGB_PIXELSIZE - -#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extrgb_convert_altivec -#include "jdcolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX0 -#undef RGB_INDEX1 -#undef RGB_INDEX2 -#undef jsimd_ycc_rgb_convert_altivec - -#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -#define RGB_INDEX {0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15} -#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extrgbx_convert_altivec -#include "jdcolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX -#undef jsimd_ycc_rgb_convert_altivec - -#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -#define RGB_INDEX0 {8,1,0,10,3,2,12,5,4,14,7,6,24,17,16,26} -#define RGB_INDEX1 {3,2,12,5,4,14,7,6,24,17,16,26,19,18,28,21} -#define RGB_INDEX2 {4,14,7,6,24,17,16,26,19,18,28,21,20,30,23,22} -#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extbgr_convert_altivec -#include "jdcolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX0 -#undef RGB_INDEX1 -#undef RGB_INDEX2 -#undef jsimd_ycc_rgb_convert_altivec - -#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -#define RGB_INDEX {8,1,0,9,10,3,2,11,12,5,4,13,14,7,6,15} -#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extbgrx_convert_altivec -#include "jdcolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX 
-#undef jsimd_ycc_rgb_convert_altivec - -#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -#define RGB_INDEX {9,8,1,0,11,10,3,2,13,12,5,4,15,14,7,6} -#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extxbgr_convert_altivec -#include "jdcolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX -#undef jsimd_ycc_rgb_convert_altivec - -#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -#define RGB_INDEX {9,0,1,8,11,2,3,10,13,4,5,12,15,6,7,14} -#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extxrgb_convert_altivec -#include "jdcolext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX -#undef jsimd_ycc_rgb_convert_altivec diff --git a/simd/jdcolor-mmx.asm b/simd/jdcolor-mmx.asm deleted file mode 100644 index 4e58031..0000000 --- a/simd/jdcolor-mmx.asm +++ /dev/null @@ -1,119 +0,0 @@ -; -; jdcolor.asm - colorspace conversion (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_344 equ 22554 ; FIX(0.34414) -F_0_714 equ 46802 ; FIX(0.71414) -F_1_402 equ 91881 ; FIX(1.40200) -F_1_772 equ 116130 ; FIX(1.77200) -F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) -F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) -F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_ycc_rgb_convert_mmx) - -EXTN(jconst_ycc_rgb_convert_mmx): - -PW_F0402 times 4 dw F_0_402 -PW_MF0228 times 4 dw -F_0_228 -PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285 -PW_ONE times 4 dw 1 -PD_ONEHALF times 2 dd 1 << (SCALEBITS-1) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 - -%include "jdcolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgb_convert_mmx -%include "jdcolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE -%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgbx_convert_mmx -%include "jdcolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgr_convert_mmx -%include "jdcolext-mmx.asm" 
- -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgrx_convert_mmx -%include "jdcolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxbgr_convert_mmx -%include "jdcolext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxrgb_convert_mmx -%include "jdcolext-mmx.asm" diff --git a/simd/jdcolor-sse2-64.asm b/simd/jdcolor-sse2-64.asm deleted file mode 100644 index d2bf210..0000000 --- a/simd/jdcolor-sse2-64.asm +++ /dev/null @@ -1,119 +0,0 @@ -; -; jdcolor.asm - colorspace conversion (64-bit SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_344 equ 22554 ; FIX(0.34414) -F_0_714 equ 46802 ; FIX(0.71414) -F_1_402 equ 91881 ; FIX(1.40200) -F_1_772 equ 116130 ; FIX(1.77200) -F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) -F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) -F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_ycc_rgb_convert_sse2) - -EXTN(jconst_ycc_rgb_convert_sse2): - -PW_F0402 times 8 dw F_0_402 -PW_MF0228 times 8 dw -F_0_228 -PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 -PW_ONE times 8 dw 1 -PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 - -%include "jdcolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2 -%include "jdcolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE -%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2 -%include "jdcolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2 -%include 
"jdcolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2 -%include "jdcolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2 -%include "jdcolext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2 -%include "jdcolext-sse2-64.asm" diff --git a/simd/jdcolor-sse2.asm b/simd/jdcolor-sse2.asm deleted file mode 100644 index 7ff5d05..0000000 --- a/simd/jdcolor-sse2.asm +++ /dev/null @@ -1,119 +0,0 @@ -; -; jdcolor.asm - colorspace conversion (SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_344 equ 22554 ; FIX(0.34414) -F_0_714 equ 46802 ; FIX(0.71414) -F_1_402 equ 91881 ; FIX(1.40200) -F_1_772 equ 116130 ; FIX(1.77200) -F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) -F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) -F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_ycc_rgb_convert_sse2) - -EXTN(jconst_ycc_rgb_convert_sse2): - -PW_F0402 times 8 dw F_0_402 -PW_MF0228 times 8 dw -F_0_228 -PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 -PW_ONE times 8 dw 1 -PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 - -%include "jdcolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2 -%include "jdcolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE -%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2 -%include "jdcolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2 -%include 
"jdcolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2 -%include "jdcolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2 -%include "jdcolext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2 -%include "jdcolext-sse2.asm" diff --git a/simd/jdmerge-altivec.c b/simd/jdmerge-altivec.c deleted file mode 100644 index 6a35f20..0000000 --- a/simd/jdmerge-altivec.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * AltiVec optimizations for libjpeg-turbo - * - * Copyright (C) 2015, D. R. Commander. All Rights Reserved. - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. 
Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* MERGED YCC --> RGB CONVERSION AND UPSAMPLING */ - -#include "jsimd_altivec.h" - - -#define F_0_344 22554 /* FIX(0.34414) */ -#define F_0_714 46802 /* FIX(0.71414) */ -#define F_1_402 91881 /* FIX(1.40200) */ -#define F_1_772 116130 /* FIX(1.77200) */ -#define F_0_402 (F_1_402 - 65536) /* FIX(1.40200) - FIX(1) */ -#define F_0_285 (65536 - F_0_714) /* FIX(1) - FIX(0.71414) */ -#define F_0_228 (131072 - F_1_772) /* FIX(2) - FIX(1.77200) */ - -#define SCALEBITS 16 -#define ONE_HALF (1 << (SCALEBITS - 1)) - -#define RGB_INDEX0 {0,1,8,2,3,10,4,5,12,6,7,14,16,17,24,18} -#define RGB_INDEX1 {3,10,4,5,12,6,7,14,16,17,24,18,19,26,20,21} -#define RGB_INDEX2 {12,6,7,14,16,17,24,18,19,26,20,21,28,22,23,30} -#include "jdmrgext-altivec.c" -#undef RGB_PIXELSIZE - -#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extrgb_merged_upsample_altivec -#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extrgb_merged_upsample_altivec -#include "jdmrgext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX0 -#undef RGB_INDEX1 -#undef RGB_INDEX2 -#undef jsimd_h2v1_merged_upsample_altivec -#undef jsimd_h2v2_merged_upsample_altivec - -#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -#define RGB_INDEX {0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15} -#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extrgbx_merged_upsample_altivec -#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extrgbx_merged_upsample_altivec -#include "jdmrgext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX -#undef jsimd_h2v1_merged_upsample_altivec -#undef jsimd_h2v2_merged_upsample_altivec - -#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -#define RGB_INDEX0 {8,1,0,10,3,2,12,5,4,14,7,6,24,17,16,26} -#define RGB_INDEX1 {3,2,12,5,4,14,7,6,24,17,16,26,19,18,28,21} -#define 
RGB_INDEX2 {4,14,7,6,24,17,16,26,19,18,28,21,20,30,23,22} -#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extbgr_merged_upsample_altivec -#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extbgr_merged_upsample_altivec -#include "jdmrgext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX0 -#undef RGB_INDEX1 -#undef RGB_INDEX2 -#undef jsimd_h2v1_merged_upsample_altivec -#undef jsimd_h2v2_merged_upsample_altivec - -#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -#define RGB_INDEX {8,1,0,9,10,3,2,11,12,5,4,13,14,7,6,15} -#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extbgrx_merged_upsample_altivec -#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extbgrx_merged_upsample_altivec -#include "jdmrgext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX -#undef jsimd_h2v1_merged_upsample_altivec -#undef jsimd_h2v2_merged_upsample_altivec - -#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -#define RGB_INDEX {9,8,1,0,11,10,3,2,13,12,5,4,15,14,7,6} -#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extxbgr_merged_upsample_altivec -#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extxbgr_merged_upsample_altivec -#include "jdmrgext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX -#undef jsimd_h2v1_merged_upsample_altivec -#undef jsimd_h2v2_merged_upsample_altivec - -#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -#define RGB_INDEX {9,0,1,8,11,2,3,10,13,4,5,12,15,6,7,14} -#define jsimd_h2v1_merged_upsample_altivec jsimd_h2v1_extxrgb_merged_upsample_altivec -#define jsimd_h2v2_merged_upsample_altivec jsimd_h2v2_extxrgb_merged_upsample_altivec -#include "jdmrgext-altivec.c" -#undef RGB_PIXELSIZE -#undef RGB_INDEX -#undef jsimd_h2v1_merged_upsample_altivec -#undef jsimd_h2v2_merged_upsample_altivec diff --git a/simd/jdmerge-mmx.asm b/simd/jdmerge-mmx.asm deleted file mode 100644 index ee58bff..0000000 --- a/simd/jdmerge-mmx.asm +++ /dev/null @@ -1,125 +0,0 @@ -; -; jdmerge.asm - merged upsampling/color conversion (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio 
AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_344 equ 22554 ; FIX(0.34414) -F_0_714 equ 46802 ; FIX(0.71414) -F_1_402 equ 91881 ; FIX(1.40200) -F_1_772 equ 116130 ; FIX(1.77200) -F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) -F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) -F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_merged_upsample_mmx) - -EXTN(jconst_merged_upsample_mmx): - -PW_F0402 times 4 dw F_0_402 -PW_MF0228 times 4 dw -F_0_228 -PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285 -PW_ONE times 4 dw 1 -PD_ONEHALF times 2 dd 1 << (SCALEBITS-1) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 - -%include "jdmrgext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgb_merged_upsample_mmx -%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgb_merged_upsample_mmx -%include "jdmrgext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED 
-%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE -%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgbx_merged_upsample_mmx -%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgbx_merged_upsample_mmx -%include "jdmrgext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgr_merged_upsample_mmx -%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgr_merged_upsample_mmx -%include "jdmrgext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgrx_merged_upsample_mmx -%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgrx_merged_upsample_mmx -%include "jdmrgext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxbgr_merged_upsample_mmx -%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxbgr_merged_upsample_mmx -%include "jdmrgext-mmx.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxrgb_merged_upsample_mmx -%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxrgb_merged_upsample_mmx -%include "jdmrgext-mmx.asm" diff --git a/simd/jdmerge-sse2-64.asm b/simd/jdmerge-sse2-64.asm deleted file mode 100644 index 
244bd40..0000000 --- a/simd/jdmerge-sse2-64.asm +++ /dev/null @@ -1,125 +0,0 @@ -; -; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_344 equ 22554 ; FIX(0.34414) -F_0_714 equ 46802 ; FIX(0.71414) -F_1_402 equ 91881 ; FIX(1.40200) -F_1_772 equ 116130 ; FIX(1.77200) -F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) -F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) -F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_merged_upsample_sse2) - -EXTN(jconst_merged_upsample_sse2): - -PW_F0402 times 8 dw F_0_402 -PW_MF0228 times 8 dw -F_0_228 -PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 -PW_ONE times 8 dw 1 -PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 - -%include "jdmrgext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2 -%define 
jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2 -%include "jdmrgext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE -%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2 -%include "jdmrgext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2 -%include "jdmrgext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2 -%include "jdmrgext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2 -%include "jdmrgext-sse2-64.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 
jsimd_h2v1_extxrgb_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2 -%include "jdmrgext-sse2-64.asm" diff --git a/simd/jdmerge-sse2.asm b/simd/jdmerge-sse2.asm deleted file mode 100644 index 236de5a..0000000 --- a/simd/jdmerge-sse2.asm +++ /dev/null @@ -1,125 +0,0 @@ -; -; jdmerge.asm - merged upsampling/color conversion (SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - -%define SCALEBITS 16 - -F_0_344 equ 22554 ; FIX(0.34414) -F_0_714 equ 46802 ; FIX(0.71414) -F_1_402 equ 91881 ; FIX(1.40200) -F_1_772 equ 116130 ; FIX(1.77200) -F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) -F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) -F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_merged_upsample_sse2) - -EXTN(jconst_merged_upsample_sse2): - -PW_F0402 times 8 dw F_0_402 -PW_MF0228 times 8 dw -F_0_228 -PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 -PW_ONE times 8 dw 1 -PD_ONEHALF times 4 dd 1 << (SCALEBITS-1) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 - -%include "jdmrgext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef 
RGB_PIXELSIZE -%define RGB_RED EXT_RGB_RED -%define RGB_GREEN EXT_RGB_GREEN -%define RGB_BLUE EXT_RGB_BLUE -%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2 -%include "jdmrgext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_RGBX_RED -%define RGB_GREEN EXT_RGBX_GREEN -%define RGB_BLUE EXT_RGBX_BLUE -%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2 -%include "jdmrgext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGR_RED -%define RGB_GREEN EXT_BGR_GREEN -%define RGB_BLUE EXT_BGR_BLUE -%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2 -%include "jdmrgext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_BGRX_RED -%define RGB_GREEN EXT_BGRX_GREEN -%define RGB_BLUE EXT_BGRX_BLUE -%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2 -%include "jdmrgext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XBGR_RED -%define RGB_GREEN EXT_XBGR_GREEN -%define RGB_BLUE EXT_XBGR_BLUE -%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2 -%include "jdmrgext-sse2.asm" - -%undef RGB_RED -%undef RGB_GREEN -%undef 
RGB_BLUE -%undef RGB_PIXELSIZE -%define RGB_RED EXT_XRGB_RED -%define RGB_GREEN EXT_XRGB_GREEN -%define RGB_BLUE EXT_XRGB_BLUE -%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE -%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2 -%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2 -%include "jdmrgext-sse2.asm" diff --git a/simd/jdmrgext-mmx.asm b/simd/jdmrgext-mmx.asm deleted file mode 100644 index 63f45cf..0000000 --- a/simd/jdmrgext-mmx.asm +++ /dev/null @@ -1,463 +0,0 @@ -; -; jdmrgext.asm - merged upsampling/color conversion (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. 
-; -; GLOBAL(void) -; jsimd_h2v1_merged_upsample_mmx (JDIMENSION output_width, -; JSAMPIMAGE input_buf, -; JDIMENSION in_row_group_ctr, -; JSAMPARRAY output_buf); -; - -%define output_width(b) (b)+8 ; JDIMENSION output_width -%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf -%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr -%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 3 -%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr - - align 16 - global EXTN(jsimd_h2v1_merged_upsample_mmx) - -EXTN(jsimd_h2v1_merged_upsample_mmx): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic eax ; make a room for GOT address - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - movpic POINTER [gotptr], ebx ; save GOT address - - mov ecx, JDIMENSION [output_width(eax)] ; col - test ecx,ecx - jz near .return - - push ecx - - mov edi, JSAMPIMAGE [input_buf(eax)] - mov ecx, JDIMENSION [in_row_group_ctr(eax)] - mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] - mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] - mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] - mov edi, JSAMPARRAY [output_buf(eax)] - mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0 - mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 - mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 - mov edi, JSAMPROW [edi] ; outptr - - pop ecx ; col - - alignx 16,7 -.columnloop: - movpic eax, POINTER [gotptr] ; load GOT address (eax) - - movq mm6, MMWORD [ebx] ; mm6=Cb(01234567) - movq mm7, MMWORD [edx] ; mm7=Cr(01234567) - - pxor mm1,mm1 ; mm1=(all 0's) - pcmpeqw mm3,mm3 - psllw mm3,7 ; mm3={0xFF80 0xFF80 0xFF80 0xFF80} - - movq mm4,mm6 - punpckhbw mm6,mm1 ; mm6=Cb(4567)=CbH 
- punpcklbw mm4,mm1 ; mm4=Cb(0123)=CbL - movq mm0,mm7 - punpckhbw mm7,mm1 ; mm7=Cr(4567)=CrH - punpcklbw mm0,mm1 ; mm0=Cr(0123)=CrL - - paddw mm6,mm3 - paddw mm4,mm3 - paddw mm7,mm3 - paddw mm0,mm3 - - ; (Original) - ; R = Y + 1.40200 * Cr - ; G = Y - 0.34414 * Cb - 0.71414 * Cr - ; B = Y + 1.77200 * Cb - ; - ; (This implementation) - ; R = Y + 0.40200 * Cr + Cr - ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr - ; B = Y - 0.22800 * Cb + Cb + Cb - - movq mm5,mm6 ; mm5=CbH - movq mm2,mm4 ; mm2=CbL - paddw mm6,mm6 ; mm6=2*CbH - paddw mm4,mm4 ; mm4=2*CbL - movq mm1,mm7 ; mm1=CrH - movq mm3,mm0 ; mm3=CrL - paddw mm7,mm7 ; mm7=2*CrH - paddw mm0,mm0 ; mm0=2*CrL - - pmulhw mm6,[GOTOFF(eax,PW_MF0228)] ; mm6=(2*CbH * -FIX(0.22800)) - pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbL * -FIX(0.22800)) - pmulhw mm7,[GOTOFF(eax,PW_F0402)] ; mm7=(2*CrH * FIX(0.40200)) - pmulhw mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrL * FIX(0.40200)) - - paddw mm6,[GOTOFF(eax,PW_ONE)] - paddw mm4,[GOTOFF(eax,PW_ONE)] - psraw mm6,1 ; mm6=(CbH * -FIX(0.22800)) - psraw mm4,1 ; mm4=(CbL * -FIX(0.22800)) - paddw mm7,[GOTOFF(eax,PW_ONE)] - paddw mm0,[GOTOFF(eax,PW_ONE)] - psraw mm7,1 ; mm7=(CrH * FIX(0.40200)) - psraw mm0,1 ; mm0=(CrL * FIX(0.40200)) - - paddw mm6,mm5 - paddw mm4,mm2 - paddw mm6,mm5 ; mm6=(CbH * FIX(1.77200))=(B-Y)H - paddw mm4,mm2 ; mm4=(CbL * FIX(1.77200))=(B-Y)L - paddw mm7,mm1 ; mm7=(CrH * FIX(1.40200))=(R-Y)H - paddw mm0,mm3 ; mm0=(CrL * FIX(1.40200))=(R-Y)L - - movq MMWORD [wk(0)], mm6 ; wk(0)=(B-Y)H - movq MMWORD [wk(1)], mm7 ; wk(1)=(R-Y)H - - movq mm6,mm5 - movq mm7,mm2 - punpcklwd mm5,mm1 - punpckhwd mm6,mm1 - pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)] - pmaddwd mm6,[GOTOFF(eax,PW_MF0344_F0285)] - punpcklwd mm2,mm3 - punpckhwd mm7,mm3 - pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)] - pmaddwd mm7,[GOTOFF(eax,PW_MF0344_F0285)] - - paddd mm5,[GOTOFF(eax,PD_ONEHALF)] - paddd mm6,[GOTOFF(eax,PD_ONEHALF)] - psrad mm5,SCALEBITS - psrad mm6,SCALEBITS - paddd mm2,[GOTOFF(eax,PD_ONEHALF)] - paddd 
mm7,[GOTOFF(eax,PD_ONEHALF)] - psrad mm2,SCALEBITS - psrad mm7,SCALEBITS - - packssdw mm5,mm6 ; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285) - packssdw mm2,mm7 ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285) - psubw mm5,mm1 ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H - psubw mm2,mm3 ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L - - movq MMWORD [wk(2)], mm5 ; wk(2)=(G-Y)H - - mov al,2 ; Yctr - jmp short .Yloop_1st - alignx 16,7 - -.Yloop_2nd: - movq mm0, MMWORD [wk(1)] ; mm0=(R-Y)H - movq mm2, MMWORD [wk(2)] ; mm2=(G-Y)H - movq mm4, MMWORD [wk(0)] ; mm4=(B-Y)H - alignx 16,7 - -.Yloop_1st: - movq mm7, MMWORD [esi] ; mm7=Y(01234567) - - pcmpeqw mm6,mm6 - psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..} - pand mm6,mm7 ; mm6=Y(0246)=YE - psrlw mm7,BYTE_BIT ; mm7=Y(1357)=YO - - movq mm1,mm0 ; mm1=mm0=(R-Y)(L/H) - movq mm3,mm2 ; mm3=mm2=(G-Y)(L/H) - movq mm5,mm4 ; mm5=mm4=(B-Y)(L/H) - - paddw mm0,mm6 ; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6) - paddw mm1,mm7 ; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7) - packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **) - packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **) - - paddw mm2,mm6 ; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6) - paddw mm3,mm7 ; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7) - packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **) - packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **) - - paddw mm4,mm6 ; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6) - paddw mm5,mm7 ; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7) - packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **) - packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **) - -%if RGB_PIXELSIZE == 3 ; --------------- - - ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) - ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) - ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) - ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **) - - punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) - punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07) - punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27) - - movq mmG,mmA - movq mmH,mmA - punpcklwd mmA,mmE ; 
mmA=(00 10 20 01 02 12 22 03) - punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07) - - psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --) - psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --) - - movq mmC,mmD - movq mmB,mmD - punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14) - punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --) - - psrlq mmB,2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --) - - movq mmF,mmE - punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25) - punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --) - - punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12) - punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05) - punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27) - - cmp ecx, byte SIZEOF_MMWORD - jb short .column_st16 - - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq MMWORD [edi+1*SIZEOF_MMWORD], mmE - movq MMWORD [edi+2*SIZEOF_MMWORD], mmC - - sub ecx, byte SIZEOF_MMWORD - jz near .endcolumn - - add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr - add esi, byte SIZEOF_MMWORD ; inptr0 - dec al ; Yctr - jnz near .Yloop_2nd - - add ebx, byte SIZEOF_MMWORD ; inptr1 - add edx, byte SIZEOF_MMWORD ; inptr2 - jmp near .columnloop - alignx 16,7 - -.column_st16: - lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE - cmp ecx, byte 2*SIZEOF_MMWORD - jb short .column_st8 - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq MMWORD [edi+1*SIZEOF_MMWORD], mmE - movq mmA,mmC - sub ecx, byte 2*SIZEOF_MMWORD - add edi, byte 2*SIZEOF_MMWORD - jmp short .column_st4 -.column_st8: - cmp ecx, byte SIZEOF_MMWORD - jb short .column_st4 - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq mmA,mmE - sub ecx, byte SIZEOF_MMWORD - add edi, byte SIZEOF_MMWORD -.column_st4: - movd eax,mmA - cmp ecx, byte SIZEOF_DWORD - jb short .column_st2 - mov DWORD [edi+0*SIZEOF_DWORD], eax - psrlq mmA,DWORD_BIT - movd eax,mmA - sub ecx, byte SIZEOF_DWORD - add edi, byte SIZEOF_DWORD -.column_st2: - cmp ecx, byte SIZEOF_WORD - jb short .column_st1 - mov WORD [edi+0*SIZEOF_WORD], ax - shr eax,WORD_BIT - sub ecx, byte SIZEOF_WORD 
- add edi, byte SIZEOF_WORD -.column_st1: - cmp ecx, byte SIZEOF_BYTE - jb short .endcolumn - mov BYTE [edi+0*SIZEOF_BYTE], al - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -%ifdef RGBX_FILLER_0XFF - pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) - pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) -%else - pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **) - pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **) -%endif - ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) - ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) - ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) - ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) - - punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16) - punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36) - punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17) - punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37) - - movq mmC,mmA - punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32) - punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36) - movq mmG,mmB - punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33) - punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37) - - movq mmD,mmA - punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31) - punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33) - movq mmH,mmC - punpckldq mmC,mmG ; mmC=(04 14 24 34 05 15 25 35) - punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37) - - cmp ecx, byte SIZEOF_MMWORD - jb short .column_st16 - - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq MMWORD [edi+1*SIZEOF_MMWORD], mmD - movq MMWORD [edi+2*SIZEOF_MMWORD], mmC - movq MMWORD [edi+3*SIZEOF_MMWORD], mmH - - sub ecx, byte SIZEOF_MMWORD - jz short .endcolumn - - add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr - add esi, byte SIZEOF_MMWORD ; inptr0 - dec al ; Yctr - jnz near .Yloop_2nd - - add ebx, byte SIZEOF_MMWORD ; inptr1 - add edx, byte SIZEOF_MMWORD ; inptr2 - jmp near .columnloop - alignx 16,7 - -.column_st16: - cmp ecx, byte SIZEOF_MMWORD/2 - jb short .column_st8 - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - 
movq MMWORD [edi+1*SIZEOF_MMWORD], mmD - movq mmA,mmC - movq mmD,mmH - sub ecx, byte SIZEOF_MMWORD/2 - add edi, byte 2*SIZEOF_MMWORD -.column_st8: - cmp ecx, byte SIZEOF_MMWORD/4 - jb short .column_st4 - movq MMWORD [edi+0*SIZEOF_MMWORD], mmA - movq mmA,mmD - sub ecx, byte SIZEOF_MMWORD/4 - add edi, byte 1*SIZEOF_MMWORD -.column_st4: - cmp ecx, byte SIZEOF_MMWORD/8 - jb short .endcolumn - movd DWORD [edi+0*SIZEOF_DWORD], mmA - -%endif ; RGB_PIXELSIZE ; --------------- - -.endcolumn: - emms ; empty MMX state - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. -; -; GLOBAL(void) -; jsimd_h2v2_merged_upsample_mmx (JDIMENSION output_width, -; JSAMPIMAGE input_buf, -; JDIMENSION in_row_group_ctr, -; JSAMPARRAY output_buf); -; - -%define output_width(b) (b)+8 ; JDIMENSION output_width -%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf -%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr -%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf - - align 16 - global EXTN(jsimd_h2v2_merged_upsample_mmx) - -EXTN(jsimd_h2v2_merged_upsample_mmx): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov eax, JDIMENSION [output_width(ebp)] - - mov edi, JSAMPIMAGE [input_buf(ebp)] - mov ecx, JDIMENSION [in_row_group_ctr(ebp)] - mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] - mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] - mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] - mov edi, JSAMPARRAY [output_buf(ebp)] - lea esi, [esi+ecx*SIZEOF_JSAMPROW] - - push edx ; inptr2 - push ebx ; inptr1 - push esi ; inptr00 - mov ebx,esp - - push edi ; output_buf (outptr0) - push ecx ; in_row_group_ctr - push 
ebx ; input_buf - push eax ; output_width - - call near EXTN(jsimd_h2v1_merged_upsample_mmx) - - add esi, byte SIZEOF_JSAMPROW ; inptr01 - add edi, byte SIZEOF_JSAMPROW ; outptr1 - mov POINTER [ebx+0*SIZEOF_POINTER], esi - mov POINTER [ebx-1*SIZEOF_POINTER], edi - - call near EXTN(jsimd_h2v1_merged_upsample_mmx) - - add esp, byte 7*SIZEOF_DWORD - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jdmrgext-sse2-64.asm b/simd/jdmrgext-sse2-64.asm deleted file mode 100644 index ad74c5f..0000000 --- a/simd/jdmrgext-sse2-64.asm +++ /dev/null @@ -1,537 +0,0 @@ -; -; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2) -; -; Copyright 2009, 2012 Pierre Ossman for Cendio AB -; Copyright (C) 2009, 2012, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. 
-; -; GLOBAL(void) -; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width, -; JSAMPIMAGE input_buf, -; JDIMENSION in_row_group_ctr, -; JSAMPARRAY output_buf); -; - -; r10 = JDIMENSION output_width -; r11 = JSAMPIMAGE input_buf -; r12 = JDIMENSION in_row_group_ctr -; r13 = JSAMPARRAY output_buf - -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 3 - - align 16 - global EXTN(jsimd_h2v1_merged_upsample_sse2) - -EXTN(jsimd_h2v1_merged_upsample_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - push rbx - - mov ecx, r10d ; col - test rcx,rcx - jz near .return - - push rcx - - mov rdi, r11 - mov ecx, r12d - mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] - mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] - mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] - mov rdi, r13 - mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0 - mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1 - mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2 - mov rdi, JSAMPROW [rdi] ; outptr - - pop rcx ; col - -.columnloop: - - movdqa xmm6, XMMWORD [rbx] ; xmm6=Cb(0123456789ABCDEF) - movdqa xmm7, XMMWORD [rdx] ; xmm7=Cr(0123456789ABCDEF) - - pxor xmm1,xmm1 ; xmm1=(all 0's) - pcmpeqw xmm3,xmm3 - psllw xmm3,7 ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..} - - movdqa xmm4,xmm6 - punpckhbw xmm6,xmm1 ; xmm6=Cb(89ABCDEF)=CbH - punpcklbw xmm4,xmm1 ; xmm4=Cb(01234567)=CbL - movdqa xmm0,xmm7 - punpckhbw xmm7,xmm1 ; xmm7=Cr(89ABCDEF)=CrH - punpcklbw xmm0,xmm1 ; xmm0=Cr(01234567)=CrL - - paddw xmm6,xmm3 - paddw xmm4,xmm3 - paddw xmm7,xmm3 - paddw xmm0,xmm3 - - ; (Original) - ; R = Y + 1.40200 * Cr - ; G = Y - 0.34414 * Cb - 0.71414 * Cr - ; B = Y + 1.77200 * Cb - ; - ; (This implementation) - ; R = Y + 0.40200 * Cr + Cr - ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr - ; B = Y - 0.22800 * Cb + Cb + Cb - - movdqa xmm5,xmm6 ; 
xmm5=CbH - movdqa xmm2,xmm4 ; xmm2=CbL - paddw xmm6,xmm6 ; xmm6=2*CbH - paddw xmm4,xmm4 ; xmm4=2*CbL - movdqa xmm1,xmm7 ; xmm1=CrH - movdqa xmm3,xmm0 ; xmm3=CrL - paddw xmm7,xmm7 ; xmm7=2*CrH - paddw xmm0,xmm0 ; xmm0=2*CrL - - pmulhw xmm6,[rel PW_MF0228] ; xmm6=(2*CbH * -FIX(0.22800)) - pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbL * -FIX(0.22800)) - pmulhw xmm7,[rel PW_F0402] ; xmm7=(2*CrH * FIX(0.40200)) - pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrL * FIX(0.40200)) - - paddw xmm6,[rel PW_ONE] - paddw xmm4,[rel PW_ONE] - psraw xmm6,1 ; xmm6=(CbH * -FIX(0.22800)) - psraw xmm4,1 ; xmm4=(CbL * -FIX(0.22800)) - paddw xmm7,[rel PW_ONE] - paddw xmm0,[rel PW_ONE] - psraw xmm7,1 ; xmm7=(CrH * FIX(0.40200)) - psraw xmm0,1 ; xmm0=(CrL * FIX(0.40200)) - - paddw xmm6,xmm5 - paddw xmm4,xmm2 - paddw xmm6,xmm5 ; xmm6=(CbH * FIX(1.77200))=(B-Y)H - paddw xmm4,xmm2 ; xmm4=(CbL * FIX(1.77200))=(B-Y)L - paddw xmm7,xmm1 ; xmm7=(CrH * FIX(1.40200))=(R-Y)H - paddw xmm0,xmm3 ; xmm0=(CrL * FIX(1.40200))=(R-Y)L - - movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=(B-Y)H - movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(R-Y)H - - movdqa xmm6,xmm5 - movdqa xmm7,xmm2 - punpcklwd xmm5,xmm1 - punpckhwd xmm6,xmm1 - pmaddwd xmm5,[rel PW_MF0344_F0285] - pmaddwd xmm6,[rel PW_MF0344_F0285] - punpcklwd xmm2,xmm3 - punpckhwd xmm7,xmm3 - pmaddwd xmm2,[rel PW_MF0344_F0285] - pmaddwd xmm7,[rel PW_MF0344_F0285] - - paddd xmm5,[rel PD_ONEHALF] - paddd xmm6,[rel PD_ONEHALF] - psrad xmm5,SCALEBITS - psrad xmm6,SCALEBITS - paddd xmm2,[rel PD_ONEHALF] - paddd xmm7,[rel PD_ONEHALF] - psrad xmm2,SCALEBITS - psrad xmm7,SCALEBITS - - packssdw xmm5,xmm6 ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285) - packssdw xmm2,xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285) - psubw xmm5,xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H - psubw xmm2,xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L - - movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H - - mov al,2 ; Yctr - jmp short .Yloop_1st - -.Yloop_2nd: - movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H - movdqa xmm2, 
XMMWORD [wk(2)] ; xmm2=(G-Y)H - movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H - -.Yloop_1st: - movdqa xmm7, XMMWORD [rsi] ; xmm7=Y(0123456789ABCDEF) - - pcmpeqw xmm6,xmm6 - psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} - pand xmm6,xmm7 ; xmm6=Y(02468ACE)=YE - psrlw xmm7,BYTE_BIT ; xmm7=Y(13579BDF)=YO - - movdqa xmm1,xmm0 ; xmm1=xmm0=(R-Y)(L/H) - movdqa xmm3,xmm2 ; xmm3=xmm2=(G-Y)(L/H) - movdqa xmm5,xmm4 ; xmm5=xmm4=(B-Y)(L/H) - - paddw xmm0,xmm6 ; xmm0=((R-Y)+YE)=RE=R(02468ACE) - paddw xmm1,xmm7 ; xmm1=((R-Y)+YO)=RO=R(13579BDF) - packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) - packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) - - paddw xmm2,xmm6 ; xmm2=((G-Y)+YE)=GE=G(02468ACE) - paddw xmm3,xmm7 ; xmm3=((G-Y)+YO)=GO=G(13579BDF) - packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) - packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) - - paddw xmm4,xmm6 ; xmm4=((B-Y)+YE)=BE=B(02468ACE) - paddw xmm5,xmm7 ; xmm5=((B-Y)+YO)=BO=B(13579BDF) - packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) - packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) - -%if RGB_PIXELSIZE == 3 ; --------------- - - ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) - ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) - ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) - ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) - - punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) - punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) - punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) - - movdqa xmmG,xmmA - movdqa xmmH,xmmA - punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) - punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) - - psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) - psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) - - movdqa xmmC,xmmD - movdqa xmmB,xmmD - 
punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) - punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) - - psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) - - movdqa xmmF,xmmE - punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) - punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) - - pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) - movdqa xmmB,xmmE - punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) - punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) - punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) - - pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) - movdqa xmmB,xmmF - punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) - punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) - punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) - - punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) - punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) - punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) - - cmp rcx, byte SIZEOF_XMMWORD - jb short .column_st32 - - test rdi, SIZEOF_XMMWORD-1 - jnz short .out1 - ; --(aligned)------------------- - movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF - jmp short .out0 -.out1: ; --(unaligned)----------------- - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF -.out0: - add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr - sub rcx, byte SIZEOF_XMMWORD - jz near .endcolumn - - add rsi, byte SIZEOF_XMMWORD ; inptr0 - dec al ; Yctr - jnz near 
.Yloop_2nd - - add rbx, byte SIZEOF_XMMWORD ; inptr1 - add rdx, byte SIZEOF_XMMWORD ; inptr2 - jmp near .columnloop - -.column_st32: - lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE - cmp rcx, byte 2*SIZEOF_XMMWORD - jb short .column_st16 - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - add rdi, byte 2*SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmF - sub rcx, byte 2*SIZEOF_XMMWORD - jmp short .column_st15 -.column_st16: - cmp rcx, byte SIZEOF_XMMWORD - jb short .column_st15 - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmD - sub rcx, byte SIZEOF_XMMWORD -.column_st15: - ; Store the lower 8 bytes of xmmA to the output when it has enough - ; space. - cmp rcx, byte SIZEOF_MMWORD - jb short .column_st7 - movq XMM_MMWORD [rdi], xmmA - add rdi, byte SIZEOF_MMWORD - sub rcx, byte SIZEOF_MMWORD - psrldq xmmA, SIZEOF_MMWORD -.column_st7: - ; Store the lower 4 bytes of xmmA to the output when it has enough - ; space. - cmp rcx, byte SIZEOF_DWORD - jb short .column_st3 - movd XMM_DWORD [rdi], xmmA - add rdi, byte SIZEOF_DWORD - sub rcx, byte SIZEOF_DWORD - psrldq xmmA, SIZEOF_DWORD -.column_st3: - ; Store the lower 2 bytes of rax to the output when it has enough - ; space. - movd eax, xmmA - cmp rcx, byte SIZEOF_WORD - jb short .column_st1 - mov WORD [rdi], ax - add rdi, byte SIZEOF_WORD - sub rcx, byte SIZEOF_WORD - shr rax, 16 -.column_st1: - ; Store the lower 1 byte of rax to the output when it has enough - ; space. 
- test rcx, rcx - jz short .endcolumn - mov BYTE [rdi], al - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -%ifdef RGBX_FILLER_0XFF - pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) - pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) -%else - pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) - pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) -%endif - ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) - ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) - ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) - ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) - - punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) - punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) - punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) - punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) - - movdqa xmmC,xmmA - punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) - punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) - movdqa xmmG,xmmB - punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) - punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) - - movdqa xmmD,xmmA - punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) - punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) - movdqa xmmH,xmmC - punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) - punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) - - cmp rcx, byte SIZEOF_XMMWORD - jb short .column_st32 - - test rdi, SIZEOF_XMMWORD-1 - jnz short .out1 - ; --(aligned)------------------- - movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC - movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH - jmp 
short .out0 -.out1: ; --(unaligned)----------------- - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC - movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH -.out0: - add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr - sub rcx, byte SIZEOF_XMMWORD - jz near .endcolumn - - add rsi, byte SIZEOF_XMMWORD ; inptr0 - dec al ; Yctr - jnz near .Yloop_2nd - - add rbx, byte SIZEOF_XMMWORD ; inptr1 - add rdx, byte SIZEOF_XMMWORD ; inptr2 - jmp near .columnloop - -.column_st32: - cmp rcx, byte SIZEOF_XMMWORD/2 - jb short .column_st16 - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD - add rdi, byte 2*SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmC - movdqa xmmD,xmmH - sub rcx, byte SIZEOF_XMMWORD/2 -.column_st16: - cmp rcx, byte SIZEOF_XMMWORD/4 - jb short .column_st15 - movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA - add rdi, byte SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmD - sub rcx, byte SIZEOF_XMMWORD/4 -.column_st15: - ; Store two pixels (8 bytes) of xmmA to the output when it has enough - ; space. - cmp rcx, byte SIZEOF_XMMWORD/8 - jb short .column_st7 - movq XMM_MMWORD [rdi], xmmA - add rdi, byte SIZEOF_XMMWORD/8*4 - sub rcx, byte SIZEOF_XMMWORD/8 - psrldq xmmA, SIZEOF_XMMWORD/8*4 -.column_st7: - ; Store one pixel (4 bytes) of xmmA to the output when it has enough - ; space. - test rcx, rcx - jz short .endcolumn - movd XMM_DWORD [rdi], xmmA - -%endif ; RGB_PIXELSIZE ; --------------- - -.endcolumn: - sfence ; flush the write buffer - -.return: - pop rbx - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; -------------------------------------------------------------------------- -; -; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. 
-; -; GLOBAL(void) -; jsimd_h2v2_merged_upsample_sse2 (JDIMENSION output_width, -; JSAMPIMAGE input_buf, -; JDIMENSION in_row_group_ctr, -; JSAMPARRAY output_buf); -; - -; r10 = JDIMENSION output_width -; r11 = JSAMPIMAGE input_buf -; r12 = JDIMENSION in_row_group_ctr -; r13 = JSAMPARRAY output_buf - - align 16 - global EXTN(jsimd_h2v2_merged_upsample_sse2) - -EXTN(jsimd_h2v2_merged_upsample_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - push rbx - - mov eax, r10d - - mov rdi, r11 - mov ecx, r12d - mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] - mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] - mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] - mov rdi, r13 - lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] - - push rdx ; inptr2 - push rbx ; inptr1 - push rsi ; inptr00 - mov rbx,rsp - - push rdi - push rcx - push rax - - %ifdef WIN64 - mov r8, rcx - mov r9, rdi - mov rcx, rax - mov rdx, rbx - %else - mov rdx, rcx - mov rcx, rdi - mov rdi, rax - mov rsi, rbx - %endif - - call EXTN(jsimd_h2v1_merged_upsample_sse2) - - pop rax - pop rcx - pop rdi - pop rsi - pop rbx - pop rdx - - add rdi, byte SIZEOF_JSAMPROW ; outptr1 - add rsi, byte SIZEOF_JSAMPROW ; inptr01 - - push rdx ; inptr2 - push rbx ; inptr1 - push rsi ; inptr00 - mov rbx,rsp - - push rdi - push rcx - push rax - - %ifdef WIN64 - mov r8, rcx - mov r9, rdi - mov rcx, rax - mov rdx, rbx - %else - mov rdx, rcx - mov rcx, rdi - mov rdi, rax - mov rsi, rbx - %endif - - call EXTN(jsimd_h2v1_merged_upsample_sse2) - - pop rax - pop rcx - pop rdi - pop rsi - pop rbx - pop rdx - - pop rbx - uncollect_args - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jdmrgext-sse2.asm b/simd/jdmrgext-sse2.asm deleted file mode 100644 index b50f698..0000000 --- a/simd/jdmrgext-sse2.asm +++ /dev/null @@ -1,518 +0,0 @@ -; -; jdmrgext.asm - merged upsampling/color conversion (SSE2) -; -; Copyright 2009, 2012 Pierre Ossman for Cendio AB -; Copyright (C) 2012, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jcolsamp.inc" - -; -------------------------------------------------------------------------- -; -; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. 
-; -; GLOBAL(void) -; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width, -; JSAMPIMAGE input_buf, -; JDIMENSION in_row_group_ctr, -; JSAMPARRAY output_buf); -; - -%define output_width(b) (b)+8 ; JDIMENSION output_width -%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf -%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr -%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 3 -%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr - - align 16 - global EXTN(jsimd_h2v1_merged_upsample_sse2) - -EXTN(jsimd_h2v1_merged_upsample_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic eax ; make a room for GOT address - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - movpic POINTER [gotptr], ebx ; save GOT address - - mov ecx, JDIMENSION [output_width(eax)] ; col - test ecx,ecx - jz near .return - - push ecx - - mov edi, JSAMPIMAGE [input_buf(eax)] - mov ecx, JDIMENSION [in_row_group_ctr(eax)] - mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] - mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] - mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] - mov edi, JSAMPARRAY [output_buf(eax)] - mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0 - mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1 - mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2 - mov edi, JSAMPROW [edi] ; outptr - - pop ecx ; col - - alignx 16,7 -.columnloop: - movpic eax, POINTER [gotptr] ; load GOT address (eax) - - movdqa xmm6, XMMWORD [ebx] ; xmm6=Cb(0123456789ABCDEF) - movdqa xmm7, XMMWORD [edx] ; xmm7=Cr(0123456789ABCDEF) - - pxor xmm1,xmm1 ; xmm1=(all 0's) - pcmpeqw xmm3,xmm3 - psllw xmm3,7 ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..} - - movdqa 
xmm4,xmm6 - punpckhbw xmm6,xmm1 ; xmm6=Cb(89ABCDEF)=CbH - punpcklbw xmm4,xmm1 ; xmm4=Cb(01234567)=CbL - movdqa xmm0,xmm7 - punpckhbw xmm7,xmm1 ; xmm7=Cr(89ABCDEF)=CrH - punpcklbw xmm0,xmm1 ; xmm0=Cr(01234567)=CrL - - paddw xmm6,xmm3 - paddw xmm4,xmm3 - paddw xmm7,xmm3 - paddw xmm0,xmm3 - - ; (Original) - ; R = Y + 1.40200 * Cr - ; G = Y - 0.34414 * Cb - 0.71414 * Cr - ; B = Y + 1.77200 * Cb - ; - ; (This implementation) - ; R = Y + 0.40200 * Cr + Cr - ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr - ; B = Y - 0.22800 * Cb + Cb + Cb - - movdqa xmm5,xmm6 ; xmm5=CbH - movdqa xmm2,xmm4 ; xmm2=CbL - paddw xmm6,xmm6 ; xmm6=2*CbH - paddw xmm4,xmm4 ; xmm4=2*CbL - movdqa xmm1,xmm7 ; xmm1=CrH - movdqa xmm3,xmm0 ; xmm3=CrL - paddw xmm7,xmm7 ; xmm7=2*CrH - paddw xmm0,xmm0 ; xmm0=2*CrL - - pmulhw xmm6,[GOTOFF(eax,PW_MF0228)] ; xmm6=(2*CbH * -FIX(0.22800)) - pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbL * -FIX(0.22800)) - pmulhw xmm7,[GOTOFF(eax,PW_F0402)] ; xmm7=(2*CrH * FIX(0.40200)) - pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrL * FIX(0.40200)) - - paddw xmm6,[GOTOFF(eax,PW_ONE)] - paddw xmm4,[GOTOFF(eax,PW_ONE)] - psraw xmm6,1 ; xmm6=(CbH * -FIX(0.22800)) - psraw xmm4,1 ; xmm4=(CbL * -FIX(0.22800)) - paddw xmm7,[GOTOFF(eax,PW_ONE)] - paddw xmm0,[GOTOFF(eax,PW_ONE)] - psraw xmm7,1 ; xmm7=(CrH * FIX(0.40200)) - psraw xmm0,1 ; xmm0=(CrL * FIX(0.40200)) - - paddw xmm6,xmm5 - paddw xmm4,xmm2 - paddw xmm6,xmm5 ; xmm6=(CbH * FIX(1.77200))=(B-Y)H - paddw xmm4,xmm2 ; xmm4=(CbL * FIX(1.77200))=(B-Y)L - paddw xmm7,xmm1 ; xmm7=(CrH * FIX(1.40200))=(R-Y)H - paddw xmm0,xmm3 ; xmm0=(CrL * FIX(1.40200))=(R-Y)L - - movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=(B-Y)H - movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(R-Y)H - - movdqa xmm6,xmm5 - movdqa xmm7,xmm2 - punpcklwd xmm5,xmm1 - punpckhwd xmm6,xmm1 - pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)] - pmaddwd xmm6,[GOTOFF(eax,PW_MF0344_F0285)] - punpcklwd xmm2,xmm3 - punpckhwd xmm7,xmm3 - pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)] - pmaddwd 
xmm7,[GOTOFF(eax,PW_MF0344_F0285)] - - paddd xmm5,[GOTOFF(eax,PD_ONEHALF)] - paddd xmm6,[GOTOFF(eax,PD_ONEHALF)] - psrad xmm5,SCALEBITS - psrad xmm6,SCALEBITS - paddd xmm2,[GOTOFF(eax,PD_ONEHALF)] - paddd xmm7,[GOTOFF(eax,PD_ONEHALF)] - psrad xmm2,SCALEBITS - psrad xmm7,SCALEBITS - - packssdw xmm5,xmm6 ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285) - packssdw xmm2,xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285) - psubw xmm5,xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H - psubw xmm2,xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L - - movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H - - mov al,2 ; Yctr - jmp short .Yloop_1st - alignx 16,7 - -.Yloop_2nd: - movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H - movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H - movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H - alignx 16,7 - -.Yloop_1st: - movdqa xmm7, XMMWORD [esi] ; xmm7=Y(0123456789ABCDEF) - - pcmpeqw xmm6,xmm6 - psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} - pand xmm6,xmm7 ; xmm6=Y(02468ACE)=YE - psrlw xmm7,BYTE_BIT ; xmm7=Y(13579BDF)=YO - - movdqa xmm1,xmm0 ; xmm1=xmm0=(R-Y)(L/H) - movdqa xmm3,xmm2 ; xmm3=xmm2=(G-Y)(L/H) - movdqa xmm5,xmm4 ; xmm5=xmm4=(B-Y)(L/H) - - paddw xmm0,xmm6 ; xmm0=((R-Y)+YE)=RE=R(02468ACE) - paddw xmm1,xmm7 ; xmm1=((R-Y)+YO)=RO=R(13579BDF) - packuswb xmm0,xmm0 ; xmm0=R(02468ACE********) - packuswb xmm1,xmm1 ; xmm1=R(13579BDF********) - - paddw xmm2,xmm6 ; xmm2=((G-Y)+YE)=GE=G(02468ACE) - paddw xmm3,xmm7 ; xmm3=((G-Y)+YO)=GO=G(13579BDF) - packuswb xmm2,xmm2 ; xmm2=G(02468ACE********) - packuswb xmm3,xmm3 ; xmm3=G(13579BDF********) - - paddw xmm4,xmm6 ; xmm4=((B-Y)+YE)=BE=B(02468ACE) - paddw xmm5,xmm7 ; xmm5=((B-Y)+YO)=BO=B(13579BDF) - packuswb xmm4,xmm4 ; xmm4=B(02468ACE********) - packuswb xmm5,xmm5 ; xmm5=B(13579BDF********) - -%if RGB_PIXELSIZE == 3 ; --------------- - - ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) - ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) - ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 
25 27 29 2B 2D 2F **) - ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) - - punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) - punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) - punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) - - movdqa xmmG,xmmA - movdqa xmmH,xmmA - punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) - punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) - - psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) - psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) - - movdqa xmmC,xmmD - movdqa xmmB,xmmD - punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) - punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) - - psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) - - movdqa xmmF,xmmE - punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) - punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) - - pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) - movdqa xmmB,xmmE - punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) - punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) - punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) - - pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) - movdqa xmmB,xmmF - punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) - punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) - punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) - - punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) - punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) - punpcklqdq xmmF,xmmC ; 
xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) - - cmp ecx, byte SIZEOF_XMMWORD - jb short .column_st32 - - test edi, SIZEOF_XMMWORD-1 - jnz short .out1 - ; --(aligned)------------------- - movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF - jmp short .out0 -.out1: ; --(unaligned)----------------- - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF -.out0: - add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr - sub ecx, byte SIZEOF_XMMWORD - jz near .endcolumn - - add esi, byte SIZEOF_XMMWORD ; inptr0 - dec al ; Yctr - jnz near .Yloop_2nd - - add ebx, byte SIZEOF_XMMWORD ; inptr1 - add edx, byte SIZEOF_XMMWORD ; inptr2 - jmp near .columnloop - alignx 16,7 - -.column_st32: - lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE - cmp ecx, byte 2*SIZEOF_XMMWORD - jb short .column_st16 - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - add edi, byte 2*SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmF - sub ecx, byte 2*SIZEOF_XMMWORD - jmp short .column_st15 -.column_st16: - cmp ecx, byte SIZEOF_XMMWORD - jb short .column_st15 - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmD - sub ecx, byte SIZEOF_XMMWORD -.column_st15: - ; Store the lower 8 bytes of xmmA to the output when it has enough - ; space. - cmp ecx, byte SIZEOF_MMWORD - jb short .column_st7 - movq XMM_MMWORD [edi], xmmA - add edi, byte SIZEOF_MMWORD - sub ecx, byte SIZEOF_MMWORD - psrldq xmmA, SIZEOF_MMWORD -.column_st7: - ; Store the lower 4 bytes of xmmA to the output when it has enough - ; space. 
- cmp ecx, byte SIZEOF_DWORD - jb short .column_st3 - movd XMM_DWORD [edi], xmmA - add edi, byte SIZEOF_DWORD - sub ecx, byte SIZEOF_DWORD - psrldq xmmA, SIZEOF_DWORD -.column_st3: - ; Store the lower 2 bytes of eax to the output when it has enough - ; space. - movd eax, xmmA - cmp ecx, byte SIZEOF_WORD - jb short .column_st1 - mov WORD [edi], ax - add edi, byte SIZEOF_WORD - sub ecx, byte SIZEOF_WORD - shr eax, 16 -.column_st1: - ; Store the lower 1 byte of eax to the output when it has enough - ; space. - test ecx, ecx - jz short .endcolumn - mov BYTE [edi], al - -%else ; RGB_PIXELSIZE == 4 ; ----------- - -%ifdef RGBX_FILLER_0XFF - pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********) - pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********) -%else - pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********) - pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********) -%endif - ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) - ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) - ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) - ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) - - punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) - punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) - punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) - punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) - - movdqa xmmC,xmmA - punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) - punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) - movdqa xmmG,xmmB - punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) - punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) - - movdqa xmmD,xmmA - punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) - punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) - 
movdqa xmmH,xmmC - punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) - punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) - - cmp ecx, byte SIZEOF_XMMWORD - jb short .column_st32 - - test edi, SIZEOF_XMMWORD-1 - jnz short .out1 - ; --(aligned)------------------- - movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC - movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH - jmp short .out0 -.out1: ; --(unaligned)----------------- - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC - movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH -.out0: - add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr - sub ecx, byte SIZEOF_XMMWORD - jz near .endcolumn - - add esi, byte SIZEOF_XMMWORD ; inptr0 - dec al ; Yctr - jnz near .Yloop_2nd - - add ebx, byte SIZEOF_XMMWORD ; inptr1 - add edx, byte SIZEOF_XMMWORD ; inptr2 - jmp near .columnloop - alignx 16,7 - -.column_st32: - cmp ecx, byte SIZEOF_XMMWORD/2 - jb short .column_st16 - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD - add edi, byte 2*SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmC - movdqa xmmD,xmmH - sub ecx, byte SIZEOF_XMMWORD/2 -.column_st16: - cmp ecx, byte SIZEOF_XMMWORD/4 - jb short .column_st15 - movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA - add edi, byte SIZEOF_XMMWORD ; outptr - movdqa xmmA,xmmD - sub ecx, byte SIZEOF_XMMWORD/4 -.column_st15: - ; Store two pixels (8 bytes) of xmmA to the output when it has enough - ; space. - cmp ecx, byte SIZEOF_XMMWORD/8 - jb short .column_st7 - movq XMM_MMWORD [edi], xmmA - add edi, byte SIZEOF_XMMWORD/8*4 - sub ecx, byte SIZEOF_XMMWORD/8 - psrldq xmmA, SIZEOF_XMMWORD/8*4 -.column_st7: - ; Store one pixel (4 bytes) of xmmA to the output when it has enough - ; space. 
- test ecx, ecx - jz short .endcolumn - movd XMM_DWORD [edi], xmmA - -%endif ; RGB_PIXELSIZE ; --------------- - -.endcolumn: - sfence ; flush the write buffer - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. -; -; GLOBAL(void) -; jsimd_h2v2_merged_upsample_sse2 (JDIMENSION output_width, -; JSAMPIMAGE input_buf, -; JDIMENSION in_row_group_ctr, -; JSAMPARRAY output_buf); -; - -%define output_width(b) (b)+8 ; JDIMENSION output_width -%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf -%define in_row_group_ctr(b) (b)+16 ; JDIMENSION in_row_group_ctr -%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf - - align 16 - global EXTN(jsimd_h2v2_merged_upsample_sse2) - -EXTN(jsimd_h2v2_merged_upsample_sse2): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov eax, POINTER [output_width(ebp)] - - mov edi, JSAMPIMAGE [input_buf(ebp)] - mov ecx, JDIMENSION [in_row_group_ctr(ebp)] - mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY] - mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY] - mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY] - mov edi, JSAMPARRAY [output_buf(ebp)] - lea esi, [esi+ecx*SIZEOF_JSAMPROW] - - push edx ; inptr2 - push ebx ; inptr1 - push esi ; inptr00 - mov ebx,esp - - push edi ; output_buf (outptr0) - push ecx ; in_row_group_ctr - push ebx ; input_buf - push eax ; output_width - - call near EXTN(jsimd_h2v1_merged_upsample_sse2) - - add esi, byte SIZEOF_JSAMPROW ; inptr01 - add edi, byte SIZEOF_JSAMPROW ; outptr1 - mov POINTER [ebx+0*SIZEOF_POINTER], esi - mov POINTER [ebx-1*SIZEOF_POINTER], edi - - call near EXTN(jsimd_h2v1_merged_upsample_sse2) - - add esp, byte 7*SIZEOF_DWORD - - 
pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jdsample-mmx.asm b/simd/jdsample-mmx.asm deleted file mode 100644 index 5e4fa7a..0000000 --- a/simd/jdsample-mmx.asm +++ /dev/null @@ -1,736 +0,0 @@ -; -; jdsample.asm - upsampling (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_fancy_upsample_mmx) - -EXTN(jconst_fancy_upsample_mmx): - -PW_ONE times 4 dw 1 -PW_TWO times 4 dw 2 -PW_THREE times 4 dw 3 -PW_SEVEN times 4 dw 7 -PW_EIGHT times 4 dw 8 - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. -; -; The upsampling algorithm is linear interpolation between pixel centers, -; also known as a "triangle filter". This is a good compromise between -; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 -; of the way between input pixel centers. 
-; -; GLOBAL(void) -; jsimd_h2v1_fancy_upsample_mmx (int max_v_samp_factor, -; JDIMENSION downsampled_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -%define max_v_samp(b) (b)+8 ; int max_v_samp_factor -%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width -%define input_data(b) (b)+16 ; JSAMPARRAY input_data -%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr - - align 16 - global EXTN(jsimd_h2v1_fancy_upsample_mmx) - -EXTN(jsimd_h2v1_fancy_upsample_mmx): - push ebp - mov ebp,esp - pushpic ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr - test eax,eax - jz near .return - - mov ecx, INT [max_v_samp(ebp)] ; rowctr - test ecx,ecx - jz near .return - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, POINTER [output_data_ptr(ebp)] - mov edi, JSAMPARRAY [edi] ; output_data - alignx 16,7 -.rowloop: - push eax ; colctr - push edi - push esi - - mov esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr - - test eax, SIZEOF_MMWORD-1 - jz short .skip - mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] - mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample -.skip: - pxor mm0,mm0 ; mm0=(all 0's) - pcmpeqb mm7,mm7 - psrlq mm7,(SIZEOF_MMWORD-1)*BYTE_BIT - pand mm7, MMWORD [esi+0*SIZEOF_MMWORD] - - add eax, byte SIZEOF_MMWORD-1 - and eax, byte -SIZEOF_MMWORD - cmp eax, byte SIZEOF_MMWORD - ja short .columnloop - alignx 16,7 - -.columnloop_last: - pcmpeqb mm6,mm6 - psllq mm6,(SIZEOF_MMWORD-1)*BYTE_BIT - pand mm6, MMWORD [esi+0*SIZEOF_MMWORD] - jmp short .upsample - alignx 16,7 - -.columnloop: - movq mm6, MMWORD [esi+1*SIZEOF_MMWORD] - psllq mm6,(SIZEOF_MMWORD-1)*BYTE_BIT - -.upsample: - movq mm1, MMWORD [esi+0*SIZEOF_MMWORD] - movq mm2,mm1 - movq mm3,mm1 ; mm1=( 0 1 2 3 4 5 6 7) - psllq mm2,BYTE_BIT ; mm2=( - 0 1 2 3 4 5 6) - psrlq mm3,BYTE_BIT ; mm3=( 1 2 3 4 5 6 7 -) 
- - por mm2,mm7 ; mm2=(-1 0 1 2 3 4 5 6) - por mm3,mm6 ; mm3=( 1 2 3 4 5 6 7 8) - - movq mm7,mm1 - psrlq mm7,(SIZEOF_MMWORD-1)*BYTE_BIT ; mm7=( 7 - - - - - - -) - - movq mm4,mm1 - punpcklbw mm1,mm0 ; mm1=( 0 1 2 3) - punpckhbw mm4,mm0 ; mm4=( 4 5 6 7) - movq mm5,mm2 - punpcklbw mm2,mm0 ; mm2=(-1 0 1 2) - punpckhbw mm5,mm0 ; mm5=( 3 4 5 6) - movq mm6,mm3 - punpcklbw mm3,mm0 ; mm3=( 1 2 3 4) - punpckhbw mm6,mm0 ; mm6=( 5 6 7 8) - - pmullw mm1,[GOTOFF(ebx,PW_THREE)] - pmullw mm4,[GOTOFF(ebx,PW_THREE)] - paddw mm2,[GOTOFF(ebx,PW_ONE)] - paddw mm5,[GOTOFF(ebx,PW_ONE)] - paddw mm3,[GOTOFF(ebx,PW_TWO)] - paddw mm6,[GOTOFF(ebx,PW_TWO)] - - paddw mm2,mm1 - paddw mm5,mm4 - psrlw mm2,2 ; mm2=OutLE=( 0 2 4 6) - psrlw mm5,2 ; mm5=OutHE=( 8 10 12 14) - paddw mm3,mm1 - paddw mm6,mm4 - psrlw mm3,2 ; mm3=OutLO=( 1 3 5 7) - psrlw mm6,2 ; mm6=OutHO=( 9 11 13 15) - - psllw mm3,BYTE_BIT - psllw mm6,BYTE_BIT - por mm2,mm3 ; mm2=OutL=( 0 1 2 3 4 5 6 7) - por mm5,mm6 ; mm5=OutH=( 8 9 10 11 12 13 14 15) - - movq MMWORD [edi+0*SIZEOF_MMWORD], mm2 - movq MMWORD [edi+1*SIZEOF_MMWORD], mm5 - - sub eax, byte SIZEOF_MMWORD - add esi, byte 1*SIZEOF_MMWORD ; inptr - add edi, byte 2*SIZEOF_MMWORD ; outptr - cmp eax, byte SIZEOF_MMWORD - ja near .columnloop - test eax,eax - jnz near .columnloop_last - - pop esi - pop edi - pop eax - - add esi, byte SIZEOF_JSAMPROW ; input_data - add edi, byte SIZEOF_JSAMPROW ; output_data - dec ecx ; rowctr - jg near .rowloop - - emms ; empty MMX state - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - poppic ebx - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. -; Again a triangle filter; see comments for h2v1 case, above. 
-; -; GLOBAL(void) -; jsimd_h2v2_fancy_upsample_mmx (int max_v_samp_factor, -; JDIMENSION downsampled_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -%define max_v_samp(b) (b)+8 ; int max_v_samp_factor -%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width -%define input_data(b) (b)+16 ; JSAMPARRAY input_data -%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 4 -%define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr - - align 16 - global EXTN(jsimd_h2v2_fancy_upsample_mmx) - -EXTN(jsimd_h2v2_fancy_upsample_mmx): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic eax ; make a room for GOT address - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - movpic POINTER [gotptr], ebx ; save GOT address - - mov edx,eax ; edx = original ebp - mov eax, JDIMENSION [downsamp_width(edx)] ; colctr - test eax,eax - jz near .return - - mov ecx, INT [max_v_samp(edx)] ; rowctr - test ecx,ecx - jz near .return - - mov esi, JSAMPARRAY [input_data(edx)] ; input_data - mov edi, POINTER [output_data_ptr(edx)] - mov edi, JSAMPARRAY [edi] ; output_data - alignx 16,7 -.rowloop: - push eax ; colctr - push ecx - push edi - push esi - - mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) - mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 - mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 - mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 - - test eax, SIZEOF_MMWORD-1 - jz short .skip - push edx - mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] - mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl - mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE] - mov 
JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl - mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] - mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample - pop edx -.skip: - ; -- process the first column block - - movq mm0, MMWORD [ebx+0*SIZEOF_MMWORD] ; mm0=row[ 0][0] - movq mm1, MMWORD [ecx+0*SIZEOF_MMWORD] ; mm1=row[-1][0] - movq mm2, MMWORD [esi+0*SIZEOF_MMWORD] ; mm2=row[+1][0] - - pushpic ebx - movpic ebx, POINTER [gotptr] ; load GOT address - - pxor mm3,mm3 ; mm3=(all 0's) - movq mm4,mm0 - punpcklbw mm0,mm3 ; mm0=row[ 0][0]( 0 1 2 3) - punpckhbw mm4,mm3 ; mm4=row[ 0][0]( 4 5 6 7) - movq mm5,mm1 - punpcklbw mm1,mm3 ; mm1=row[-1][0]( 0 1 2 3) - punpckhbw mm5,mm3 ; mm5=row[-1][0]( 4 5 6 7) - movq mm6,mm2 - punpcklbw mm2,mm3 ; mm2=row[+1][0]( 0 1 2 3) - punpckhbw mm6,mm3 ; mm6=row[+1][0]( 4 5 6 7) - - pmullw mm0,[GOTOFF(ebx,PW_THREE)] - pmullw mm4,[GOTOFF(ebx,PW_THREE)] - - pcmpeqb mm7,mm7 - psrlq mm7,(SIZEOF_MMWORD-2)*BYTE_BIT - - paddw mm1,mm0 ; mm1=Int0L=( 0 1 2 3) - paddw mm5,mm4 ; mm5=Int0H=( 4 5 6 7) - paddw mm2,mm0 ; mm2=Int1L=( 0 1 2 3) - paddw mm6,mm4 ; mm6=Int1H=( 4 5 6 7) - - movq MMWORD [edx+0*SIZEOF_MMWORD], mm1 ; temporarily save - movq MMWORD [edx+1*SIZEOF_MMWORD], mm5 ; the intermediate data - movq MMWORD [edi+0*SIZEOF_MMWORD], mm2 - movq MMWORD [edi+1*SIZEOF_MMWORD], mm6 - - pand mm1,mm7 ; mm1=( 0 - - -) - pand mm2,mm7 ; mm2=( 0 - - -) - - movq MMWORD [wk(0)], mm1 - movq MMWORD [wk(1)], mm2 - - poppic ebx - - add eax, byte SIZEOF_MMWORD-1 - and eax, byte -SIZEOF_MMWORD - cmp eax, byte SIZEOF_MMWORD - ja short .columnloop - alignx 16,7 - -.columnloop_last: - ; -- process the last column block - - pushpic ebx - movpic ebx, POINTER [gotptr] ; load GOT address - - pcmpeqb mm1,mm1 - psllq mm1,(SIZEOF_MMWORD-2)*BYTE_BIT - movq mm2,mm1 - - pand mm1, MMWORD [edx+1*SIZEOF_MMWORD] ; mm1=( - - - 7) - pand mm2, MMWORD [edi+1*SIZEOF_MMWORD] ; mm2=( - - - 7) - - movq MMWORD [wk(2)], mm1 - movq MMWORD [wk(3)], mm2 - - jmp short .upsample - alignx 16,7 - 
-.columnloop: - ; -- process the next column block - - movq mm0, MMWORD [ebx+1*SIZEOF_MMWORD] ; mm0=row[ 0][1] - movq mm1, MMWORD [ecx+1*SIZEOF_MMWORD] ; mm1=row[-1][1] - movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] ; mm2=row[+1][1] - - pushpic ebx - movpic ebx, POINTER [gotptr] ; load GOT address - - pxor mm3,mm3 ; mm3=(all 0's) - movq mm4,mm0 - punpcklbw mm0,mm3 ; mm0=row[ 0][1]( 0 1 2 3) - punpckhbw mm4,mm3 ; mm4=row[ 0][1]( 4 5 6 7) - movq mm5,mm1 - punpcklbw mm1,mm3 ; mm1=row[-1][1]( 0 1 2 3) - punpckhbw mm5,mm3 ; mm5=row[-1][1]( 4 5 6 7) - movq mm6,mm2 - punpcklbw mm2,mm3 ; mm2=row[+1][1]( 0 1 2 3) - punpckhbw mm6,mm3 ; mm6=row[+1][1]( 4 5 6 7) - - pmullw mm0,[GOTOFF(ebx,PW_THREE)] - pmullw mm4,[GOTOFF(ebx,PW_THREE)] - - paddw mm1,mm0 ; mm1=Int0L=( 0 1 2 3) - paddw mm5,mm4 ; mm5=Int0H=( 4 5 6 7) - paddw mm2,mm0 ; mm2=Int1L=( 0 1 2 3) - paddw mm6,mm4 ; mm6=Int1H=( 4 5 6 7) - - movq MMWORD [edx+2*SIZEOF_MMWORD], mm1 ; temporarily save - movq MMWORD [edx+3*SIZEOF_MMWORD], mm5 ; the intermediate data - movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 - movq MMWORD [edi+3*SIZEOF_MMWORD], mm6 - - psllq mm1,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm1=( - - - 0) - psllq mm2,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm2=( - - - 0) - - movq MMWORD [wk(2)], mm1 - movq MMWORD [wk(3)], mm2 - -.upsample: - ; -- process the upper row - - movq mm7, MMWORD [edx+0*SIZEOF_MMWORD] ; mm7=Int0L=( 0 1 2 3) - movq mm3, MMWORD [edx+1*SIZEOF_MMWORD] ; mm3=Int0H=( 4 5 6 7) - - movq mm0,mm7 - movq mm4,mm3 - psrlq mm0,2*BYTE_BIT ; mm0=( 1 2 3 -) - psllq mm4,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( - - - 4) - movq mm5,mm7 - movq mm6,mm3 - psrlq mm5,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm5=( 3 - - -) - psllq mm6,2*BYTE_BIT ; mm6=( - 4 5 6) - - por mm0,mm4 ; mm0=( 1 2 3 4) - por mm5,mm6 ; mm5=( 3 4 5 6) - - movq mm1,mm7 - movq mm2,mm3 - psllq mm1,2*BYTE_BIT ; mm1=( - 0 1 2) - psrlq mm2,2*BYTE_BIT ; mm2=( 5 6 7 -) - movq mm4,mm3 - psrlq mm4,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm4=( 7 - - -) - - por mm1, MMWORD [wk(0)] ; mm1=(-1 0 1 2) - por mm2, 
MMWORD [wk(2)] ; mm2=( 5 6 7 8) - - movq MMWORD [wk(0)], mm4 - - pmullw mm7,[GOTOFF(ebx,PW_THREE)] - pmullw mm3,[GOTOFF(ebx,PW_THREE)] - paddw mm1,[GOTOFF(ebx,PW_EIGHT)] - paddw mm5,[GOTOFF(ebx,PW_EIGHT)] - paddw mm0,[GOTOFF(ebx,PW_SEVEN)] - paddw mm2,[GOTOFF(ebx,PW_SEVEN)] - - paddw mm1,mm7 - paddw mm5,mm3 - psrlw mm1,4 ; mm1=Out0LE=( 0 2 4 6) - psrlw mm5,4 ; mm5=Out0HE=( 8 10 12 14) - paddw mm0,mm7 - paddw mm2,mm3 - psrlw mm0,4 ; mm0=Out0LO=( 1 3 5 7) - psrlw mm2,4 ; mm2=Out0HO=( 9 11 13 15) - - psllw mm0,BYTE_BIT - psllw mm2,BYTE_BIT - por mm1,mm0 ; mm1=Out0L=( 0 1 2 3 4 5 6 7) - por mm5,mm2 ; mm5=Out0H=( 8 9 10 11 12 13 14 15) - - movq MMWORD [edx+0*SIZEOF_MMWORD], mm1 - movq MMWORD [edx+1*SIZEOF_MMWORD], mm5 - - ; -- process the lower row - - movq mm6, MMWORD [edi+0*SIZEOF_MMWORD] ; mm6=Int1L=( 0 1 2 3) - movq mm4, MMWORD [edi+1*SIZEOF_MMWORD] ; mm4=Int1H=( 4 5 6 7) - - movq mm7,mm6 - movq mm3,mm4 - psrlq mm7,2*BYTE_BIT ; mm7=( 1 2 3 -) - psllq mm3,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( - - - 4) - movq mm0,mm6 - movq mm2,mm4 - psrlq mm0,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm0=( 3 - - -) - psllq mm2,2*BYTE_BIT ; mm2=( - 4 5 6) - - por mm7,mm3 ; mm7=( 1 2 3 4) - por mm0,mm2 ; mm0=( 3 4 5 6) - - movq mm1,mm6 - movq mm5,mm4 - psllq mm1,2*BYTE_BIT ; mm1=( - 0 1 2) - psrlq mm5,2*BYTE_BIT ; mm5=( 5 6 7 -) - movq mm3,mm4 - psrlq mm3,(SIZEOF_MMWORD-2)*BYTE_BIT ; mm3=( 7 - - -) - - por mm1, MMWORD [wk(1)] ; mm1=(-1 0 1 2) - por mm5, MMWORD [wk(3)] ; mm5=( 5 6 7 8) - - movq MMWORD [wk(1)], mm3 - - pmullw mm6,[GOTOFF(ebx,PW_THREE)] - pmullw mm4,[GOTOFF(ebx,PW_THREE)] - paddw mm1,[GOTOFF(ebx,PW_EIGHT)] - paddw mm0,[GOTOFF(ebx,PW_EIGHT)] - paddw mm7,[GOTOFF(ebx,PW_SEVEN)] - paddw mm5,[GOTOFF(ebx,PW_SEVEN)] - - paddw mm1,mm6 - paddw mm0,mm4 - psrlw mm1,4 ; mm1=Out1LE=( 0 2 4 6) - psrlw mm0,4 ; mm0=Out1HE=( 8 10 12 14) - paddw mm7,mm6 - paddw mm5,mm4 - psrlw mm7,4 ; mm7=Out1LO=( 1 3 5 7) - psrlw mm5,4 ; mm5=Out1HO=( 9 11 13 15) - - psllw mm7,BYTE_BIT - psllw mm5,BYTE_BIT - por mm1,mm7 ; 
mm1=Out1L=( 0 1 2 3 4 5 6 7) - por mm0,mm5 ; mm0=Out1H=( 8 9 10 11 12 13 14 15) - - movq MMWORD [edi+0*SIZEOF_MMWORD], mm1 - movq MMWORD [edi+1*SIZEOF_MMWORD], mm0 - - poppic ebx - - sub eax, byte SIZEOF_MMWORD - add ecx, byte 1*SIZEOF_MMWORD ; inptr1(above) - add ebx, byte 1*SIZEOF_MMWORD ; inptr0 - add esi, byte 1*SIZEOF_MMWORD ; inptr1(below) - add edx, byte 2*SIZEOF_MMWORD ; outptr0 - add edi, byte 2*SIZEOF_MMWORD ; outptr1 - cmp eax, byte SIZEOF_MMWORD - ja near .columnloop - test eax,eax - jnz near .columnloop_last - - pop esi - pop edi - pop ecx - pop eax - - add esi, byte 1*SIZEOF_JSAMPROW ; input_data - add edi, byte 2*SIZEOF_JSAMPROW ; output_data - sub ecx, byte 2 ; rowctr - jg near .rowloop - - emms ; empty MMX state - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. -; It's still a box filter. 
-; -; GLOBAL(void) -; jsimd_h2v1_upsample_mmx (int max_v_samp_factor, -; JDIMENSION output_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -%define max_v_samp(b) (b)+8 ; int max_v_samp_factor -%define output_width(b) (b)+12 ; JDIMENSION output_width -%define input_data(b) (b)+16 ; JSAMPARRAY input_data -%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr - - align 16 - global EXTN(jsimd_h2v1_upsample_mmx) - -EXTN(jsimd_h2v1_upsample_mmx): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov edx, JDIMENSION [output_width(ebp)] - add edx, byte (2*SIZEOF_MMWORD)-1 - and edx, byte -(2*SIZEOF_MMWORD) - jz short .return - - mov ecx, INT [max_v_samp(ebp)] ; rowctr - test ecx,ecx - jz short .return - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, POINTER [output_data_ptr(ebp)] - mov edi, JSAMPARRAY [edi] ; output_data - alignx 16,7 -.rowloop: - push edi - push esi - - mov esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr - mov eax,edx ; colctr - alignx 16,7 -.columnloop: - - movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] - - movq mm1,mm0 - punpcklbw mm0,mm0 - punpckhbw mm1,mm1 - - movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 - movq MMWORD [edi+1*SIZEOF_MMWORD], mm1 - - sub eax, byte 2*SIZEOF_MMWORD - jz short .nextrow - - movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] - - movq mm3,mm2 - punpcklbw mm2,mm2 - punpckhbw mm3,mm3 - - movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 - movq MMWORD [edi+3*SIZEOF_MMWORD], mm3 - - sub eax, byte 2*SIZEOF_MMWORD - jz short .nextrow - - add esi, byte 2*SIZEOF_MMWORD ; inptr - add edi, byte 4*SIZEOF_MMWORD ; outptr - jmp short .columnloop - alignx 16,7 - -.nextrow: - pop esi - pop edi - - add esi, byte SIZEOF_JSAMPROW ; input_data - add edi, byte SIZEOF_JSAMPROW ; output_data - dec ecx ; rowctr - jg short .rowloop - - emms ; empty MMX state - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; 
pop ecx ; need not be preserved -; pop ebx ; unused - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. -; It's still a box filter. -; -; GLOBAL(void) -; jsimd_h2v2_upsample_mmx (int max_v_samp_factor, -; JDIMENSION output_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -%define max_v_samp(b) (b)+8 ; int max_v_samp_factor -%define output_width(b) (b)+12 ; JDIMENSION output_width -%define input_data(b) (b)+16 ; JSAMPARRAY input_data -%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr - - align 16 - global EXTN(jsimd_h2v2_upsample_mmx) - -EXTN(jsimd_h2v2_upsample_mmx): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov edx, JDIMENSION [output_width(ebp)] - add edx, byte (2*SIZEOF_MMWORD)-1 - and edx, byte -(2*SIZEOF_MMWORD) - jz near .return - - mov ecx, INT [max_v_samp(ebp)] ; rowctr - test ecx,ecx - jz short .return - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, POINTER [output_data_ptr(ebp)] - mov edi, JSAMPARRAY [edi] ; output_data - alignx 16,7 -.rowloop: - push edi - push esi - - mov esi, JSAMPROW [esi] ; inptr - mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 - mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 - mov eax,edx ; colctr - alignx 16,7 -.columnloop: - - movq mm0, MMWORD [esi+0*SIZEOF_MMWORD] - - movq mm1,mm0 - punpcklbw mm0,mm0 - punpckhbw mm1,mm1 - - movq MMWORD [ebx+0*SIZEOF_MMWORD], mm0 - movq MMWORD [ebx+1*SIZEOF_MMWORD], mm1 - movq MMWORD [edi+0*SIZEOF_MMWORD], mm0 - movq MMWORD [edi+1*SIZEOF_MMWORD], mm1 - - sub eax, byte 2*SIZEOF_MMWORD - jz short .nextrow - - movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] - - movq mm3,mm2 - punpcklbw mm2,mm2 - punpckhbw mm3,mm3 - - movq MMWORD [ebx+2*SIZEOF_MMWORD], mm2 - movq MMWORD [ebx+3*SIZEOF_MMWORD], mm3 - movq MMWORD [edi+2*SIZEOF_MMWORD], mm2 
- movq MMWORD [edi+3*SIZEOF_MMWORD], mm3 - - sub eax, byte 2*SIZEOF_MMWORD - jz short .nextrow - - add esi, byte 2*SIZEOF_MMWORD ; inptr - add ebx, byte 4*SIZEOF_MMWORD ; outptr0 - add edi, byte 4*SIZEOF_MMWORD ; outptr1 - jmp short .columnloop - alignx 16,7 - -.nextrow: - pop esi - pop edi - - add esi, byte 1*SIZEOF_JSAMPROW ; input_data - add edi, byte 2*SIZEOF_JSAMPROW ; output_data - sub ecx, byte 2 ; rowctr - jg short .rowloop - - emms ; empty MMX state - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jdsample-sse2-64.asm b/simd/jdsample-sse2-64.asm deleted file mode 100644 index 1faaed6..0000000 --- a/simd/jdsample-sse2-64.asm +++ /dev/null @@ -1,670 +0,0 @@ -; -; jdsample.asm - upsampling (64-bit SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_fancy_upsample_sse2) - -EXTN(jconst_fancy_upsample_sse2): - -PW_ONE times 8 dw 1 -PW_TWO times 8 dw 2 -PW_THREE times 8 dw 3 -PW_SEVEN times 8 dw 7 -PW_EIGHT times 8 dw 8 - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. -; -; The upsampling algorithm is linear interpolation between pixel centers, -; also known as a "triangle filter". This is a good compromise between -; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 -; of the way between input pixel centers. -; -; GLOBAL(void) -; jsimd_h2v1_fancy_upsample_sse2 (int max_v_samp_factor, -; JDIMENSION downsampled_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -; r10 = int max_v_samp_factor -; r11 = JDIMENSION downsampled_width -; r12 = JSAMPARRAY input_data -; r13 = JSAMPARRAY *output_data_ptr - - align 16 - global EXTN(jsimd_h2v1_fancy_upsample_sse2) - -EXTN(jsimd_h2v1_fancy_upsample_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - - mov eax, r11d ; colctr - test rax,rax - jz near .return - - mov rcx, r10 ; rowctr - test rcx,rcx - jz near .return - - mov rsi, r12 ; input_data - mov rdi, r13 - mov rdi, JSAMPARRAY [rdi] ; output_data -.rowloop: - push rax ; colctr - push rdi - push rsi - - mov rsi, JSAMPROW [rsi] ; inptr - mov rdi, JSAMPROW [rdi] ; outptr - - test rax, SIZEOF_XMMWORD-1 - jz short .skip - mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] - mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample -.skip: - pxor xmm0,xmm0 ; xmm0=(all 0's) - pcmpeqb xmm7,xmm7 - psrldq xmm7,(SIZEOF_XMMWORD-1) - pand 
xmm7, XMMWORD [rsi+0*SIZEOF_XMMWORD] - - add rax, byte SIZEOF_XMMWORD-1 - and rax, byte -SIZEOF_XMMWORD - cmp rax, byte SIZEOF_XMMWORD - ja short .columnloop - -.columnloop_last: - pcmpeqb xmm6,xmm6 - pslldq xmm6,(SIZEOF_XMMWORD-1) - pand xmm6, XMMWORD [rsi+0*SIZEOF_XMMWORD] - jmp short .upsample - -.columnloop: - movdqa xmm6, XMMWORD [rsi+1*SIZEOF_XMMWORD] - pslldq xmm6,(SIZEOF_XMMWORD-1) - -.upsample: - movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] - movdqa xmm2,xmm1 - movdqa xmm3,xmm1 ; xmm1=( 0 1 2 ... 13 14 15) - pslldq xmm2,1 ; xmm2=(-- 0 1 ... 12 13 14) - psrldq xmm3,1 ; xmm3=( 1 2 3 ... 14 15 --) - - por xmm2,xmm7 ; xmm2=(-1 0 1 ... 12 13 14) - por xmm3,xmm6 ; xmm3=( 1 2 3 ... 14 15 16) - - movdqa xmm7,xmm1 - psrldq xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --) - - movdqa xmm4,xmm1 - punpcklbw xmm1,xmm0 ; xmm1=( 0 1 2 3 4 5 6 7) - punpckhbw xmm4,xmm0 ; xmm4=( 8 9 10 11 12 13 14 15) - movdqa xmm5,xmm2 - punpcklbw xmm2,xmm0 ; xmm2=(-1 0 1 2 3 4 5 6) - punpckhbw xmm5,xmm0 ; xmm5=( 7 8 9 10 11 12 13 14) - movdqa xmm6,xmm3 - punpcklbw xmm3,xmm0 ; xmm3=( 1 2 3 4 5 6 7 8) - punpckhbw xmm6,xmm0 ; xmm6=( 9 10 11 12 13 14 15 16) - - pmullw xmm1,[rel PW_THREE] - pmullw xmm4,[rel PW_THREE] - paddw xmm2,[rel PW_ONE] - paddw xmm5,[rel PW_ONE] - paddw xmm3,[rel PW_TWO] - paddw xmm6,[rel PW_TWO] - - paddw xmm2,xmm1 - paddw xmm5,xmm4 - psrlw xmm2,2 ; xmm2=OutLE=( 0 2 4 6 8 10 12 14) - psrlw xmm5,2 ; xmm5=OutHE=(16 18 20 22 24 26 28 30) - paddw xmm3,xmm1 - paddw xmm6,xmm4 - psrlw xmm3,2 ; xmm3=OutLO=( 1 3 5 7 9 11 13 15) - psrlw xmm6,2 ; xmm6=OutHO=(17 19 21 23 25 27 29 31) - - psllw xmm3,BYTE_BIT - psllw xmm6,BYTE_BIT - por xmm2,xmm3 ; xmm2=OutL=( 0 1 2 ... 13 14 15) - por xmm5,xmm6 ; xmm5=OutH=(16 17 18 ... 
29 30 31) - - movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm5 - - sub rax, byte SIZEOF_XMMWORD - add rsi, byte 1*SIZEOF_XMMWORD ; inptr - add rdi, byte 2*SIZEOF_XMMWORD ; outptr - cmp rax, byte SIZEOF_XMMWORD - ja near .columnloop - test eax,eax - jnz near .columnloop_last - - pop rsi - pop rdi - pop rax - - add rsi, byte SIZEOF_JSAMPROW ; input_data - add rdi, byte SIZEOF_JSAMPROW ; output_data - dec rcx ; rowctr - jg near .rowloop - -.return: - uncollect_args - pop rbp - ret - -; -------------------------------------------------------------------------- -; -; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. -; Again a triangle filter; see comments for h2v1 case, above. -; -; GLOBAL(void) -; jsimd_h2v2_fancy_upsample_sse2 (int max_v_samp_factor, -; JDIMENSION downsampled_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -; r10 = int max_v_samp_factor -; r11 = JDIMENSION downsampled_width -; r12 = JSAMPARRAY input_data -; r13 = JSAMPARRAY *output_data_ptr - -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 4 - - align 16 - global EXTN(jsimd_h2v2_fancy_upsample_sse2) - -EXTN(jsimd_h2v2_fancy_upsample_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - push rbx - - mov eax, r11d ; colctr - test rax,rax - jz near .return - - mov rcx, r10 ; rowctr - test rcx,rcx - jz near .return - - mov rsi, r12 ; input_data - mov rdi, r13 - mov rdi, JSAMPARRAY [rdi] ; output_data -.rowloop: - push rax ; colctr - push rcx - push rdi - push rsi - - mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above) - mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0 - mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below) - mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0 - mov rdi, JSAMPROW 
[rdi+1*SIZEOF_JSAMPROW] ; outptr1 - - test rax, SIZEOF_XMMWORD-1 - jz short .skip - push rdx - mov dl, JSAMPLE [rcx+(rax-1)*SIZEOF_JSAMPLE] - mov JSAMPLE [rcx+rax*SIZEOF_JSAMPLE], dl - mov dl, JSAMPLE [rbx+(rax-1)*SIZEOF_JSAMPLE] - mov JSAMPLE [rbx+rax*SIZEOF_JSAMPLE], dl - mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] - mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample - pop rdx -.skip: - ; -- process the first column block - - movdqa xmm0, XMMWORD [rbx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0] - movdqa xmm1, XMMWORD [rcx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0] - movdqa xmm2, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0] - - pxor xmm3,xmm3 ; xmm3=(all 0's) - movdqa xmm4,xmm0 - punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) - punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) - movdqa xmm5,xmm1 - punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) - punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) - movdqa xmm6,xmm2 - punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) - punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) - - pmullw xmm0,[rel PW_THREE] - pmullw xmm4,[rel PW_THREE] - - pcmpeqb xmm7,xmm7 - psrldq xmm7,(SIZEOF_XMMWORD-2) - - paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) - paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) - paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) - paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) - - movdqa XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1 ; temporarily save - movdqa XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5 ; the intermediate data - movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm6 - - pand xmm1,xmm7 ; xmm1=( 0 -- -- -- -- -- -- --) - pand xmm2,xmm7 ; xmm2=( 0 -- -- -- -- -- -- --) - - movdqa XMMWORD [wk(0)], xmm1 - movdqa XMMWORD [wk(1)], xmm2 - - add rax, byte SIZEOF_XMMWORD-1 - and rax, byte -SIZEOF_XMMWORD - cmp rax, byte SIZEOF_XMMWORD - ja short .columnloop - -.columnloop_last: - ; -- process the last column block - - pcmpeqb 
xmm1,xmm1 - pslldq xmm1,(SIZEOF_XMMWORD-2) - movdqa xmm2,xmm1 - - pand xmm1, XMMWORD [rdx+1*SIZEOF_XMMWORD] - pand xmm2, XMMWORD [rdi+1*SIZEOF_XMMWORD] - - movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15) - movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15) - - jmp near .upsample - -.columnloop: - ; -- process the next column block - - movdqa xmm0, XMMWORD [rbx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1] - movdqa xmm1, XMMWORD [rcx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1] - movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1] - - pxor xmm3,xmm3 ; xmm3=(all 0's) - movdqa xmm4,xmm0 - punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) - punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) - movdqa xmm5,xmm1 - punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) - punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) - movdqa xmm6,xmm2 - punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) - punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) - - pmullw xmm0,[rel PW_THREE] - pmullw xmm4,[rel PW_THREE] - - paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) - paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) - paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) - paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) - - movdqa XMMWORD [rdx+2*SIZEOF_XMMWORD], xmm1 ; temporarily save - movdqa XMMWORD [rdx+3*SIZEOF_XMMWORD], xmm5 ; the intermediate data - movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm6 - - pslldq xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- 0) - pslldq xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- -- 0) - - movdqa XMMWORD [wk(2)], xmm1 - movdqa XMMWORD [wk(3)], xmm2 - -.upsample: - ; -- process the upper row - - movdqa xmm7, XMMWORD [rdx+0*SIZEOF_XMMWORD] - movdqa xmm3, XMMWORD [rdx+1*SIZEOF_XMMWORD] - - movdqa xmm0,xmm7 ; xmm7=Int0L=( 0 1 2 3 4 5 6 7) - movdqa xmm4,xmm3 ; xmm3=Int0H=( 8 9 10 11 12 13 14 15) - psrldq xmm0,2 ; xmm0=( 1 2 3 4 5 6 7 --) - pslldq 
xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- -- 8) - movdqa xmm5,xmm7 - movdqa xmm6,xmm3 - psrldq xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --) - pslldq xmm6,2 ; xmm6=(-- 8 9 10 11 12 13 14) - - por xmm0,xmm4 ; xmm0=( 1 2 3 4 5 6 7 8) - por xmm5,xmm6 ; xmm5=( 7 8 9 10 11 12 13 14) - - movdqa xmm1,xmm7 - movdqa xmm2,xmm3 - pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) - psrldq xmm2,2 ; xmm2=( 9 10 11 12 13 14 15 --) - movdqa xmm4,xmm3 - psrldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --) - - por xmm1, XMMWORD [wk(0)] ; xmm1=(-1 0 1 2 3 4 5 6) - por xmm2, XMMWORD [wk(2)] ; xmm2=( 9 10 11 12 13 14 15 16) - - movdqa XMMWORD [wk(0)], xmm4 - - pmullw xmm7,[rel PW_THREE] - pmullw xmm3,[rel PW_THREE] - paddw xmm1,[rel PW_EIGHT] - paddw xmm5,[rel PW_EIGHT] - paddw xmm0,[rel PW_SEVEN] - paddw xmm2,[rel PW_SEVEN] - - paddw xmm1,xmm7 - paddw xmm5,xmm3 - psrlw xmm1,4 ; xmm1=Out0LE=( 0 2 4 6 8 10 12 14) - psrlw xmm5,4 ; xmm5=Out0HE=(16 18 20 22 24 26 28 30) - paddw xmm0,xmm7 - paddw xmm2,xmm3 - psrlw xmm0,4 ; xmm0=Out0LO=( 1 3 5 7 9 11 13 15) - psrlw xmm2,4 ; xmm2=Out0HO=(17 19 21 23 25 27 29 31) - - psllw xmm0,BYTE_BIT - psllw xmm2,BYTE_BIT - por xmm1,xmm0 ; xmm1=Out0L=( 0 1 2 ... 13 14 15) - por xmm5,xmm2 ; xmm5=Out0H=(16 17 18 ... 
29 30 31) - - movdqa XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1 - movdqa XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5 - - ; -- process the lower row - - movdqa xmm6, XMMWORD [rdi+0*SIZEOF_XMMWORD] - movdqa xmm4, XMMWORD [rdi+1*SIZEOF_XMMWORD] - - movdqa xmm7,xmm6 ; xmm6=Int1L=( 0 1 2 3 4 5 6 7) - movdqa xmm3,xmm4 ; xmm4=Int1H=( 8 9 10 11 12 13 14 15) - psrldq xmm7,2 ; xmm7=( 1 2 3 4 5 6 7 --) - pslldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- -- 8) - movdqa xmm0,xmm6 - movdqa xmm2,xmm4 - psrldq xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --) - pslldq xmm2,2 ; xmm2=(-- 8 9 10 11 12 13 14) - - por xmm7,xmm3 ; xmm7=( 1 2 3 4 5 6 7 8) - por xmm0,xmm2 ; xmm0=( 7 8 9 10 11 12 13 14) - - movdqa xmm1,xmm6 - movdqa xmm5,xmm4 - pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) - psrldq xmm5,2 ; xmm5=( 9 10 11 12 13 14 15 --) - movdqa xmm3,xmm4 - psrldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --) - - por xmm1, XMMWORD [wk(1)] ; xmm1=(-1 0 1 2 3 4 5 6) - por xmm5, XMMWORD [wk(3)] ; xmm5=( 9 10 11 12 13 14 15 16) - - movdqa XMMWORD [wk(1)], xmm3 - - pmullw xmm6,[rel PW_THREE] - pmullw xmm4,[rel PW_THREE] - paddw xmm1,[rel PW_EIGHT] - paddw xmm0,[rel PW_EIGHT] - paddw xmm7,[rel PW_SEVEN] - paddw xmm5,[rel PW_SEVEN] - - paddw xmm1,xmm6 - paddw xmm0,xmm4 - psrlw xmm1,4 ; xmm1=Out1LE=( 0 2 4 6 8 10 12 14) - psrlw xmm0,4 ; xmm0=Out1HE=(16 18 20 22 24 26 28 30) - paddw xmm7,xmm6 - paddw xmm5,xmm4 - psrlw xmm7,4 ; xmm7=Out1LO=( 1 3 5 7 9 11 13 15) - psrlw xmm5,4 ; xmm5=Out1HO=(17 19 21 23 25 27 29 31) - - psllw xmm7,BYTE_BIT - psllw xmm5,BYTE_BIT - por xmm1,xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15) - por xmm0,xmm5 ; xmm0=Out1H=(16 17 18 ... 
29 30 31) - - movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm1 - movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm0 - - sub rax, byte SIZEOF_XMMWORD - add rcx, byte 1*SIZEOF_XMMWORD ; inptr1(above) - add rbx, byte 1*SIZEOF_XMMWORD ; inptr0 - add rsi, byte 1*SIZEOF_XMMWORD ; inptr1(below) - add rdx, byte 2*SIZEOF_XMMWORD ; outptr0 - add rdi, byte 2*SIZEOF_XMMWORD ; outptr1 - cmp rax, byte SIZEOF_XMMWORD - ja near .columnloop - test rax,rax - jnz near .columnloop_last - - pop rsi - pop rdi - pop rcx - pop rax - - add rsi, byte 1*SIZEOF_JSAMPROW ; input_data - add rdi, byte 2*SIZEOF_JSAMPROW ; output_data - sub rcx, byte 2 ; rowctr - jg near .rowloop - -.return: - pop rbx - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; -------------------------------------------------------------------------- -; -; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. -; It's still a box filter. -; -; GLOBAL(void) -; jsimd_h2v1_upsample_sse2 (int max_v_samp_factor, -; JDIMENSION output_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -; r10 = int max_v_samp_factor -; r11 = JDIMENSION output_width -; r12 = JSAMPARRAY input_data -; r13 = JSAMPARRAY *output_data_ptr - - align 16 - global EXTN(jsimd_h2v1_upsample_sse2) - -EXTN(jsimd_h2v1_upsample_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - - mov edx, r11d - add rdx, byte (2*SIZEOF_XMMWORD)-1 - and rdx, byte -(2*SIZEOF_XMMWORD) - jz near .return - - mov rcx, r10 ; rowctr - test rcx,rcx - jz short .return - - mov rsi, r12 ; input_data - mov rdi, r13 - mov rdi, JSAMPARRAY [rdi] ; output_data -.rowloop: - push rdi - push rsi - - mov rsi, JSAMPROW [rsi] ; inptr - mov rdi, JSAMPROW [rdi] ; outptr - mov rax,rdx ; colctr -.columnloop: - - movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] - - movdqa xmm1,xmm0 - punpcklbw xmm0,xmm0 - punpckhbw xmm1,xmm1 - - movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 - movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 - - 
sub rax, byte 2*SIZEOF_XMMWORD - jz short .nextrow - - movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] - - movdqa xmm3,xmm2 - punpcklbw xmm2,xmm2 - punpckhbw xmm3,xmm3 - - movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3 - - sub rax, byte 2*SIZEOF_XMMWORD - jz short .nextrow - - add rsi, byte 2*SIZEOF_XMMWORD ; inptr - add rdi, byte 4*SIZEOF_XMMWORD ; outptr - jmp short .columnloop - -.nextrow: - pop rsi - pop rdi - - add rsi, byte SIZEOF_JSAMPROW ; input_data - add rdi, byte SIZEOF_JSAMPROW ; output_data - dec rcx ; rowctr - jg short .rowloop - -.return: - uncollect_args - pop rbp - ret - -; -------------------------------------------------------------------------- -; -; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. -; It's still a box filter. -; -; GLOBAL(void) -; jsimd_h2v2_upsample_sse2 (nt max_v_samp_factor, -; JDIMENSION output_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -; r10 = int max_v_samp_factor -; r11 = JDIMENSION output_width -; r12 = JSAMPARRAY input_data -; r13 = JSAMPARRAY *output_data_ptr - - align 16 - global EXTN(jsimd_h2v2_upsample_sse2) - -EXTN(jsimd_h2v2_upsample_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - push rbx - - mov edx, r11d - add rdx, byte (2*SIZEOF_XMMWORD)-1 - and rdx, byte -(2*SIZEOF_XMMWORD) - jz near .return - - mov rcx, r10 ; rowctr - test rcx,rcx - jz near .return - - mov rsi, r12 ; input_data - mov rdi, r13 - mov rdi, JSAMPARRAY [rdi] ; output_data -.rowloop: - push rdi - push rsi - - mov rsi, JSAMPROW [rsi] ; inptr - mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0 - mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1 - mov rax,rdx ; colctr -.columnloop: - - movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] - - movdqa xmm1,xmm0 - punpcklbw xmm0,xmm0 - punpckhbw xmm1,xmm1 - - movdqa XMMWORD [rbx+0*SIZEOF_XMMWORD], xmm0 - movdqa XMMWORD [rbx+1*SIZEOF_XMMWORD], xmm1 - movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 - movdqa 
XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 - - sub rax, byte 2*SIZEOF_XMMWORD - jz short .nextrow - - movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] - - movdqa xmm3,xmm2 - punpcklbw xmm2,xmm2 - punpckhbw xmm3,xmm3 - - movdqa XMMWORD [rbx+2*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [rbx+3*SIZEOF_XMMWORD], xmm3 - movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3 - - sub rax, byte 2*SIZEOF_XMMWORD - jz short .nextrow - - add rsi, byte 2*SIZEOF_XMMWORD ; inptr - add rbx, byte 4*SIZEOF_XMMWORD ; outptr0 - add rdi, byte 4*SIZEOF_XMMWORD ; outptr1 - jmp short .columnloop - -.nextrow: - pop rsi - pop rdi - - add rsi, byte 1*SIZEOF_JSAMPROW ; input_data - add rdi, byte 2*SIZEOF_JSAMPROW ; output_data - sub rcx, byte 2 ; rowctr - jg near .rowloop - -.return: - pop rbx - uncollect_args - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jdsample-sse2.asm b/simd/jdsample-sse2.asm deleted file mode 100644 index 1d0059e..0000000 --- a/simd/jdsample-sse2.asm +++ /dev/null @@ -1,728 +0,0 @@ -; -; jdsample.asm - upsampling (SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_fancy_upsample_sse2) - -EXTN(jconst_fancy_upsample_sse2): - -PW_ONE times 8 dw 1 -PW_TWO times 8 dw 2 -PW_THREE times 8 dw 3 -PW_SEVEN times 8 dw 7 -PW_EIGHT times 8 dw 8 - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. -; -; The upsampling algorithm is linear interpolation between pixel centers, -; also known as a "triangle filter". This is a good compromise between -; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 -; of the way between input pixel centers. -; -; GLOBAL(void) -; jsimd_h2v1_fancy_upsample_sse2 (int max_v_samp_factor, -; JDIMENSION downsampled_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -%define max_v_samp(b) (b)+8 ; int max_v_samp_factor -%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width -%define input_data(b) (b)+16 ; JSAMPARRAY input_data -%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr - - align 16 - global EXTN(jsimd_h2v1_fancy_upsample_sse2) - -EXTN(jsimd_h2v1_fancy_upsample_sse2): - push ebp - mov ebp,esp - pushpic ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr - test eax,eax - jz near .return - - mov ecx, INT [max_v_samp(ebp)] ; rowctr - test ecx,ecx - jz near .return - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, POINTER [output_data_ptr(ebp)] - mov edi, JSAMPARRAY [edi] ; output_data - alignx 16,7 -.rowloop: - push eax ; colctr - push edi - push esi - - mov 
esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr - - test eax, SIZEOF_XMMWORD-1 - jz short .skip - mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] - mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample -.skip: - pxor xmm0,xmm0 ; xmm0=(all 0's) - pcmpeqb xmm7,xmm7 - psrldq xmm7,(SIZEOF_XMMWORD-1) - pand xmm7, XMMWORD [esi+0*SIZEOF_XMMWORD] - - add eax, byte SIZEOF_XMMWORD-1 - and eax, byte -SIZEOF_XMMWORD - cmp eax, byte SIZEOF_XMMWORD - ja short .columnloop - alignx 16,7 - -.columnloop_last: - pcmpeqb xmm6,xmm6 - pslldq xmm6,(SIZEOF_XMMWORD-1) - pand xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD] - jmp short .upsample - alignx 16,7 - -.columnloop: - movdqa xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD] - pslldq xmm6,(SIZEOF_XMMWORD-1) - -.upsample: - movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD] - movdqa xmm2,xmm1 - movdqa xmm3,xmm1 ; xmm1=( 0 1 2 ... 13 14 15) - pslldq xmm2,1 ; xmm2=(-- 0 1 ... 12 13 14) - psrldq xmm3,1 ; xmm3=( 1 2 3 ... 14 15 --) - - por xmm2,xmm7 ; xmm2=(-1 0 1 ... 12 13 14) - por xmm3,xmm6 ; xmm3=( 1 2 3 ... 14 15 16) - - movdqa xmm7,xmm1 - psrldq xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... 
-- -- --) - - movdqa xmm4,xmm1 - punpcklbw xmm1,xmm0 ; xmm1=( 0 1 2 3 4 5 6 7) - punpckhbw xmm4,xmm0 ; xmm4=( 8 9 10 11 12 13 14 15) - movdqa xmm5,xmm2 - punpcklbw xmm2,xmm0 ; xmm2=(-1 0 1 2 3 4 5 6) - punpckhbw xmm5,xmm0 ; xmm5=( 7 8 9 10 11 12 13 14) - movdqa xmm6,xmm3 - punpcklbw xmm3,xmm0 ; xmm3=( 1 2 3 4 5 6 7 8) - punpckhbw xmm6,xmm0 ; xmm6=( 9 10 11 12 13 14 15 16) - - pmullw xmm1,[GOTOFF(ebx,PW_THREE)] - pmullw xmm4,[GOTOFF(ebx,PW_THREE)] - paddw xmm2,[GOTOFF(ebx,PW_ONE)] - paddw xmm5,[GOTOFF(ebx,PW_ONE)] - paddw xmm3,[GOTOFF(ebx,PW_TWO)] - paddw xmm6,[GOTOFF(ebx,PW_TWO)] - - paddw xmm2,xmm1 - paddw xmm5,xmm4 - psrlw xmm2,2 ; xmm2=OutLE=( 0 2 4 6 8 10 12 14) - psrlw xmm5,2 ; xmm5=OutHE=(16 18 20 22 24 26 28 30) - paddw xmm3,xmm1 - paddw xmm6,xmm4 - psrlw xmm3,2 ; xmm3=OutLO=( 1 3 5 7 9 11 13 15) - psrlw xmm6,2 ; xmm6=OutHO=(17 19 21 23 25 27 29 31) - - psllw xmm3,BYTE_BIT - psllw xmm6,BYTE_BIT - por xmm2,xmm3 ; xmm2=OutL=( 0 1 2 ... 13 14 15) - por xmm5,xmm6 ; xmm5=OutH=(16 17 18 ... 29 30 31) - - movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm5 - - sub eax, byte SIZEOF_XMMWORD - add esi, byte 1*SIZEOF_XMMWORD ; inptr - add edi, byte 2*SIZEOF_XMMWORD ; outptr - cmp eax, byte SIZEOF_XMMWORD - ja near .columnloop - test eax,eax - jnz near .columnloop_last - - pop esi - pop edi - pop eax - - add esi, byte SIZEOF_JSAMPROW ; input_data - add edi, byte SIZEOF_JSAMPROW ; output_data - dec ecx ; rowctr - jg near .rowloop - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - poppic ebx - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. -; Again a triangle filter; see comments for h2v1 case, above. 
-; -; GLOBAL(void) -; jsimd_h2v2_fancy_upsample_sse2 (int max_v_samp_factor, -; JDIMENSION downsampled_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -%define max_v_samp(b) (b)+8 ; int max_v_samp_factor -%define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width -%define input_data(b) (b)+16 ; JSAMPARRAY input_data -%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 4 -%define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr - - align 16 - global EXTN(jsimd_h2v2_fancy_upsample_sse2) - -EXTN(jsimd_h2v2_fancy_upsample_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic eax ; make a room for GOT address - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - movpic POINTER [gotptr], ebx ; save GOT address - - mov edx,eax ; edx = original ebp - mov eax, JDIMENSION [downsamp_width(edx)] ; colctr - test eax,eax - jz near .return - - mov ecx, INT [max_v_samp(edx)] ; rowctr - test ecx,ecx - jz near .return - - mov esi, JSAMPARRAY [input_data(edx)] ; input_data - mov edi, POINTER [output_data_ptr(edx)] - mov edi, JSAMPARRAY [edi] ; output_data - alignx 16,7 -.rowloop: - push eax ; colctr - push ecx - push edi - push esi - - mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above) - mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0 - mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1(below) - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 - mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 - - test eax, SIZEOF_XMMWORD-1 - jz short .skip - push edx - mov dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE] - mov JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl - mov dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE] - 
mov JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl - mov dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE] - mov JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl ; insert a dummy sample - pop edx -.skip: - ; -- process the first column block - - movdqa xmm0, XMMWORD [ebx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0] - movdqa xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0] - movdqa xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0] - - pushpic ebx - movpic ebx, POINTER [gotptr] ; load GOT address - - pxor xmm3,xmm3 ; xmm3=(all 0's) - movdqa xmm4,xmm0 - punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) - punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) - movdqa xmm5,xmm1 - punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) - punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) - movdqa xmm6,xmm2 - punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) - punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) - - pmullw xmm0,[GOTOFF(ebx,PW_THREE)] - pmullw xmm4,[GOTOFF(ebx,PW_THREE)] - - pcmpeqb xmm7,xmm7 - psrldq xmm7,(SIZEOF_XMMWORD-2) - - paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) - paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) - paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) - paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) - - movdqa XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1 ; temporarily save - movdqa XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5 ; the intermediate data - movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm6 - - pand xmm1,xmm7 ; xmm1=( 0 -- -- -- -- -- -- --) - pand xmm2,xmm7 ; xmm2=( 0 -- -- -- -- -- -- --) - - movdqa XMMWORD [wk(0)], xmm1 - movdqa XMMWORD [wk(1)], xmm2 - - poppic ebx - - add eax, byte SIZEOF_XMMWORD-1 - and eax, byte -SIZEOF_XMMWORD - cmp eax, byte SIZEOF_XMMWORD - ja short .columnloop - alignx 16,7 - -.columnloop_last: - ; -- process the last column block - - pushpic ebx - movpic ebx, POINTER [gotptr] ; load GOT address - - pcmpeqb xmm1,xmm1 - pslldq xmm1,(SIZEOF_XMMWORD-2) - movdqa xmm2,xmm1 
- - pand xmm1, XMMWORD [edx+1*SIZEOF_XMMWORD] - pand xmm2, XMMWORD [edi+1*SIZEOF_XMMWORD] - - movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15) - movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15) - - jmp near .upsample - alignx 16,7 - -.columnloop: - ; -- process the next column block - - movdqa xmm0, XMMWORD [ebx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1] - movdqa xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1] - movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1] - - pushpic ebx - movpic ebx, POINTER [gotptr] ; load GOT address - - pxor xmm3,xmm3 ; xmm3=(all 0's) - movdqa xmm4,xmm0 - punpcklbw xmm0,xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) - punpckhbw xmm4,xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) - movdqa xmm5,xmm1 - punpcklbw xmm1,xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) - punpckhbw xmm5,xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) - movdqa xmm6,xmm2 - punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) - punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) - - pmullw xmm0,[GOTOFF(ebx,PW_THREE)] - pmullw xmm4,[GOTOFF(ebx,PW_THREE)] - - paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) - paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) - paddw xmm2,xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) - paddw xmm6,xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) - - movdqa XMMWORD [edx+2*SIZEOF_XMMWORD], xmm1 ; temporarily save - movdqa XMMWORD [edx+3*SIZEOF_XMMWORD], xmm5 ; the intermediate data - movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm6 - - pslldq xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- 0) - pslldq xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- -- 0) - - movdqa XMMWORD [wk(2)], xmm1 - movdqa XMMWORD [wk(3)], xmm2 - -.upsample: - ; -- process the upper row - - movdqa xmm7, XMMWORD [edx+0*SIZEOF_XMMWORD] - movdqa xmm3, XMMWORD [edx+1*SIZEOF_XMMWORD] - - movdqa xmm0,xmm7 ; xmm7=Int0L=( 0 1 2 3 4 5 6 7) - movdqa xmm4,xmm3 ; xmm3=Int0H=( 8 9 10 11 12 13 14 15) - psrldq xmm0,2 ; xmm0=( 1 
2 3 4 5 6 7 --) - pslldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- -- 8) - movdqa xmm5,xmm7 - movdqa xmm6,xmm3 - psrldq xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --) - pslldq xmm6,2 ; xmm6=(-- 8 9 10 11 12 13 14) - - por xmm0,xmm4 ; xmm0=( 1 2 3 4 5 6 7 8) - por xmm5,xmm6 ; xmm5=( 7 8 9 10 11 12 13 14) - - movdqa xmm1,xmm7 - movdqa xmm2,xmm3 - pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) - psrldq xmm2,2 ; xmm2=( 9 10 11 12 13 14 15 --) - movdqa xmm4,xmm3 - psrldq xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --) - - por xmm1, XMMWORD [wk(0)] ; xmm1=(-1 0 1 2 3 4 5 6) - por xmm2, XMMWORD [wk(2)] ; xmm2=( 9 10 11 12 13 14 15 16) - - movdqa XMMWORD [wk(0)], xmm4 - - pmullw xmm7,[GOTOFF(ebx,PW_THREE)] - pmullw xmm3,[GOTOFF(ebx,PW_THREE)] - paddw xmm1,[GOTOFF(ebx,PW_EIGHT)] - paddw xmm5,[GOTOFF(ebx,PW_EIGHT)] - paddw xmm0,[GOTOFF(ebx,PW_SEVEN)] - paddw xmm2,[GOTOFF(ebx,PW_SEVEN)] - - paddw xmm1,xmm7 - paddw xmm5,xmm3 - psrlw xmm1,4 ; xmm1=Out0LE=( 0 2 4 6 8 10 12 14) - psrlw xmm5,4 ; xmm5=Out0HE=(16 18 20 22 24 26 28 30) - paddw xmm0,xmm7 - paddw xmm2,xmm3 - psrlw xmm0,4 ; xmm0=Out0LO=( 1 3 5 7 9 11 13 15) - psrlw xmm2,4 ; xmm2=Out0HO=(17 19 21 23 25 27 29 31) - - psllw xmm0,BYTE_BIT - psllw xmm2,BYTE_BIT - por xmm1,xmm0 ; xmm1=Out0L=( 0 1 2 ... 13 14 15) - por xmm5,xmm2 ; xmm5=Out0H=(16 17 18 ... 
29 30 31) - - movdqa XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1 - movdqa XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5 - - ; -- process the lower row - - movdqa xmm6, XMMWORD [edi+0*SIZEOF_XMMWORD] - movdqa xmm4, XMMWORD [edi+1*SIZEOF_XMMWORD] - - movdqa xmm7,xmm6 ; xmm6=Int1L=( 0 1 2 3 4 5 6 7) - movdqa xmm3,xmm4 ; xmm4=Int1H=( 8 9 10 11 12 13 14 15) - psrldq xmm7,2 ; xmm7=( 1 2 3 4 5 6 7 --) - pslldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- -- 8) - movdqa xmm0,xmm6 - movdqa xmm2,xmm4 - psrldq xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --) - pslldq xmm2,2 ; xmm2=(-- 8 9 10 11 12 13 14) - - por xmm7,xmm3 ; xmm7=( 1 2 3 4 5 6 7 8) - por xmm0,xmm2 ; xmm0=( 7 8 9 10 11 12 13 14) - - movdqa xmm1,xmm6 - movdqa xmm5,xmm4 - pslldq xmm1,2 ; xmm1=(-- 0 1 2 3 4 5 6) - psrldq xmm5,2 ; xmm5=( 9 10 11 12 13 14 15 --) - movdqa xmm3,xmm4 - psrldq xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --) - - por xmm1, XMMWORD [wk(1)] ; xmm1=(-1 0 1 2 3 4 5 6) - por xmm5, XMMWORD [wk(3)] ; xmm5=( 9 10 11 12 13 14 15 16) - - movdqa XMMWORD [wk(1)], xmm3 - - pmullw xmm6,[GOTOFF(ebx,PW_THREE)] - pmullw xmm4,[GOTOFF(ebx,PW_THREE)] - paddw xmm1,[GOTOFF(ebx,PW_EIGHT)] - paddw xmm0,[GOTOFF(ebx,PW_EIGHT)] - paddw xmm7,[GOTOFF(ebx,PW_SEVEN)] - paddw xmm5,[GOTOFF(ebx,PW_SEVEN)] - - paddw xmm1,xmm6 - paddw xmm0,xmm4 - psrlw xmm1,4 ; xmm1=Out1LE=( 0 2 4 6 8 10 12 14) - psrlw xmm0,4 ; xmm0=Out1HE=(16 18 20 22 24 26 28 30) - paddw xmm7,xmm6 - paddw xmm5,xmm4 - psrlw xmm7,4 ; xmm7=Out1LO=( 1 3 5 7 9 11 13 15) - psrlw xmm5,4 ; xmm5=Out1HO=(17 19 21 23 25 27 29 31) - - psllw xmm7,BYTE_BIT - psllw xmm5,BYTE_BIT - por xmm1,xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15) - por xmm0,xmm5 ; xmm0=Out1H=(16 17 18 ... 
29 30 31) - - movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1 - movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0 - - poppic ebx - - sub eax, byte SIZEOF_XMMWORD - add ecx, byte 1*SIZEOF_XMMWORD ; inptr1(above) - add ebx, byte 1*SIZEOF_XMMWORD ; inptr0 - add esi, byte 1*SIZEOF_XMMWORD ; inptr1(below) - add edx, byte 2*SIZEOF_XMMWORD ; outptr0 - add edi, byte 2*SIZEOF_XMMWORD ; outptr1 - cmp eax, byte SIZEOF_XMMWORD - ja near .columnloop - test eax,eax - jnz near .columnloop_last - - pop esi - pop edi - pop ecx - pop eax - - add esi, byte 1*SIZEOF_JSAMPROW ; input_data - add edi, byte 2*SIZEOF_JSAMPROW ; output_data - sub ecx, byte 2 ; rowctr - jg near .rowloop - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. -; It's still a box filter. 
-; -; GLOBAL(void) -; jsimd_h2v1_upsample_sse2 (int max_v_samp_factor, -; JDIMENSION output_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -%define max_v_samp(b) (b)+8 ; int max_v_samp_factor -%define output_width(b) (b)+12 ; JDIMENSION output_width -%define input_data(b) (b)+16 ; JSAMPARRAY input_data -%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr - - align 16 - global EXTN(jsimd_h2v1_upsample_sse2) - -EXTN(jsimd_h2v1_upsample_sse2): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov edx, JDIMENSION [output_width(ebp)] - add edx, byte (2*SIZEOF_XMMWORD)-1 - and edx, byte -(2*SIZEOF_XMMWORD) - jz short .return - - mov ecx, INT [max_v_samp(ebp)] ; rowctr - test ecx,ecx - jz short .return - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, POINTER [output_data_ptr(ebp)] - mov edi, JSAMPARRAY [edi] ; output_data - alignx 16,7 -.rowloop: - push edi - push esi - - mov esi, JSAMPROW [esi] ; inptr - mov edi, JSAMPROW [edi] ; outptr - mov eax,edx ; colctr - alignx 16,7 -.columnloop: - - movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] - - movdqa xmm1,xmm0 - punpcklbw xmm0,xmm0 - punpckhbw xmm1,xmm1 - - movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 - movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1 - - sub eax, byte 2*SIZEOF_XMMWORD - jz short .nextrow - - movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] - - movdqa xmm3,xmm2 - punpcklbw xmm2,xmm2 - punpckhbw xmm3,xmm3 - - movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3 - - sub eax, byte 2*SIZEOF_XMMWORD - jz short .nextrow - - add esi, byte 2*SIZEOF_XMMWORD ; inptr - add edi, byte 4*SIZEOF_XMMWORD ; outptr - jmp short .columnloop - alignx 16,7 - -.nextrow: - pop esi - pop edi - - add esi, byte SIZEOF_JSAMPROW ; input_data - add edi, byte SIZEOF_JSAMPROW ; output_data - dec ecx ; rowctr - jg short .rowloop - -.return: - pop edi - pop esi -; pop edx ; 
need not be preserved -; pop ecx ; need not be preserved -; pop ebx ; unused - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. -; It's still a box filter. -; -; GLOBAL(void) -; jsimd_h2v2_upsample_sse2 (nt max_v_samp_factor, -; JDIMENSION output_width, -; JSAMPARRAY input_data, -; JSAMPARRAY *output_data_ptr); -; - -%define max_v_samp(b) (b)+8 ; int max_v_samp_factor -%define output_width(b) (b)+12 ; JDIMENSION output_width -%define input_data(b) (b)+16 ; JSAMPARRAY input_data -%define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr - - align 16 - global EXTN(jsimd_h2v2_upsample_sse2) - -EXTN(jsimd_h2v2_upsample_sse2): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - mov edx, JDIMENSION [output_width(ebp)] - add edx, byte (2*SIZEOF_XMMWORD)-1 - and edx, byte -(2*SIZEOF_XMMWORD) - jz near .return - - mov ecx, INT [max_v_samp(ebp)] ; rowctr - test ecx,ecx - jz near .return - - mov esi, JSAMPARRAY [input_data(ebp)] ; input_data - mov edi, POINTER [output_data_ptr(ebp)] - mov edi, JSAMPARRAY [edi] ; output_data - alignx 16,7 -.rowloop: - push edi - push esi - - mov esi, JSAMPROW [esi] ; inptr - mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0 - mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1 - mov eax,edx ; colctr - alignx 16,7 -.columnloop: - - movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD] - - movdqa xmm1,xmm0 - punpcklbw xmm0,xmm0 - punpckhbw xmm1,xmm1 - - movdqa XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0 - movdqa XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1 - movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0 - movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1 - - sub eax, byte 2*SIZEOF_XMMWORD - jz short .nextrow - - movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] - - movdqa xmm3,xmm2 - punpcklbw xmm2,xmm2 - punpckhbw xmm3,xmm3 - - movdqa XMMWORD [ebx+2*SIZEOF_XMMWORD], xmm2 - 
movdqa XMMWORD [ebx+3*SIZEOF_XMMWORD], xmm3 - movdqa XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2 - movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3 - - sub eax, byte 2*SIZEOF_XMMWORD - jz short .nextrow - - add esi, byte 2*SIZEOF_XMMWORD ; inptr - add ebx, byte 4*SIZEOF_XMMWORD ; outptr0 - add edi, byte 4*SIZEOF_XMMWORD ; outptr1 - jmp short .columnloop - alignx 16,7 - -.nextrow: - pop esi - pop edi - - add esi, byte 1*SIZEOF_JSAMPROW ; input_data - add edi, byte 2*SIZEOF_JSAMPROW ; output_data - sub ecx, byte 2 ; rowctr - jg short .rowloop - -.return: - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jfdctflt-3dn.asm b/simd/jfdctflt-3dn.asm deleted file mode 100644 index 2191618..0000000 --- a/simd/jfdctflt-3dn.asm +++ /dev/null @@ -1,319 +0,0 @@ -; -; jfdctflt.asm - floating-point FDCT (3DNow!) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a floating-point implementation of the forward DCT -; (Discrete Cosine Transform). The following code is based directly on -; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. 
-; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_fdct_float_3dnow) - -EXTN(jconst_fdct_float_3dnow): - -PD_0_382 times 2 dd 0.382683432365089771728460 -PD_0_707 times 2 dd 0.707106781186547524400844 -PD_0_541 times 2 dd 0.541196100146196984399723 -PD_1_306 times 2 dd 1.306562964876376527856643 - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform the forward DCT on one block of samples. -; -; GLOBAL(void) -; jsimd_fdct_float_3dnow (FAST_FLOAT *data) -; - -%define data(b) (b)+8 ; FAST_FLOAT *data - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_fdct_float_3dnow) - -EXTN(jsimd_fdct_float_3dnow): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved -; push esi ; unused -; push edi ; unused - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process rows. 
- - mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) - mov ecx, DCTSIZE/2 - alignx 16,7 -.rowloop: - - movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] - movq mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] - movq mm2, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] - movq mm3, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] - - ; mm0=(00 01), mm1=(10 11), mm2=(06 07), mm3=(16 17) - - movq mm4,mm0 ; transpose coefficients - punpckldq mm0,mm1 ; mm0=(00 10)=data0 - punpckhdq mm4,mm1 ; mm4=(01 11)=data1 - movq mm5,mm2 ; transpose coefficients - punpckldq mm2,mm3 ; mm2=(06 16)=data6 - punpckhdq mm5,mm3 ; mm5=(07 17)=data7 - - movq mm6,mm4 - movq mm7,mm0 - pfsub mm4,mm2 ; mm4=data1-data6=tmp6 - pfsub mm0,mm5 ; mm0=data0-data7=tmp7 - pfadd mm6,mm2 ; mm6=data1+data6=tmp1 - pfadd mm7,mm5 ; mm7=data0+data7=tmp0 - - movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] - movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] - movq mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] - movq mm5, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] - - ; mm1=(02 03), mm3=(12 13), mm2=(04 05), mm5=(14 15) - - movq MMWORD [wk(0)], mm4 ; wk(0)=tmp6 - movq MMWORD [wk(1)], mm0 ; wk(1)=tmp7 - - movq mm4,mm1 ; transpose coefficients - punpckldq mm1,mm3 ; mm1=(02 12)=data2 - punpckhdq mm4,mm3 ; mm4=(03 13)=data3 - movq mm0,mm2 ; transpose coefficients - punpckldq mm2,mm5 ; mm2=(04 14)=data4 - punpckhdq mm0,mm5 ; mm0=(05 15)=data5 - - movq mm3,mm4 - movq mm5,mm1 - pfadd mm4,mm2 ; mm4=data3+data4=tmp3 - pfadd mm1,mm0 ; mm1=data2+data5=tmp2 - pfsub mm3,mm2 ; mm3=data3-data4=tmp4 - pfsub mm5,mm0 ; mm5=data2-data5=tmp5 - - ; -- Even part - - movq mm2,mm7 - movq mm0,mm6 - pfsub mm7,mm4 ; mm7=tmp13 - pfsub mm6,mm1 ; mm6=tmp12 - pfadd mm2,mm4 ; mm2=tmp10 - pfadd mm0,mm1 ; mm0=tmp11 - - pfadd mm6,mm7 - pfmul mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1 - - movq mm4,mm2 - movq mm1,mm7 - pfsub mm2,mm0 ; mm2=data4 - pfsub mm7,mm6 ; mm7=data6 - pfadd mm4,mm0 ; mm4=data0 - pfadd mm1,mm6 ; mm1=data2 - - movq MMWORD 
[MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)], mm2 - movq MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)], mm7 - movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4 - movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], mm1 - - ; -- Odd part - - movq mm0, MMWORD [wk(0)] ; mm0=tmp6 - movq mm6, MMWORD [wk(1)] ; mm6=tmp7 - - pfadd mm3,mm5 ; mm3=tmp10 - pfadd mm5,mm0 ; mm5=tmp11 - pfadd mm0,mm6 ; mm0=tmp12, mm6=tmp7 - - pfmul mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3 - - movq mm2,mm3 ; mm2=tmp10 - pfsub mm3,mm0 - pfmul mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5 - pfmul mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610) - pfmul mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296) - pfadd mm2,mm3 ; mm2=z2 - pfadd mm0,mm3 ; mm0=z4 - - movq mm7,mm6 - pfsub mm6,mm5 ; mm6=z13 - pfadd mm7,mm5 ; mm7=z11 - - movq mm4,mm6 - movq mm1,mm7 - pfsub mm6,mm2 ; mm6=data3 - pfsub mm7,mm0 ; mm7=data7 - pfadd mm4,mm2 ; mm4=data5 - pfadd mm1,mm0 ; mm1=data1 - - movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], mm6 - movq MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)], mm7 - movq MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)], mm4 - movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1 - - add edx, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT - dec ecx - jnz near .rowloop - - ; ---- Pass 2: process columns. 
- - mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) - mov ecx, DCTSIZE/2 - alignx 16,7 -.columnloop: - - movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] - movq mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] - movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)] - movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)] - - ; mm0=(00 10), mm1=(01 11), mm2=(60 70), mm3=(61 71) - - movq mm4,mm0 ; transpose coefficients - punpckldq mm0,mm1 ; mm0=(00 01)=data0 - punpckhdq mm4,mm1 ; mm4=(10 11)=data1 - movq mm5,mm2 ; transpose coefficients - punpckldq mm2,mm3 ; mm2=(60 61)=data6 - punpckhdq mm5,mm3 ; mm5=(70 71)=data7 - - movq mm6,mm4 - movq mm7,mm0 - pfsub mm4,mm2 ; mm4=data1-data6=tmp6 - pfsub mm0,mm5 ; mm0=data0-data7=tmp7 - pfadd mm6,mm2 ; mm6=data1+data6=tmp1 - pfadd mm7,mm5 ; mm7=data0+data7=tmp0 - - movq mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] - movq mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] - movq mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)] - movq mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)] - - ; mm1=(20 30), mm3=(21 31), mm2=(40 50), mm5=(41 51) - - movq MMWORD [wk(0)], mm4 ; wk(0)=tmp6 - movq MMWORD [wk(1)], mm0 ; wk(1)=tmp7 - - movq mm4,mm1 ; transpose coefficients - punpckldq mm1,mm3 ; mm1=(20 21)=data2 - punpckhdq mm4,mm3 ; mm4=(30 31)=data3 - movq mm0,mm2 ; transpose coefficients - punpckldq mm2,mm5 ; mm2=(40 41)=data4 - punpckhdq mm0,mm5 ; mm0=(50 51)=data5 - - movq mm3,mm4 - movq mm5,mm1 - pfadd mm4,mm2 ; mm4=data3+data4=tmp3 - pfadd mm1,mm0 ; mm1=data2+data5=tmp2 - pfsub mm3,mm2 ; mm3=data3-data4=tmp4 - pfsub mm5,mm0 ; mm5=data2-data5=tmp5 - - ; -- Even part - - movq mm2,mm7 - movq mm0,mm6 - pfsub mm7,mm4 ; mm7=tmp13 - pfsub mm6,mm1 ; mm6=tmp12 - pfadd mm2,mm4 ; mm2=tmp10 - pfadd mm0,mm1 ; mm0=tmp11 - - pfadd mm6,mm7 - pfmul mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1 - - movq mm4,mm2 - movq mm1,mm7 - pfsub mm2,mm0 ; mm2=data4 - pfsub mm7,mm6 ; mm7=data6 - pfadd mm4,mm0 ; mm4=data0 - pfadd mm1,mm6 ; mm1=data2 - - movq MMWORD 
[MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], mm2 - movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], mm7 - movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4 - movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], mm1 - - ; -- Odd part - - movq mm0, MMWORD [wk(0)] ; mm0=tmp6 - movq mm6, MMWORD [wk(1)] ; mm6=tmp7 - - pfadd mm3,mm5 ; mm3=tmp10 - pfadd mm5,mm0 ; mm5=tmp11 - pfadd mm0,mm6 ; mm0=tmp12, mm6=tmp7 - - pfmul mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3 - - movq mm2,mm3 ; mm2=tmp10 - pfsub mm3,mm0 - pfmul mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5 - pfmul mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610) - pfmul mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296) - pfadd mm2,mm3 ; mm2=z2 - pfadd mm0,mm3 ; mm0=z4 - - movq mm7,mm6 - pfsub mm6,mm5 ; mm6=z13 - pfadd mm7,mm5 ; mm7=z11 - - movq mm4,mm6 - movq mm1,mm7 - pfsub mm6,mm2 ; mm6=data3 - pfsub mm7,mm0 ; mm7=data7 - pfadd mm4,mm2 ; mm4=data5 - pfadd mm1,mm0 ; mm1=data1 - - movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], mm6 - movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], mm7 - movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], mm4 - movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1 - - add edx, byte 2*SIZEOF_FAST_FLOAT - dec ecx - jnz near .columnloop - - femms ; empty MMX/3DNow! state - -; pop edi ; unused -; pop esi ; unused -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - poppic ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jfdctflt-sse-64.asm b/simd/jfdctflt-sse-64.asm deleted file mode 100644 index 4b64ea4..0000000 --- a/simd/jfdctflt-sse-64.asm +++ /dev/null @@ -1,357 +0,0 @@ -; -; jfdctflt.asm - floating-point FDCT (64-bit SSE) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. 
-; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a floating-point implementation of the forward DCT -; (Discrete Cosine Transform). The following code is based directly on -; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) - shufps %1,%2,0x44 -%endmacro - -%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) - shufps %1,%2,0xEE -%endmacro - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_fdct_float_sse) - -EXTN(jconst_fdct_float_sse): - -PD_0_382 times 4 dd 0.382683432365089771728460 -PD_0_707 times 4 dd 0.707106781186547524400844 -PD_0_541 times 4 dd 0.541196100146196984399723 -PD_1_306 times 4 dd 1.306562964876376527856643 - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Perform the forward DCT on one block of samples. 
-; -; GLOBAL(void) -; jsimd_fdct_float_sse (FAST_FLOAT *data) -; - -; r10 = FAST_FLOAT *data - -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_fdct_float_sse) - -EXTN(jsimd_fdct_float_sse): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - - ; ---- Pass 1: process rows. - - mov rdx, r10 ; (FAST_FLOAT *) - mov rcx, DCTSIZE/4 -.rowloop: - - movaps xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm2, XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)] - - ; xmm0=(20 21 22 23), xmm2=(24 25 26 27) - ; xmm1=(30 31 32 33), xmm3=(34 35 36 37) - - movaps xmm4,xmm0 ; transpose coefficients(phase 1) - unpcklps xmm0,xmm1 ; xmm0=(20 30 21 31) - unpckhps xmm4,xmm1 ; xmm4=(22 32 23 33) - movaps xmm5,xmm2 ; transpose coefficients(phase 1) - unpcklps xmm2,xmm3 ; xmm2=(24 34 25 35) - unpckhps xmm5,xmm3 ; xmm5=(26 36 27 37) - - movaps xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)] - - ; xmm6=(00 01 02 03), xmm1=(04 05 06 07) - ; xmm7=(10 11 12 13), xmm3=(14 15 16 17) - - movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 32 23 33) - movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(24 34 25 35) - - movaps xmm4,xmm6 ; transpose coefficients(phase 1) - unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) - unpckhps xmm4,xmm7 ; xmm4=(02 12 03 13) - movaps xmm2,xmm1 ; transpose coefficients(phase 1) - unpcklps xmm1,xmm3 ; xmm1=(04 14 05 15) - unpckhps xmm2,xmm3 ; xmm2=(06 16 07 17) - - movaps xmm7,xmm6 ; transpose coefficients(phase 2) - unpcklps2 xmm6,xmm0 ; xmm6=(00 10 20 30)=data0 - 
unpckhps2 xmm7,xmm0 ; xmm7=(01 11 21 31)=data1 - movaps xmm3,xmm2 ; transpose coefficients(phase 2) - unpcklps2 xmm2,xmm5 ; xmm2=(06 16 26 36)=data6 - unpckhps2 xmm3,xmm5 ; xmm3=(07 17 27 37)=data7 - - movaps xmm0,xmm7 - movaps xmm5,xmm6 - subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 - subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 - addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 - addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 - - movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 32 23 33) - movaps xmm3, XMMWORD [wk(1)] ; xmm3=(24 34 25 35) - movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 - movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 - - movaps xmm7,xmm4 ; transpose coefficients(phase 2) - unpcklps2 xmm4,xmm2 ; xmm4=(02 12 22 32)=data2 - unpckhps2 xmm7,xmm2 ; xmm7=(03 13 23 33)=data3 - movaps xmm6,xmm1 ; transpose coefficients(phase 2) - unpcklps2 xmm1,xmm3 ; xmm1=(04 14 24 34)=data4 - unpckhps2 xmm6,xmm3 ; xmm6=(05 15 25 35)=data5 - - movaps xmm2,xmm7 - movaps xmm3,xmm4 - addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 - addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 - subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 - subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 - - ; -- Even part - - movaps xmm1,xmm5 - movaps xmm6,xmm0 - subps xmm5,xmm7 ; xmm5=tmp13 - subps xmm0,xmm4 ; xmm0=tmp12 - addps xmm1,xmm7 ; xmm1=tmp10 - addps xmm6,xmm4 ; xmm6=tmp11 - - addps xmm0,xmm5 - mulps xmm0,[rel PD_0_707] ; xmm0=z1 - - movaps xmm7,xmm1 - movaps xmm4,xmm5 - subps xmm1,xmm6 ; xmm1=data4 - subps xmm5,xmm0 ; xmm5=data6 - addps xmm7,xmm6 ; xmm7=data0 - addps xmm4,xmm0 ; xmm4=data2 - - movaps XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 - movaps XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 - - ; -- Odd part - - movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 - movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 - - addps xmm2,xmm3 ; xmm2=tmp10 - addps xmm3,xmm6 ; xmm3=tmp11 - addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 - - mulps xmm3,[rel 
PD_0_707] ; xmm3=z3 - - movaps xmm1,xmm2 ; xmm1=tmp10 - subps xmm2,xmm6 - mulps xmm2,[rel PD_0_382] ; xmm2=z5 - mulps xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) - mulps xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) - addps xmm1,xmm2 ; xmm1=z2 - addps xmm6,xmm2 ; xmm6=z4 - - movaps xmm5,xmm0 - subps xmm0,xmm3 ; xmm0=z13 - addps xmm5,xmm3 ; xmm5=z11 - - movaps xmm7,xmm0 - movaps xmm4,xmm5 - subps xmm0,xmm1 ; xmm0=data3 - subps xmm5,xmm6 ; xmm5=data7 - addps xmm7,xmm1 ; xmm7=data5 - addps xmm4,xmm6 ; xmm4=data1 - - movaps XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)], xmm7 - movaps XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 - - add rdx, 4*DCTSIZE*SIZEOF_FAST_FLOAT - dec rcx - jnz near .rowloop - - ; ---- Pass 2: process columns. - - mov rdx, r10 ; (FAST_FLOAT *) - mov rcx, DCTSIZE/4 -.columnloop: - - movaps xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm2, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)] - - ; xmm0=(02 12 22 32), xmm2=(42 52 62 72) - ; xmm1=(03 13 23 33), xmm3=(43 53 63 73) - - movaps xmm4,xmm0 ; transpose coefficients(phase 1) - unpcklps xmm0,xmm1 ; xmm0=(02 03 12 13) - unpckhps xmm4,xmm1 ; xmm4=(22 23 32 33) - movaps xmm5,xmm2 ; transpose coefficients(phase 1) - unpcklps xmm2,xmm3 ; xmm2=(42 43 52 53) - unpckhps xmm5,xmm3 ; xmm5=(62 63 72 73) - - movaps xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)] - - ; xmm6=(00 10 20 30), xmm1=(40 50 60 70) - ; xmm7=(01 11 21 31), xmm3=(41 51 61 71) - - movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 23 32 33) - movaps XMMWORD [wk(1)], xmm2 ; 
wk(1)=(42 43 52 53) - - movaps xmm4,xmm6 ; transpose coefficients(phase 1) - unpcklps xmm6,xmm7 ; xmm6=(00 01 10 11) - unpckhps xmm4,xmm7 ; xmm4=(20 21 30 31) - movaps xmm2,xmm1 ; transpose coefficients(phase 1) - unpcklps xmm1,xmm3 ; xmm1=(40 41 50 51) - unpckhps xmm2,xmm3 ; xmm2=(60 61 70 71) - - movaps xmm7,xmm6 ; transpose coefficients(phase 2) - unpcklps2 xmm6,xmm0 ; xmm6=(00 01 02 03)=data0 - unpckhps2 xmm7,xmm0 ; xmm7=(10 11 12 13)=data1 - movaps xmm3,xmm2 ; transpose coefficients(phase 2) - unpcklps2 xmm2,xmm5 ; xmm2=(60 61 62 63)=data6 - unpckhps2 xmm3,xmm5 ; xmm3=(70 71 72 73)=data7 - - movaps xmm0,xmm7 - movaps xmm5,xmm6 - subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 - subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 - addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 - addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 - - movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 23 32 33) - movaps xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53) - movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 - movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 - - movaps xmm7,xmm4 ; transpose coefficients(phase 2) - unpcklps2 xmm4,xmm2 ; xmm4=(20 21 22 23)=data2 - unpckhps2 xmm7,xmm2 ; xmm7=(30 31 32 33)=data3 - movaps xmm6,xmm1 ; transpose coefficients(phase 2) - unpcklps2 xmm1,xmm3 ; xmm1=(40 41 42 43)=data4 - unpckhps2 xmm6,xmm3 ; xmm6=(50 51 52 53)=data5 - - movaps xmm2,xmm7 - movaps xmm3,xmm4 - addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 - addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 - subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 - subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 - - ; -- Even part - - movaps xmm1,xmm5 - movaps xmm6,xmm0 - subps xmm5,xmm7 ; xmm5=tmp13 - subps xmm0,xmm4 ; xmm0=tmp12 - addps xmm1,xmm7 ; xmm1=tmp10 - addps xmm6,xmm4 ; xmm6=tmp11 - - addps xmm0,xmm5 - mulps xmm0,[rel PD_0_707] ; xmm0=z1 - - movaps xmm7,xmm1 - movaps xmm4,xmm5 - subps xmm1,xmm6 ; xmm1=data4 - subps xmm5,xmm0 ; xmm5=data6 - addps xmm7,xmm6 ; xmm7=data0 - addps xmm4,xmm0 ; xmm4=data2 - - movaps XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD 
[XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 - movaps XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 - - ; -- Odd part - - movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 - movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 - - addps xmm2,xmm3 ; xmm2=tmp10 - addps xmm3,xmm6 ; xmm3=tmp11 - addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 - - mulps xmm3,[rel PD_0_707] ; xmm3=z3 - - movaps xmm1,xmm2 ; xmm1=tmp10 - subps xmm2,xmm6 - mulps xmm2,[rel PD_0_382] ; xmm2=z5 - mulps xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) - mulps xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) - addps xmm1,xmm2 ; xmm1=z2 - addps xmm6,xmm2 ; xmm6=z4 - - movaps xmm5,xmm0 - subps xmm0,xmm3 ; xmm0=z13 - addps xmm5,xmm3 ; xmm5=z11 - - movaps xmm7,xmm0 - movaps xmm4,xmm5 - subps xmm0,xmm1 ; xmm0=data3 - subps xmm5,xmm6 ; xmm5=data7 - addps xmm7,xmm1 ; xmm7=data5 - addps xmm4,xmm6 ; xmm4=data1 - - movaps XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 - movaps XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 - - add rdx, byte 4*SIZEOF_FAST_FLOAT - dec rcx - jnz near .columnloop - - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jfdctflt-sse.asm b/simd/jfdctflt-sse.asm deleted file mode 100644 index e7ede26..0000000 --- a/simd/jfdctflt-sse.asm +++ /dev/null @@ -1,369 +0,0 @@ -; -; jfdctflt.asm - floating-point FDCT (SSE) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a floating-point implementation of the forward DCT -; (Discrete Cosine Transform). The following code is based directly on -; the IJG's original jfdctflt.c; see the jfdctflt.c for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) - shufps %1,%2,0x44 -%endmacro - -%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) - shufps %1,%2,0xEE -%endmacro - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_fdct_float_sse) - -EXTN(jconst_fdct_float_sse): - -PD_0_382 times 4 dd 0.382683432365089771728460 -PD_0_707 times 4 dd 0.707106781186547524400844 -PD_0_541 times 4 dd 0.541196100146196984399723 -PD_1_306 times 4 dd 1.306562964876376527856643 - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform the forward DCT on one block of samples. 
-; -; GLOBAL(void) -; jsimd_fdct_float_sse (FAST_FLOAT *data) -; - -%define data(b) (b)+8 ; FAST_FLOAT *data - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_fdct_float_sse) - -EXTN(jsimd_fdct_float_sse): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved -; push esi ; unused -; push edi ; unused - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process rows. - - mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) - mov ecx, DCTSIZE/4 - alignx 16,7 -.rowloop: - - movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] - movaps xmm2, XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)] - - ; xmm0=(20 21 22 23), xmm2=(24 25 26 27) - ; xmm1=(30 31 32 33), xmm3=(34 35 36 37) - - movaps xmm4,xmm0 ; transpose coefficients(phase 1) - unpcklps xmm0,xmm1 ; xmm0=(20 30 21 31) - unpckhps xmm4,xmm1 ; xmm4=(22 32 23 33) - movaps xmm5,xmm2 ; transpose coefficients(phase 1) - unpcklps xmm2,xmm3 ; xmm2=(24 34 25 35) - unpckhps xmm5,xmm3 ; xmm5=(26 36 27 37) - - movaps xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] - movaps xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] - - ; xmm6=(00 01 02 03), xmm1=(04 05 06 07) - ; xmm7=(10 11 12 13), xmm3=(14 15 16 17) - - movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 32 23 33) - movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(24 34 25 35) - - movaps xmm4,xmm6 ; transpose coefficients(phase 1) - unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) - unpckhps xmm4,xmm7 ; xmm4=(02 12 03 13) - movaps xmm2,xmm1 ; 
transpose coefficients(phase 1) - unpcklps xmm1,xmm3 ; xmm1=(04 14 05 15) - unpckhps xmm2,xmm3 ; xmm2=(06 16 07 17) - - movaps xmm7,xmm6 ; transpose coefficients(phase 2) - unpcklps2 xmm6,xmm0 ; xmm6=(00 10 20 30)=data0 - unpckhps2 xmm7,xmm0 ; xmm7=(01 11 21 31)=data1 - movaps xmm3,xmm2 ; transpose coefficients(phase 2) - unpcklps2 xmm2,xmm5 ; xmm2=(06 16 26 36)=data6 - unpckhps2 xmm3,xmm5 ; xmm3=(07 17 27 37)=data7 - - movaps xmm0,xmm7 - movaps xmm5,xmm6 - subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 - subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 - addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 - addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 - - movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 32 23 33) - movaps xmm3, XMMWORD [wk(1)] ; xmm3=(24 34 25 35) - movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 - movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 - - movaps xmm7,xmm4 ; transpose coefficients(phase 2) - unpcklps2 xmm4,xmm2 ; xmm4=(02 12 22 32)=data2 - unpckhps2 xmm7,xmm2 ; xmm7=(03 13 23 33)=data3 - movaps xmm6,xmm1 ; transpose coefficients(phase 2) - unpcklps2 xmm1,xmm3 ; xmm1=(04 14 24 34)=data4 - unpckhps2 xmm6,xmm3 ; xmm6=(05 15 25 35)=data5 - - movaps xmm2,xmm7 - movaps xmm3,xmm4 - addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 - addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 - subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 - subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 - - ; -- Even part - - movaps xmm1,xmm5 - movaps xmm6,xmm0 - subps xmm5,xmm7 ; xmm5=tmp13 - subps xmm0,xmm4 ; xmm0=tmp12 - addps xmm1,xmm7 ; xmm1=tmp10 - addps xmm6,xmm4 ; xmm6=tmp11 - - addps xmm0,xmm5 - mulps xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1 - - movaps xmm7,xmm1 - movaps xmm4,xmm5 - subps xmm1,xmm6 ; xmm1=data4 - subps xmm5,xmm0 ; xmm5=data6 - addps xmm7,xmm6 ; xmm7=data0 - addps xmm4,xmm0 ; xmm4=data2 - - movaps XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 - movaps XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 - - ; 
-- Odd part - - movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 - movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 - - addps xmm2,xmm3 ; xmm2=tmp10 - addps xmm3,xmm6 ; xmm3=tmp11 - addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 - - mulps xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3 - - movaps xmm1,xmm2 ; xmm1=tmp10 - subps xmm2,xmm6 - mulps xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5 - mulps xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) - mulps xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) - addps xmm1,xmm2 ; xmm1=z2 - addps xmm6,xmm2 ; xmm6=z4 - - movaps xmm5,xmm0 - subps xmm0,xmm3 ; xmm0=z13 - addps xmm5,xmm3 ; xmm5=z11 - - movaps xmm7,xmm0 - movaps xmm4,xmm5 - subps xmm0,xmm1 ; xmm0=data3 - subps xmm5,xmm6 ; xmm5=data7 - addps xmm7,xmm1 ; xmm7=data5 - addps xmm4,xmm6 ; xmm4=data1 - - movaps XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], xmm7 - movaps XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 - - add edx, 4*DCTSIZE*SIZEOF_FAST_FLOAT - dec ecx - jnz near .rowloop - - ; ---- Pass 2: process columns. 
- - mov edx, POINTER [data(eax)] ; (FAST_FLOAT *) - mov ecx, DCTSIZE/4 - alignx 16,7 -.columnloop: - - movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)] - movaps xmm2, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)] - - ; xmm0=(02 12 22 32), xmm2=(42 52 62 72) - ; xmm1=(03 13 23 33), xmm3=(43 53 63 73) - - movaps xmm4,xmm0 ; transpose coefficients(phase 1) - unpcklps xmm0,xmm1 ; xmm0=(02 03 12 13) - unpckhps xmm4,xmm1 ; xmm4=(22 23 32 33) - movaps xmm5,xmm2 ; transpose coefficients(phase 1) - unpcklps xmm2,xmm3 ; xmm2=(42 43 52 53) - unpckhps xmm5,xmm3 ; xmm5=(62 63 72 73) - - movaps xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] - movaps xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)] - - ; xmm6=(00 10 20 30), xmm1=(40 50 60 70) - ; xmm7=(01 11 21 31), xmm3=(41 51 61 71) - - movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 23 32 33) - movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(42 43 52 53) - - movaps xmm4,xmm6 ; transpose coefficients(phase 1) - unpcklps xmm6,xmm7 ; xmm6=(00 01 10 11) - unpckhps xmm4,xmm7 ; xmm4=(20 21 30 31) - movaps xmm2,xmm1 ; transpose coefficients(phase 1) - unpcklps xmm1,xmm3 ; xmm1=(40 41 50 51) - unpckhps xmm2,xmm3 ; xmm2=(60 61 70 71) - - movaps xmm7,xmm6 ; transpose coefficients(phase 2) - unpcklps2 xmm6,xmm0 ; xmm6=(00 01 02 03)=data0 - unpckhps2 xmm7,xmm0 ; xmm7=(10 11 12 13)=data1 - movaps xmm3,xmm2 ; transpose coefficients(phase 2) - unpcklps2 xmm2,xmm5 ; xmm2=(60 61 62 63)=data6 - unpckhps2 xmm3,xmm5 ; xmm3=(70 71 72 73)=data7 - - movaps xmm0,xmm7 - movaps xmm5,xmm6 - subps xmm7,xmm2 ; xmm7=data1-data6=tmp6 - subps xmm6,xmm3 ; xmm6=data0-data7=tmp7 - addps xmm0,xmm2 ; xmm0=data1+data6=tmp1 - addps xmm5,xmm3 ; xmm5=data0+data7=tmp0 - - movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 23 32 33) - 
movaps xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53) - movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 - movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 - - movaps xmm7,xmm4 ; transpose coefficients(phase 2) - unpcklps2 xmm4,xmm2 ; xmm4=(20 21 22 23)=data2 - unpckhps2 xmm7,xmm2 ; xmm7=(30 31 32 33)=data3 - movaps xmm6,xmm1 ; transpose coefficients(phase 2) - unpcklps2 xmm1,xmm3 ; xmm1=(40 41 42 43)=data4 - unpckhps2 xmm6,xmm3 ; xmm6=(50 51 52 53)=data5 - - movaps xmm2,xmm7 - movaps xmm3,xmm4 - addps xmm7,xmm1 ; xmm7=data3+data4=tmp3 - addps xmm4,xmm6 ; xmm4=data2+data5=tmp2 - subps xmm2,xmm1 ; xmm2=data3-data4=tmp4 - subps xmm3,xmm6 ; xmm3=data2-data5=tmp5 - - ; -- Even part - - movaps xmm1,xmm5 - movaps xmm6,xmm0 - subps xmm5,xmm7 ; xmm5=tmp13 - subps xmm0,xmm4 ; xmm0=tmp12 - addps xmm1,xmm7 ; xmm1=tmp10 - addps xmm6,xmm4 ; xmm6=tmp11 - - addps xmm0,xmm5 - mulps xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1 - - movaps xmm7,xmm1 - movaps xmm4,xmm5 - subps xmm1,xmm6 ; xmm1=data4 - subps xmm5,xmm0 ; xmm5=data6 - addps xmm7,xmm6 ; xmm7=data0 - addps xmm4,xmm0 ; xmm4=data2 - - movaps XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7 - movaps XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4 - - ; -- Odd part - - movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 - movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 - - addps xmm2,xmm3 ; xmm2=tmp10 - addps xmm3,xmm6 ; xmm3=tmp11 - addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7 - - mulps xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3 - - movaps xmm1,xmm2 ; xmm1=tmp10 - subps xmm2,xmm6 - mulps xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5 - mulps xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) - mulps xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) - addps xmm1,xmm2 ; xmm1=z2 - addps xmm6,xmm2 ; xmm6=z4 - - movaps xmm5,xmm0 - subps xmm0,xmm3 ; xmm0=z13 - addps xmm5,xmm3 ; xmm5=z11 - - movaps xmm7,xmm0 - movaps xmm4,xmm5 - subps 
xmm0,xmm1 ; xmm0=data3 - subps xmm5,xmm6 ; xmm5=data7 - addps xmm7,xmm1 ; xmm7=data5 - addps xmm4,xmm6 ; xmm4=data1 - - movaps XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], xmm7 - movaps XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4 - - add edx, byte 4*SIZEOF_FAST_FLOAT - dec ecx - jnz near .columnloop - -; pop edi ; unused -; pop esi ; unused -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - poppic ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jfdctfst-mmx.asm b/simd/jfdctfst-mmx.asm deleted file mode 100644 index eb2eb9c..0000000 --- a/simd/jfdctfst-mmx.asm +++ /dev/null @@ -1,396 +0,0 @@ -; -; jfdctfst.asm - fast integer FDCT (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a fast, not so accurate integer implementation of -; the forward DCT (Discrete Cosine Transform). The following code is -; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c -; for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 8 ; 14 is also OK. 
- -%if CONST_BITS == 8 -F_0_382 equ 98 ; FIX(0.382683433) -F_0_541 equ 139 ; FIX(0.541196100) -F_0_707 equ 181 ; FIX(0.707106781) -F_1_306 equ 334 ; FIX(1.306562965) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. -%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) -F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) -F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) -F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - -; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) -; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) - -%define PRE_MULTIPLY_SCALE_BITS 2 -%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) - - alignz 16 - global EXTN(jconst_fdct_ifast_mmx) - -EXTN(jconst_fdct_ifast_mmx): - -PW_F0707 times 4 dw F_0_707 << CONST_SHIFT -PW_F0382 times 4 dw F_0_382 << CONST_SHIFT -PW_F0541 times 4 dw F_0_541 << CONST_SHIFT -PW_F1306 times 4 dw F_1_306 << CONST_SHIFT - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform the forward DCT on one block of samples. 
-; -; GLOBAL(void) -; jsimd_fdct_ifast_mmx (DCTELEM *data) -; - -%define data(b) (b)+8 ; DCTELEM *data - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_fdct_ifast_mmx) - -EXTN(jsimd_fdct_ifast_mmx): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved -; push esi ; unused -; push edi ; unused - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process rows. - - mov edx, POINTER [data(eax)] ; (DCTELEM *) - mov ecx, DCTSIZE/4 - alignx 16,7 -.rowloop: - - movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] - movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] - movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)] - movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)] - - ; mm0=(20 21 22 23), mm2=(24 25 26 27) - ; mm1=(30 31 32 33), mm3=(34 35 36 37) - - movq mm4,mm0 ; transpose coefficients(phase 1) - punpcklwd mm0,mm1 ; mm0=(20 30 21 31) - punpckhwd mm4,mm1 ; mm4=(22 32 23 33) - movq mm5,mm2 ; transpose coefficients(phase 1) - punpcklwd mm2,mm3 ; mm2=(24 34 25 35) - punpckhwd mm5,mm3 ; mm5=(26 36 27 37) - - movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] - movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] - movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)] - movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)] - - ; mm6=(00 01 02 03), mm1=(04 05 06 07) - ; mm7=(10 11 12 13), mm3=(14 15 16 17) - - movq MMWORD [wk(0)], mm4 ; wk(0)=(22 32 23 33) - movq MMWORD [wk(1)], mm2 ; wk(1)=(24 34 25 35) - - movq mm4,mm6 ; transpose coefficients(phase 1) - punpcklwd mm6,mm7 ; mm6=(00 10 01 11) - punpckhwd mm4,mm7 ; mm4=(02 12 03 13) - movq mm2,mm1 ; transpose coefficients(phase 1) - punpcklwd mm1,mm3 ; mm1=(04 14 05 15) - punpckhwd mm2,mm3 ; mm2=(06 16 07 17) - - movq 
mm7,mm6 ; transpose coefficients(phase 2) - punpckldq mm6,mm0 ; mm6=(00 10 20 30)=data0 - punpckhdq mm7,mm0 ; mm7=(01 11 21 31)=data1 - movq mm3,mm2 ; transpose coefficients(phase 2) - punpckldq mm2,mm5 ; mm2=(06 16 26 36)=data6 - punpckhdq mm3,mm5 ; mm3=(07 17 27 37)=data7 - - movq mm0,mm7 - movq mm5,mm6 - psubw mm7,mm2 ; mm7=data1-data6=tmp6 - psubw mm6,mm3 ; mm6=data0-data7=tmp7 - paddw mm0,mm2 ; mm0=data1+data6=tmp1 - paddw mm5,mm3 ; mm5=data0+data7=tmp0 - - movq mm2, MMWORD [wk(0)] ; mm2=(22 32 23 33) - movq mm3, MMWORD [wk(1)] ; mm3=(24 34 25 35) - movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 - movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 - - movq mm7,mm4 ; transpose coefficients(phase 2) - punpckldq mm4,mm2 ; mm4=(02 12 22 32)=data2 - punpckhdq mm7,mm2 ; mm7=(03 13 23 33)=data3 - movq mm6,mm1 ; transpose coefficients(phase 2) - punpckldq mm1,mm3 ; mm1=(04 14 24 34)=data4 - punpckhdq mm6,mm3 ; mm6=(05 15 25 35)=data5 - - movq mm2,mm7 - movq mm3,mm4 - paddw mm7,mm1 ; mm7=data3+data4=tmp3 - paddw mm4,mm6 ; mm4=data2+data5=tmp2 - psubw mm2,mm1 ; mm2=data3-data4=tmp4 - psubw mm3,mm6 ; mm3=data2-data5=tmp5 - - ; -- Even part - - movq mm1,mm5 - movq mm6,mm0 - psubw mm5,mm7 ; mm5=tmp13 - psubw mm0,mm4 ; mm0=tmp12 - paddw mm1,mm7 ; mm1=tmp10 - paddw mm6,mm4 ; mm6=tmp11 - - paddw mm0,mm5 - psllw mm0,PRE_MULTIPLY_SCALE_BITS - pmulhw mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1 - - movq mm7,mm1 - movq mm4,mm5 - psubw mm1,mm6 ; mm1=data4 - psubw mm5,mm0 ; mm5=data6 - paddw mm7,mm6 ; mm7=data0 - paddw mm4,mm0 ; mm4=data2 - - movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm1 - movq MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm5 - movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7 - movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 - - ; -- Odd part - - movq mm6, MMWORD [wk(0)] ; mm6=tmp6 - movq mm0, MMWORD [wk(1)] ; mm0=tmp7 - - paddw mm2,mm3 ; mm2=tmp10 - paddw mm3,mm6 ; mm3=tmp11 - paddw mm6,mm0 ; mm6=tmp12, mm0=tmp7 - - psllw mm2,PRE_MULTIPLY_SCALE_BITS - psllw mm6,PRE_MULTIPLY_SCALE_BITS - 
- psllw mm3,PRE_MULTIPLY_SCALE_BITS - pmulhw mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3 - - movq mm1,mm2 ; mm1=tmp10 - psubw mm2,mm6 - pmulhw mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5 - pmulhw mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610) - pmulhw mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296) - paddw mm1,mm2 ; mm1=z2 - paddw mm6,mm2 ; mm6=z4 - - movq mm5,mm0 - psubw mm0,mm3 ; mm0=z13 - paddw mm5,mm3 ; mm5=z11 - - movq mm7,mm0 - movq mm4,mm5 - psubw mm0,mm1 ; mm0=data3 - psubw mm5,mm6 ; mm5=data7 - paddw mm7,mm1 ; mm7=data5 - paddw mm4,mm6 ; mm4=data1 - - movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0 - movq MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm5 - movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm7 - movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4 - - add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM - dec ecx - jnz near .rowloop - - ; ---- Pass 2: process columns. - - mov edx, POINTER [data(eax)] ; (DCTELEM *) - mov ecx, DCTSIZE/4 - alignx 16,7 -.columnloop: - - movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] - movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] - movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)] - movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)] - - ; mm0=(02 12 22 32), mm2=(42 52 62 72) - ; mm1=(03 13 23 33), mm3=(43 53 63 73) - - movq mm4,mm0 ; transpose coefficients(phase 1) - punpcklwd mm0,mm1 ; mm0=(02 03 12 13) - punpckhwd mm4,mm1 ; mm4=(22 23 32 33) - movq mm5,mm2 ; transpose coefficients(phase 1) - punpcklwd mm2,mm3 ; mm2=(42 43 52 53) - punpckhwd mm5,mm3 ; mm5=(62 63 72 73) - - movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] - movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] - movq mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)] - movq mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)] - - ; mm6=(00 10 20 30), mm1=(40 50 60 70) - ; mm7=(01 11 21 31), mm3=(41 51 61 71) - - movq MMWORD [wk(0)], mm4 ; wk(0)=(22 23 32 33) - movq MMWORD [wk(1)], mm2 ; wk(1)=(42 43 52 53) - - movq mm4,mm6 ; transpose 
coefficients(phase 1) - punpcklwd mm6,mm7 ; mm6=(00 01 10 11) - punpckhwd mm4,mm7 ; mm4=(20 21 30 31) - movq mm2,mm1 ; transpose coefficients(phase 1) - punpcklwd mm1,mm3 ; mm1=(40 41 50 51) - punpckhwd mm2,mm3 ; mm2=(60 61 70 71) - - movq mm7,mm6 ; transpose coefficients(phase 2) - punpckldq mm6,mm0 ; mm6=(00 01 02 03)=data0 - punpckhdq mm7,mm0 ; mm7=(10 11 12 13)=data1 - movq mm3,mm2 ; transpose coefficients(phase 2) - punpckldq mm2,mm5 ; mm2=(60 61 62 63)=data6 - punpckhdq mm3,mm5 ; mm3=(70 71 72 73)=data7 - - movq mm0,mm7 - movq mm5,mm6 - psubw mm7,mm2 ; mm7=data1-data6=tmp6 - psubw mm6,mm3 ; mm6=data0-data7=tmp7 - paddw mm0,mm2 ; mm0=data1+data6=tmp1 - paddw mm5,mm3 ; mm5=data0+data7=tmp0 - - movq mm2, MMWORD [wk(0)] ; mm2=(22 23 32 33) - movq mm3, MMWORD [wk(1)] ; mm3=(42 43 52 53) - movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 - movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 - - movq mm7,mm4 ; transpose coefficients(phase 2) - punpckldq mm4,mm2 ; mm4=(20 21 22 23)=data2 - punpckhdq mm7,mm2 ; mm7=(30 31 32 33)=data3 - movq mm6,mm1 ; transpose coefficients(phase 2) - punpckldq mm1,mm3 ; mm1=(40 41 42 43)=data4 - punpckhdq mm6,mm3 ; mm6=(50 51 52 53)=data5 - - movq mm2,mm7 - movq mm3,mm4 - paddw mm7,mm1 ; mm7=data3+data4=tmp3 - paddw mm4,mm6 ; mm4=data2+data5=tmp2 - psubw mm2,mm1 ; mm2=data3-data4=tmp4 - psubw mm3,mm6 ; mm3=data2-data5=tmp5 - - ; -- Even part - - movq mm1,mm5 - movq mm6,mm0 - psubw mm5,mm7 ; mm5=tmp13 - psubw mm0,mm4 ; mm0=tmp12 - paddw mm1,mm7 ; mm1=tmp10 - paddw mm6,mm4 ; mm6=tmp11 - - paddw mm0,mm5 - psllw mm0,PRE_MULTIPLY_SCALE_BITS - pmulhw mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1 - - movq mm7,mm1 - movq mm4,mm5 - psubw mm1,mm6 ; mm1=data4 - psubw mm5,mm0 ; mm5=data6 - paddw mm7,mm6 ; mm7=data0 - paddw mm4,mm0 ; mm4=data2 - - movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm1 - movq MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm5 - movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7 - movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 - - ; -- Odd part - - movq 
mm6, MMWORD [wk(0)] ; mm6=tmp6 - movq mm0, MMWORD [wk(1)] ; mm0=tmp7 - - paddw mm2,mm3 ; mm2=tmp10 - paddw mm3,mm6 ; mm3=tmp11 - paddw mm6,mm0 ; mm6=tmp12, mm0=tmp7 - - psllw mm2,PRE_MULTIPLY_SCALE_BITS - psllw mm6,PRE_MULTIPLY_SCALE_BITS - - psllw mm3,PRE_MULTIPLY_SCALE_BITS - pmulhw mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3 - - movq mm1,mm2 ; mm1=tmp10 - psubw mm2,mm6 - pmulhw mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5 - pmulhw mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610) - pmulhw mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296) - paddw mm1,mm2 ; mm1=z2 - paddw mm6,mm2 ; mm6=z4 - - movq mm5,mm0 - psubw mm0,mm3 ; mm0=z13 - paddw mm5,mm3 ; mm5=z11 - - movq mm7,mm0 - movq mm4,mm5 - psubw mm0,mm1 ; mm0=data3 - psubw mm5,mm6 ; mm5=data7 - paddw mm7,mm1 ; mm7=data5 - paddw mm4,mm6 ; mm4=data1 - - movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0 - movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm5 - movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm7 - movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4 - - add edx, byte 4*SIZEOF_DCTELEM - dec ecx - jnz near .columnloop - - emms ; empty MMX state - -; pop edi ; unused -; pop esi ; unused -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - poppic ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jfdctfst-sse2-64.asm b/simd/jfdctfst-sse2-64.asm deleted file mode 100644 index 4c96685..0000000 --- a/simd/jfdctfst-sse2-64.asm +++ /dev/null @@ -1,391 +0,0 @@ -; -; jfdctfst.asm - fast integer FDCT (64-bit SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a fast, not so accurate integer implementation of -; the forward DCT (Discrete Cosine Transform). The following code is -; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c -; for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 8 ; 14 is also OK. - -%if CONST_BITS == 8 -F_0_382 equ 98 ; FIX(0.382683433) -F_0_541 equ 139 ; FIX(0.541196100) -F_0_707 equ 181 ; FIX(0.707106781) -F_1_306 equ 334 ; FIX(1.306562965) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. 
-%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) -F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) -F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) -F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - -; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) -; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) - -%define PRE_MULTIPLY_SCALE_BITS 2 -%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) - - alignz 16 - global EXTN(jconst_fdct_ifast_sse2) - -EXTN(jconst_fdct_ifast_sse2): - -PW_F0707 times 8 dw F_0_707 << CONST_SHIFT -PW_F0382 times 8 dw F_0_382 << CONST_SHIFT -PW_F0541 times 8 dw F_0_541 << CONST_SHIFT -PW_F1306 times 8 dw F_1_306 << CONST_SHIFT - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Perform the forward DCT on one block of samples. -; -; GLOBAL(void) -; jsimd_fdct_ifast_sse2 (DCTELEM *data) -; - -; r10 = DCTELEM *data - -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_fdct_ifast_sse2) - -EXTN(jsimd_fdct_ifast_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - - ; ---- Pass 1: process rows. 
- - mov rdx, r10 ; (DCTELEM *) - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)] - - ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) - ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) - - movdqa xmm4,xmm0 ; transpose coefficients(phase 1) - punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) - punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) - movdqa xmm5,xmm2 ; transpose coefficients(phase 1) - punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) - punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) - - movdqa xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)] - - ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) - ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) - - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) - movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) - - movdqa xmm2,xmm6 ; transpose coefficients(phase 1) - punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) - punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) - movdqa xmm5,xmm1 ; transpose coefficients(phase 1) - punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) - punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) - - movdqa xmm7,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) - punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) - movdqa xmm3,xmm2 ; transpose coefficients(phase 2) - punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) - punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) - - movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 
25 35 26 36 27 37) - movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(42 52 62 72 43 53 63 73) - movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=(44 54 64 74 45 55 65 75) - - movdqa xmm7,xmm0 ; transpose coefficients(phase 2) - punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) - punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) - movdqa xmm2,xmm4 ; transpose coefficients(phase 2) - punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) - punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) - - movdqa xmm1,xmm0 ; transpose coefficients(phase 3) - punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 - punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 - movdqa xmm5,xmm2 ; transpose coefficients(phase 3) - punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 - punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 - - movdqa xmm6,xmm1 - movdqa xmm3,xmm0 - psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 - psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 - paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 - paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 - - movdqa xmm2, XMMWORD [wk(0)] ; xmm2=(42 52 62 72 43 53 63 73) - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(44 54 64 74 45 55 65 75) - movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 - - movdqa xmm1,xmm7 ; transpose coefficients(phase 3) - punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 - punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 - movdqa xmm0,xmm4 ; transpose coefficients(phase 3) - punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 - punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 - - movdqa xmm2,xmm1 - movdqa xmm5,xmm7 - paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 - paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 - psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 - psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 - - ; -- Even part - - movdqa xmm4,xmm3 - movdqa xmm0,xmm6 - psubw xmm3,xmm1 ; xmm3=tmp13 - psubw xmm6,xmm7 ; xmm6=tmp12 - paddw xmm4,xmm1 ; xmm4=tmp10 - paddw xmm0,xmm7 ; 
xmm0=tmp11 - - paddw xmm6,xmm3 - psllw xmm6,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm6,[rel PW_F0707] ; xmm6=z1 - - movdqa xmm1,xmm4 - movdqa xmm7,xmm3 - psubw xmm4,xmm0 ; xmm4=data4 - psubw xmm3,xmm6 ; xmm3=data6 - paddw xmm1,xmm0 ; xmm1=data0 - paddw xmm7,xmm6 ; xmm7=data2 - - movdqa xmm0, XMMWORD [wk(0)] ; xmm0=tmp6 - movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp7 - movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=data4 - movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=data6 - - ; -- Odd part - - paddw xmm2,xmm5 ; xmm2=tmp10 - paddw xmm5,xmm0 ; xmm5=tmp11 - paddw xmm0,xmm6 ; xmm0=tmp12, xmm6=tmp7 - - psllw xmm2,PRE_MULTIPLY_SCALE_BITS - psllw xmm0,PRE_MULTIPLY_SCALE_BITS - - psllw xmm5,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm5,[rel PW_F0707] ; xmm5=z3 - - movdqa xmm4,xmm2 ; xmm4=tmp10 - psubw xmm2,xmm0 - pmulhw xmm2,[rel PW_F0382] ; xmm2=z5 - pmulhw xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) - pmulhw xmm0,[rel PW_F1306] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) - paddw xmm4,xmm2 ; xmm4=z2 - paddw xmm0,xmm2 ; xmm0=z4 - - movdqa xmm3,xmm6 - psubw xmm6,xmm5 ; xmm6=z13 - paddw xmm3,xmm5 ; xmm3=z11 - - movdqa xmm2,xmm6 - movdqa xmm5,xmm3 - psubw xmm6,xmm4 ; xmm6=data3 - psubw xmm3,xmm0 ; xmm3=data7 - paddw xmm2,xmm4 ; xmm2=data5 - paddw xmm5,xmm0 ; xmm5=data1 - - ; ---- Pass 2: process columns. 
- - ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72) - ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73) - - movdqa xmm4,xmm1 ; transpose coefficients(phase 1) - punpcklwd xmm1,xmm5 ; xmm1=(00 01 10 11 20 21 30 31) - punpckhwd xmm4,xmm5 ; xmm4=(40 41 50 51 60 61 70 71) - movdqa xmm0,xmm7 ; transpose coefficients(phase 1) - punpcklwd xmm7,xmm6 ; xmm7=(02 03 12 13 22 23 32 33) - punpckhwd xmm0,xmm6 ; xmm0=(42 43 52 53 62 63 72 73) - - movdqa xmm5, XMMWORD [wk(0)] ; xmm5=col4 - movdqa xmm6, XMMWORD [wk(1)] ; xmm6=col6 - - ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76) - ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77) - - movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(02 03 12 13 22 23 32 33) - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(42 43 52 53 62 63 72 73) - - movdqa xmm7,xmm5 ; transpose coefficients(phase 1) - punpcklwd xmm5,xmm2 ; xmm5=(04 05 14 15 24 25 34 35) - punpckhwd xmm7,xmm2 ; xmm7=(44 45 54 55 64 65 74 75) - movdqa xmm0,xmm6 ; transpose coefficients(phase 1) - punpcklwd xmm6,xmm3 ; xmm6=(06 07 16 17 26 27 36 37) - punpckhwd xmm0,xmm3 ; xmm0=(46 47 56 57 66 67 76 77) - - movdqa xmm2,xmm5 ; transpose coefficients(phase 2) - punpckldq xmm5,xmm6 ; xmm5=(04 05 06 07 14 15 16 17) - punpckhdq xmm2,xmm6 ; xmm2=(24 25 26 27 34 35 36 37) - movdqa xmm3,xmm7 ; transpose coefficients(phase 2) - punpckldq xmm7,xmm0 ; xmm7=(44 45 46 47 54 55 56 57) - punpckhdq xmm3,xmm0 ; xmm3=(64 65 66 67 74 75 76 77) - - movdqa xmm6, XMMWORD [wk(0)] ; xmm6=(02 03 12 13 22 23 32 33) - movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(42 43 52 53 62 63 72 73) - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(24 25 26 27 34 35 36 37) - movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(44 45 46 47 54 55 56 57) - - movdqa xmm2,xmm1 ; transpose coefficients(phase 2) - punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 10 11 12 13) - punpckhdq xmm2,xmm6 ; xmm2=(20 21 22 23 30 31 32 33) - movdqa xmm7,xmm4 ; transpose coefficients(phase 2) - punpckldq xmm4,xmm0 ; xmm4=(40 41 42 43 50 
51 52 53) - punpckhdq xmm7,xmm0 ; xmm7=(60 61 62 63 70 71 72 73) - - movdqa xmm6,xmm1 ; transpose coefficients(phase 3) - punpcklqdq xmm1,xmm5 ; xmm1=(00 01 02 03 04 05 06 07)=data0 - punpckhqdq xmm6,xmm5 ; xmm6=(10 11 12 13 14 15 16 17)=data1 - movdqa xmm0,xmm7 ; transpose coefficients(phase 3) - punpcklqdq xmm7,xmm3 ; xmm7=(60 61 62 63 64 65 66 67)=data6 - punpckhqdq xmm0,xmm3 ; xmm0=(70 71 72 73 74 75 76 77)=data7 - - movdqa xmm5,xmm6 - movdqa xmm3,xmm1 - psubw xmm6,xmm7 ; xmm6=data1-data6=tmp6 - psubw xmm1,xmm0 ; xmm1=data0-data7=tmp7 - paddw xmm5,xmm7 ; xmm5=data1+data6=tmp1 - paddw xmm3,xmm0 ; xmm3=data0+data7=tmp0 - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(24 25 26 27 34 35 36 37) - movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(44 45 46 47 54 55 56 57) - movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=tmp6 - movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=tmp7 - - movdqa xmm6,xmm2 ; transpose coefficients(phase 3) - punpcklqdq xmm2,xmm7 ; xmm2=(20 21 22 23 24 25 26 27)=data2 - punpckhqdq xmm6,xmm7 ; xmm6=(30 31 32 33 34 35 36 37)=data3 - movdqa xmm1,xmm4 ; transpose coefficients(phase 3) - punpcklqdq xmm4,xmm0 ; xmm4=(40 41 42 43 44 45 46 47)=data4 - punpckhqdq xmm1,xmm0 ; xmm1=(50 51 52 53 54 55 56 57)=data5 - - movdqa xmm7,xmm6 - movdqa xmm0,xmm2 - paddw xmm6,xmm4 ; xmm6=data3+data4=tmp3 - paddw xmm2,xmm1 ; xmm2=data2+data5=tmp2 - psubw xmm7,xmm4 ; xmm7=data3-data4=tmp4 - psubw xmm0,xmm1 ; xmm0=data2-data5=tmp5 - - ; -- Even part - - movdqa xmm4,xmm3 - movdqa xmm1,xmm5 - psubw xmm3,xmm6 ; xmm3=tmp13 - psubw xmm5,xmm2 ; xmm5=tmp12 - paddw xmm4,xmm6 ; xmm4=tmp10 - paddw xmm1,xmm2 ; xmm1=tmp11 - - paddw xmm5,xmm3 - psllw xmm5,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm5,[rel PW_F0707] ; xmm5=z1 - - movdqa xmm6,xmm4 - movdqa xmm2,xmm3 - psubw xmm4,xmm1 ; xmm4=data4 - psubw xmm3,xmm5 ; xmm3=data6 - paddw xmm6,xmm1 ; xmm6=data0 - paddw xmm2,xmm5 ; xmm2=data2 - - movdqa XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm4 - movdqa XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm3 - movdqa XMMWORD 
[XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm6 - movdqa XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm2 - - ; -- Odd part - - movdqa xmm1, XMMWORD [wk(0)] ; xmm1=tmp6 - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 - - paddw xmm7,xmm0 ; xmm7=tmp10 - paddw xmm0,xmm1 ; xmm0=tmp11 - paddw xmm1,xmm5 ; xmm1=tmp12, xmm5=tmp7 - - psllw xmm7,PRE_MULTIPLY_SCALE_BITS - psllw xmm1,PRE_MULTIPLY_SCALE_BITS - - psllw xmm0,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm0,[rel PW_F0707] ; xmm0=z3 - - movdqa xmm4,xmm7 ; xmm4=tmp10 - psubw xmm7,xmm1 - pmulhw xmm7,[rel PW_F0382] ; xmm7=z5 - pmulhw xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) - pmulhw xmm1,[rel PW_F1306] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) - paddw xmm4,xmm7 ; xmm4=z2 - paddw xmm1,xmm7 ; xmm1=z4 - - movdqa xmm3,xmm5 - psubw xmm5,xmm0 ; xmm5=z13 - paddw xmm3,xmm0 ; xmm3=z11 - - movdqa xmm6,xmm5 - movdqa xmm2,xmm3 - psubw xmm5,xmm4 ; xmm5=data3 - psubw xmm3,xmm1 ; xmm3=data7 - paddw xmm6,xmm4 ; xmm6=data5 - paddw xmm2,xmm1 ; xmm2=data1 - - movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm5 - movdqa XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm3 - movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6 - movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2 - - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jfdctfst-sse2.asm b/simd/jfdctfst-sse2.asm deleted file mode 100644 index 54856a2..0000000 --- a/simd/jfdctfst-sse2.asm +++ /dev/null @@ -1,403 +0,0 @@ -; -; jfdctfst.asm - fast integer FDCT (SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a fast, not so accurate integer implementation of -; the forward DCT (Discrete Cosine Transform). The following code is -; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c -; for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 8 ; 14 is also OK. - -%if CONST_BITS == 8 -F_0_382 equ 98 ; FIX(0.382683433) -F_0_541 equ 139 ; FIX(0.541196100) -F_0_707 equ 181 ; FIX(0.707106781) -F_1_306 equ 334 ; FIX(1.306562965) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. 
-%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_382 equ DESCALE( 410903207,30-CONST_BITS) ; FIX(0.382683433) -F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) -F_0_707 equ DESCALE( 759250124,30-CONST_BITS) ; FIX(0.707106781) -F_1_306 equ DESCALE(1402911301,30-CONST_BITS) ; FIX(1.306562965) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - -; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) -; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) - -%define PRE_MULTIPLY_SCALE_BITS 2 -%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) - - alignz 16 - global EXTN(jconst_fdct_ifast_sse2) - -EXTN(jconst_fdct_ifast_sse2): - -PW_F0707 times 8 dw F_0_707 << CONST_SHIFT -PW_F0382 times 8 dw F_0_382 << CONST_SHIFT -PW_F0541 times 8 dw F_0_541 << CONST_SHIFT -PW_F1306 times 8 dw F_1_306 << CONST_SHIFT - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform the forward DCT on one block of samples. -; -; GLOBAL(void) -; jsimd_fdct_ifast_sse2 (DCTELEM *data) -; - -%define data(b) (b)+8 ; DCTELEM *data - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_fdct_ifast_sse2) - -EXTN(jsimd_fdct_ifast_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic ebx -; push ecx ; unused -; push edx ; need not be preserved -; push esi ; unused -; push edi ; unused - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process rows. 
- - mov edx, POINTER [data(eax)] ; (DCTELEM *) - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] - movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)] - movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] - movdqa xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)] - - ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) - ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) - - movdqa xmm4,xmm0 ; transpose coefficients(phase 1) - punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) - punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) - movdqa xmm5,xmm2 ; transpose coefficients(phase 1) - punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) - punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) - - movdqa xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] - movdqa xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)] - movdqa xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] - movdqa xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)] - - ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) - ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) - - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) - movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) - - movdqa xmm2,xmm6 ; transpose coefficients(phase 1) - punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) - punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) - movdqa xmm5,xmm1 ; transpose coefficients(phase 1) - punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) - punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) - - movdqa xmm7,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) - punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) - movdqa xmm3,xmm2 ; transpose coefficients(phase 2) - punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) - punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) - - movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) - movdqa xmm5, XMMWORD [wk(1)] 
; xmm5=(24 34 25 35 26 36 27 37) - movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(42 52 62 72 43 53 63 73) - movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=(44 54 64 74 45 55 65 75) - - movdqa xmm7,xmm0 ; transpose coefficients(phase 2) - punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) - punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) - movdqa xmm2,xmm4 ; transpose coefficients(phase 2) - punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) - punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) - - movdqa xmm1,xmm0 ; transpose coefficients(phase 3) - punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 - punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 - movdqa xmm5,xmm2 ; transpose coefficients(phase 3) - punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 - punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 - - movdqa xmm6,xmm1 - movdqa xmm3,xmm0 - psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 - psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 - paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 - paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 - - movdqa xmm2, XMMWORD [wk(0)] ; xmm2=(42 52 62 72 43 53 63 73) - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(44 54 64 74 45 55 65 75) - movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 - - movdqa xmm1,xmm7 ; transpose coefficients(phase 3) - punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 - punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 - movdqa xmm0,xmm4 ; transpose coefficients(phase 3) - punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 - punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 - - movdqa xmm2,xmm1 - movdqa xmm5,xmm7 - paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 - paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 - psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 - psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 - - ; -- Even part - - movdqa xmm4,xmm3 - movdqa xmm0,xmm6 - psubw xmm3,xmm1 ; xmm3=tmp13 - psubw xmm6,xmm7 ; xmm6=tmp12 - paddw xmm4,xmm1 ; xmm4=tmp10 - paddw 
xmm0,xmm7 ; xmm0=tmp11 - - paddw xmm6,xmm3 - psllw xmm6,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm6,[GOTOFF(ebx,PW_F0707)] ; xmm6=z1 - - movdqa xmm1,xmm4 - movdqa xmm7,xmm3 - psubw xmm4,xmm0 ; xmm4=data4 - psubw xmm3,xmm6 ; xmm3=data6 - paddw xmm1,xmm0 ; xmm1=data0 - paddw xmm7,xmm6 ; xmm7=data2 - - movdqa xmm0, XMMWORD [wk(0)] ; xmm0=tmp6 - movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp7 - movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=data4 - movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=data6 - - ; -- Odd part - - paddw xmm2,xmm5 ; xmm2=tmp10 - paddw xmm5,xmm0 ; xmm5=tmp11 - paddw xmm0,xmm6 ; xmm0=tmp12, xmm6=tmp7 - - psllw xmm2,PRE_MULTIPLY_SCALE_BITS - psllw xmm0,PRE_MULTIPLY_SCALE_BITS - - psllw xmm5,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z3 - - movdqa xmm4,xmm2 ; xmm4=tmp10 - psubw xmm2,xmm0 - pmulhw xmm2,[GOTOFF(ebx,PW_F0382)] ; xmm2=z5 - pmulhw xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) - pmulhw xmm0,[GOTOFF(ebx,PW_F1306)] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) - paddw xmm4,xmm2 ; xmm4=z2 - paddw xmm0,xmm2 ; xmm0=z4 - - movdqa xmm3,xmm6 - psubw xmm6,xmm5 ; xmm6=z13 - paddw xmm3,xmm5 ; xmm3=z11 - - movdqa xmm2,xmm6 - movdqa xmm5,xmm3 - psubw xmm6,xmm4 ; xmm6=data3 - psubw xmm3,xmm0 ; xmm3=data7 - paddw xmm2,xmm4 ; xmm2=data5 - paddw xmm5,xmm0 ; xmm5=data1 - - ; ---- Pass 2: process columns. 
- -; mov edx, POINTER [data(eax)] ; (DCTELEM *) - - ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72) - ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73) - - movdqa xmm4,xmm1 ; transpose coefficients(phase 1) - punpcklwd xmm1,xmm5 ; xmm1=(00 01 10 11 20 21 30 31) - punpckhwd xmm4,xmm5 ; xmm4=(40 41 50 51 60 61 70 71) - movdqa xmm0,xmm7 ; transpose coefficients(phase 1) - punpcklwd xmm7,xmm6 ; xmm7=(02 03 12 13 22 23 32 33) - punpckhwd xmm0,xmm6 ; xmm0=(42 43 52 53 62 63 72 73) - - movdqa xmm5, XMMWORD [wk(0)] ; xmm5=col4 - movdqa xmm6, XMMWORD [wk(1)] ; xmm6=col6 - - ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76) - ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77) - - movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(02 03 12 13 22 23 32 33) - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(42 43 52 53 62 63 72 73) - - movdqa xmm7,xmm5 ; transpose coefficients(phase 1) - punpcklwd xmm5,xmm2 ; xmm5=(04 05 14 15 24 25 34 35) - punpckhwd xmm7,xmm2 ; xmm7=(44 45 54 55 64 65 74 75) - movdqa xmm0,xmm6 ; transpose coefficients(phase 1) - punpcklwd xmm6,xmm3 ; xmm6=(06 07 16 17 26 27 36 37) - punpckhwd xmm0,xmm3 ; xmm0=(46 47 56 57 66 67 76 77) - - movdqa xmm2,xmm5 ; transpose coefficients(phase 2) - punpckldq xmm5,xmm6 ; xmm5=(04 05 06 07 14 15 16 17) - punpckhdq xmm2,xmm6 ; xmm2=(24 25 26 27 34 35 36 37) - movdqa xmm3,xmm7 ; transpose coefficients(phase 2) - punpckldq xmm7,xmm0 ; xmm7=(44 45 46 47 54 55 56 57) - punpckhdq xmm3,xmm0 ; xmm3=(64 65 66 67 74 75 76 77) - - movdqa xmm6, XMMWORD [wk(0)] ; xmm6=(02 03 12 13 22 23 32 33) - movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(42 43 52 53 62 63 72 73) - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(24 25 26 27 34 35 36 37) - movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(44 45 46 47 54 55 56 57) - - movdqa xmm2,xmm1 ; transpose coefficients(phase 2) - punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 10 11 12 13) - punpckhdq xmm2,xmm6 ; xmm2=(20 21 22 23 30 31 32 33) - movdqa xmm7,xmm4 ; transpose coefficients(phase 
2) - punpckldq xmm4,xmm0 ; xmm4=(40 41 42 43 50 51 52 53) - punpckhdq xmm7,xmm0 ; xmm7=(60 61 62 63 70 71 72 73) - - movdqa xmm6,xmm1 ; transpose coefficients(phase 3) - punpcklqdq xmm1,xmm5 ; xmm1=(00 01 02 03 04 05 06 07)=data0 - punpckhqdq xmm6,xmm5 ; xmm6=(10 11 12 13 14 15 16 17)=data1 - movdqa xmm0,xmm7 ; transpose coefficients(phase 3) - punpcklqdq xmm7,xmm3 ; xmm7=(60 61 62 63 64 65 66 67)=data6 - punpckhqdq xmm0,xmm3 ; xmm0=(70 71 72 73 74 75 76 77)=data7 - - movdqa xmm5,xmm6 - movdqa xmm3,xmm1 - psubw xmm6,xmm7 ; xmm6=data1-data6=tmp6 - psubw xmm1,xmm0 ; xmm1=data0-data7=tmp7 - paddw xmm5,xmm7 ; xmm5=data1+data6=tmp1 - paddw xmm3,xmm0 ; xmm3=data0+data7=tmp0 - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(24 25 26 27 34 35 36 37) - movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(44 45 46 47 54 55 56 57) - movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=tmp6 - movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=tmp7 - - movdqa xmm6,xmm2 ; transpose coefficients(phase 3) - punpcklqdq xmm2,xmm7 ; xmm2=(20 21 22 23 24 25 26 27)=data2 - punpckhqdq xmm6,xmm7 ; xmm6=(30 31 32 33 34 35 36 37)=data3 - movdqa xmm1,xmm4 ; transpose coefficients(phase 3) - punpcklqdq xmm4,xmm0 ; xmm4=(40 41 42 43 44 45 46 47)=data4 - punpckhqdq xmm1,xmm0 ; xmm1=(50 51 52 53 54 55 56 57)=data5 - - movdqa xmm7,xmm6 - movdqa xmm0,xmm2 - paddw xmm6,xmm4 ; xmm6=data3+data4=tmp3 - paddw xmm2,xmm1 ; xmm2=data2+data5=tmp2 - psubw xmm7,xmm4 ; xmm7=data3-data4=tmp4 - psubw xmm0,xmm1 ; xmm0=data2-data5=tmp5 - - ; -- Even part - - movdqa xmm4,xmm3 - movdqa xmm1,xmm5 - psubw xmm3,xmm6 ; xmm3=tmp13 - psubw xmm5,xmm2 ; xmm5=tmp12 - paddw xmm4,xmm6 ; xmm4=tmp10 - paddw xmm1,xmm2 ; xmm1=tmp11 - - paddw xmm5,xmm3 - psllw xmm5,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z1 - - movdqa xmm6,xmm4 - movdqa xmm2,xmm3 - psubw xmm4,xmm1 ; xmm4=data4 - psubw xmm3,xmm5 ; xmm3=data6 - paddw xmm6,xmm1 ; xmm6=data0 - paddw xmm2,xmm5 ; xmm2=data2 - - movdqa XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm4 - movdqa XMMWORD 
[XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm3 - movdqa XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm6 - movdqa XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm2 - - ; -- Odd part - - movdqa xmm1, XMMWORD [wk(0)] ; xmm1=tmp6 - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 - - paddw xmm7,xmm0 ; xmm7=tmp10 - paddw xmm0,xmm1 ; xmm0=tmp11 - paddw xmm1,xmm5 ; xmm1=tmp12, xmm5=tmp7 - - psllw xmm7,PRE_MULTIPLY_SCALE_BITS - psllw xmm1,PRE_MULTIPLY_SCALE_BITS - - psllw xmm0,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm0,[GOTOFF(ebx,PW_F0707)] ; xmm0=z3 - - movdqa xmm4,xmm7 ; xmm4=tmp10 - psubw xmm7,xmm1 - pmulhw xmm7,[GOTOFF(ebx,PW_F0382)] ; xmm7=z5 - pmulhw xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) - pmulhw xmm1,[GOTOFF(ebx,PW_F1306)] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) - paddw xmm4,xmm7 ; xmm4=z2 - paddw xmm1,xmm7 ; xmm1=z4 - - movdqa xmm3,xmm5 - psubw xmm5,xmm0 ; xmm5=z13 - paddw xmm3,xmm0 ; xmm3=z11 - - movdqa xmm6,xmm5 - movdqa xmm2,xmm3 - psubw xmm5,xmm4 ; xmm5=data3 - psubw xmm3,xmm1 ; xmm3=data7 - paddw xmm6,xmm4 ; xmm6=data5 - paddw xmm2,xmm1 ; xmm2=data1 - - movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm5 - movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm3 - movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm6 - movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm2 - -; pop edi ; unused -; pop esi ; unused -; pop edx ; need not be preserved -; pop ecx ; unused - poppic ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jfdctint-altivec.c b/simd/jfdctint-altivec.c deleted file mode 100644 index e6e8a56..0000000 --- a/simd/jfdctint-altivec.c +++ /dev/null @@ -1,262 +0,0 @@ -/* - * AltiVec optimizations for libjpeg-turbo - * - * Copyright (C) 2014, D. R. Commander. All Rights Reserved. - * - * This software is provided 'as-is', without any express or implied - * warranty. 
In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* SLOW INTEGER FORWARD DCT */ - -#include "jsimd_altivec.h" - - -#define F_0_298 2446 /* FIX(0.298631336) */ -#define F_0_390 3196 /* FIX(0.390180644) */ -#define F_0_541 4433 /* FIX(0.541196100) */ -#define F_0_765 6270 /* FIX(0.765366865) */ -#define F_0_899 7373 /* FIX(0.899976223) */ -#define F_1_175 9633 /* FIX(1.175875602) */ -#define F_1_501 12299 /* FIX(1.501321110) */ -#define F_1_847 15137 /* FIX(1.847759065) */ -#define F_1_961 16069 /* FIX(1.961570560) */ -#define F_2_053 16819 /* FIX(2.053119869) */ -#define F_2_562 20995 /* FIX(2.562915447) */ -#define F_3_072 25172 /* FIX(3.072711026) */ - -#define CONST_BITS 13 -#define PASS1_BITS 2 -#define DESCALE_P1 (CONST_BITS - PASS1_BITS) -#define DESCALE_P2 (CONST_BITS + PASS1_BITS) - - -#define DO_FDCT_COMMON(PASS) \ -{ \ - /* (Original) \ - * z1 = (tmp12 + tmp13) * 0.541196100; \ - * data2 = z1 + tmp13 * 0.765366865; \ - * data6 = z1 + tmp12 * -1.847759065; \ - * \ - * (This implementation) \ - * data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; \ - * data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); \ - */ \ - \ - tmp1312l = vec_mergeh(tmp13, tmp12); \ - tmp1312h = vec_mergel(tmp13, tmp12); \ - \ - 
out2l = vec_msums(tmp1312l, pw_f130_f054, pd_descale_p##PASS); \ - out2h = vec_msums(tmp1312h, pw_f130_f054, pd_descale_p##PASS); \ - out6l = vec_msums(tmp1312l, pw_f054_mf130, pd_descale_p##PASS); \ - out6h = vec_msums(tmp1312h, pw_f054_mf130, pd_descale_p##PASS); \ - \ - out2l = vec_sra(out2l, descale_p##PASS); \ - out2h = vec_sra(out2h, descale_p##PASS); \ - out6l = vec_sra(out6l, descale_p##PASS); \ - out6h = vec_sra(out6h, descale_p##PASS); \ - \ - out2 = vec_pack(out2l, out2h); \ - out6 = vec_pack(out6l, out6h); \ - \ - /* Odd part */ \ - \ - z3 = vec_add(tmp4, tmp6); \ - z4 = vec_add(tmp5, tmp7); \ - \ - /* (Original) \ - * z5 = (z3 + z4) * 1.175875602; \ - * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ - * z3 += z5; z4 += z5; \ - * \ - * (This implementation) \ - * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ - * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ - */ \ - \ - z34l = vec_mergeh(z3, z4); \ - z34h = vec_mergel(z3, z4); \ - \ - z3l = vec_msums(z34l, pw_mf078_f117, pd_descale_p##PASS); \ - z3h = vec_msums(z34h, pw_mf078_f117, pd_descale_p##PASS); \ - z4l = vec_msums(z34l, pw_f117_f078, pd_descale_p##PASS); \ - z4h = vec_msums(z34h, pw_f117_f078, pd_descale_p##PASS); \ - \ - /* (Original) \ - * z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; \ - * tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; \ - * tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; \ - * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ - * data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; \ - * data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; \ - * \ - * (This implementation) \ - * tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; \ - * tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; \ - * tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); \ - * tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); \ - * data7 = tmp4 + z3; data5 = tmp5 + z4; \ - * data3 = tmp6 + z3; data1 = tmp7 + z4; \ - */ \ - \ 
- tmp47l = vec_mergeh(tmp4, tmp7); \ - tmp47h = vec_mergel(tmp4, tmp7); \ - \ - out7l = vec_msums(tmp47l, pw_mf060_mf089, z3l); \ - out7h = vec_msums(tmp47h, pw_mf060_mf089, z3h); \ - out1l = vec_msums(tmp47l, pw_mf089_f060, z4l); \ - out1h = vec_msums(tmp47h, pw_mf089_f060, z4h); \ - \ - out7l = vec_sra(out7l, descale_p##PASS); \ - out7h = vec_sra(out7h, descale_p##PASS); \ - out1l = vec_sra(out1l, descale_p##PASS); \ - out1h = vec_sra(out1h, descale_p##PASS); \ - \ - out7 = vec_pack(out7l, out7h); \ - out1 = vec_pack(out1l, out1h); \ - \ - tmp56l = vec_mergeh(tmp5, tmp6); \ - tmp56h = vec_mergel(tmp5, tmp6); \ - \ - out5l = vec_msums(tmp56l, pw_mf050_mf256, z4l); \ - out5h = vec_msums(tmp56h, pw_mf050_mf256, z4h); \ - out3l = vec_msums(tmp56l, pw_mf256_f050, z3l); \ - out3h = vec_msums(tmp56h, pw_mf256_f050, z3h); \ - \ - out5l = vec_sra(out5l, descale_p##PASS); \ - out5h = vec_sra(out5h, descale_p##PASS); \ - out3l = vec_sra(out3l, descale_p##PASS); \ - out3h = vec_sra(out3h, descale_p##PASS); \ - \ - out5 = vec_pack(out5l, out5h); \ - out3 = vec_pack(out3l, out3h); \ -} - -#define DO_FDCT_PASS1() \ -{ \ - /* Even part */ \ - \ - tmp10 = vec_add(tmp0, tmp3); \ - tmp13 = vec_sub(tmp0, tmp3); \ - tmp11 = vec_add(tmp1, tmp2); \ - tmp12 = vec_sub(tmp1, tmp2); \ - \ - out0 = vec_add(tmp10, tmp11); \ - out0 = vec_sl(out0, pass1_bits); \ - out4 = vec_sub(tmp10, tmp11); \ - out4 = vec_sl(out4, pass1_bits); \ - \ - DO_FDCT_COMMON(1); \ -} - -#define DO_FDCT_PASS2() \ -{ \ - /* Even part */ \ - \ - tmp10 = vec_add(tmp0, tmp3); \ - tmp13 = vec_sub(tmp0, tmp3); \ - tmp11 = vec_add(tmp1, tmp2); \ - tmp12 = vec_sub(tmp1, tmp2); \ - \ - out0 = vec_add(tmp10, tmp11); \ - out0 = vec_add(out0, pw_descale_p2x); \ - out0 = vec_sra(out0, pass1_bits); \ - out4 = vec_sub(tmp10, tmp11); \ - out4 = vec_add(out4, pw_descale_p2x); \ - out4 = vec_sra(out4, pass1_bits); \ - \ - DO_FDCT_COMMON(2); \ -} - - -void -jsimd_fdct_islow_altivec (DCTELEM *data) -{ - __vector short row0, row1, row2, 
row3, row4, row5, row6, row7, - col0, col1, col2, col3, col4, col5, col6, col7, - tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13, - tmp47l, tmp47h, tmp56l, tmp56h, tmp1312l, tmp1312h, - z3, z4, z34l, z34h, - out0, out1, out2, out3, out4, out5, out6, out7; - __vector int z3l, z3h, z4l, z4h, - out1l, out1h, out2l, out2h, out3l, out3h, out5l, out5h, out6l, out6h, - out7l, out7h; - - /* Constants */ - __vector short - pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) }, - pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) }, - pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) }, - pw_f117_f078 = { __4X2(F_1_175, F_1_175 - F_0_390) }, - pw_mf060_mf089 = { __4X2(F_0_298 - F_0_899, -F_0_899) }, - pw_mf089_f060 = { __4X2(-F_0_899, F_1_501 - F_0_899) }, - pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) }, - pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) }, - pw_descale_p2x = { __8X(1 << (PASS1_BITS - 1)) }; - __vector unsigned short pass1_bits = { __8X(PASS1_BITS) }; - __vector int pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) }, - pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) }; - __vector unsigned int descale_p1 = { __4X(DESCALE_P1) }, - descale_p2 = { __4X(DESCALE_P2) }; - - /* Pass 1: process rows */ - - row0 = vec_ld(0, data); - row1 = vec_ld(16, data); - row2 = vec_ld(32, data); - row3 = vec_ld(48, data); - row4 = vec_ld(64, data); - row5 = vec_ld(80, data); - row6 = vec_ld(96, data); - row7 = vec_ld(112, data); - - TRANSPOSE(row, col); - - tmp0 = vec_add(col0, col7); - tmp7 = vec_sub(col0, col7); - tmp1 = vec_add(col1, col6); - tmp6 = vec_sub(col1, col6); - tmp2 = vec_add(col2, col5); - tmp5 = vec_sub(col2, col5); - tmp3 = vec_add(col3, col4); - tmp4 = vec_sub(col3, col4); - - DO_FDCT_PASS1(); - - /* Pass 2: process columns */ - - TRANSPOSE(out, row); - - tmp0 = vec_add(row0, row7); - tmp7 = vec_sub(row0, row7); - tmp1 = vec_add(row1, row6); - tmp6 = vec_sub(row1, row6); - tmp2 = vec_add(row2, row5); - tmp5 = vec_sub(row2, 
row5); - tmp3 = vec_add(row3, row4); - tmp4 = vec_sub(row3, row4); - - DO_FDCT_PASS2(); - - vec_st(out0, 0, data); - vec_st(out1, 16, data); - vec_st(out2, 32, data); - vec_st(out3, 48, data); - vec_st(out4, 64, data); - vec_st(out5, 80, data); - vec_st(out6, 96, data); - vec_st(out7, 112, data); -} diff --git a/simd/jfdctint-mmx.asm b/simd/jfdctint-mmx.asm deleted file mode 100644 index 9142ad8..0000000 --- a/simd/jfdctint-mmx.asm +++ /dev/null @@ -1,621 +0,0 @@ -; -; jfdctint.asm - accurate integer FDCT (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a slow-but-accurate integer implementation of the -; forward DCT (Discrete Cosine Transform). The following code is based -; directly on the IJG's original jfdctint.c; see the jfdctint.c for -; more details. 
-; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 13 -%define PASS1_BITS 2 - -%define DESCALE_P1 (CONST_BITS-PASS1_BITS) -%define DESCALE_P2 (CONST_BITS+PASS1_BITS) - -%if CONST_BITS == 13 -F_0_298 equ 2446 ; FIX(0.298631336) -F_0_390 equ 3196 ; FIX(0.390180644) -F_0_541 equ 4433 ; FIX(0.541196100) -F_0_765 equ 6270 ; FIX(0.765366865) -F_0_899 equ 7373 ; FIX(0.899976223) -F_1_175 equ 9633 ; FIX(1.175875602) -F_1_501 equ 12299 ; FIX(1.501321110) -F_1_847 equ 15137 ; FIX(1.847759065) -F_1_961 equ 16069 ; FIX(1.961570560) -F_2_053 equ 16819 ; FIX(2.053119869) -F_2_562 equ 20995 ; FIX(2.562915447) -F_3_072 equ 25172 ; FIX(3.072711026) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. -%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) -F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) -F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) -F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) -F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) -F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) -F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) -F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) -F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) -F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_fdct_islow_mmx) - -EXTN(jconst_fdct_islow_mmx): - -PW_F130_F054 times 2 dw (F_0_541+F_0_765), F_0_541 -PW_F054_MF130 times 2 dw F_0_541, (F_0_541-F_1_847) -PW_MF078_F117 times 2 dw 
(F_1_175-F_1_961), F_1_175 -PW_F117_F078 times 2 dw F_1_175, (F_1_175-F_0_390) -PW_MF060_MF089 times 2 dw (F_0_298-F_0_899),-F_0_899 -PW_MF089_F060 times 2 dw -F_0_899, (F_1_501-F_0_899) -PW_MF050_MF256 times 2 dw (F_2_053-F_2_562),-F_2_562 -PW_MF256_F050 times 2 dw -F_2_562, (F_3_072-F_2_562) -PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1-1) -PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2-1) -PW_DESCALE_P2X times 4 dw 1 << (PASS1_BITS-1) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform the forward DCT on one block of samples. -; -; GLOBAL(void) -; jsimd_fdct_islow_mmx (DCTELEM *data) -; - -%define data(b) (b)+8 ; DCTELEM *data - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_fdct_islow_mmx) - -EXTN(jsimd_fdct_islow_mmx): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved -; push esi ; unused -; push edi ; unused - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process rows. 
- - mov edx, POINTER [data(eax)] ; (DCTELEM *) - mov ecx, DCTSIZE/4 - alignx 16,7 -.rowloop: - - movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] - movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] - movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)] - movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)] - - ; mm0=(20 21 22 23), mm2=(24 25 26 27) - ; mm1=(30 31 32 33), mm3=(34 35 36 37) - - movq mm4,mm0 ; transpose coefficients(phase 1) - punpcklwd mm0,mm1 ; mm0=(20 30 21 31) - punpckhwd mm4,mm1 ; mm4=(22 32 23 33) - movq mm5,mm2 ; transpose coefficients(phase 1) - punpcklwd mm2,mm3 ; mm2=(24 34 25 35) - punpckhwd mm5,mm3 ; mm5=(26 36 27 37) - - movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] - movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] - movq mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)] - movq mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)] - - ; mm6=(00 01 02 03), mm1=(04 05 06 07) - ; mm7=(10 11 12 13), mm3=(14 15 16 17) - - movq MMWORD [wk(0)], mm4 ; wk(0)=(22 32 23 33) - movq MMWORD [wk(1)], mm2 ; wk(1)=(24 34 25 35) - - movq mm4,mm6 ; transpose coefficients(phase 1) - punpcklwd mm6,mm7 ; mm6=(00 10 01 11) - punpckhwd mm4,mm7 ; mm4=(02 12 03 13) - movq mm2,mm1 ; transpose coefficients(phase 1) - punpcklwd mm1,mm3 ; mm1=(04 14 05 15) - punpckhwd mm2,mm3 ; mm2=(06 16 07 17) - - movq mm7,mm6 ; transpose coefficients(phase 2) - punpckldq mm6,mm0 ; mm6=(00 10 20 30)=data0 - punpckhdq mm7,mm0 ; mm7=(01 11 21 31)=data1 - movq mm3,mm2 ; transpose coefficients(phase 2) - punpckldq mm2,mm5 ; mm2=(06 16 26 36)=data6 - punpckhdq mm3,mm5 ; mm3=(07 17 27 37)=data7 - - movq mm0,mm7 - movq mm5,mm6 - psubw mm7,mm2 ; mm7=data1-data6=tmp6 - psubw mm6,mm3 ; mm6=data0-data7=tmp7 - paddw mm0,mm2 ; mm0=data1+data6=tmp1 - paddw mm5,mm3 ; mm5=data0+data7=tmp0 - - movq mm2, MMWORD [wk(0)] ; mm2=(22 32 23 33) - movq mm3, MMWORD [wk(1)] ; mm3=(24 34 25 35) - movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 - movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 - - movq mm7,mm4 ; transpose 
coefficients(phase 2) - punpckldq mm4,mm2 ; mm4=(02 12 22 32)=data2 - punpckhdq mm7,mm2 ; mm7=(03 13 23 33)=data3 - movq mm6,mm1 ; transpose coefficients(phase 2) - punpckldq mm1,mm3 ; mm1=(04 14 24 34)=data4 - punpckhdq mm6,mm3 ; mm6=(05 15 25 35)=data5 - - movq mm2,mm7 - movq mm3,mm4 - paddw mm7,mm1 ; mm7=data3+data4=tmp3 - paddw mm4,mm6 ; mm4=data2+data5=tmp2 - psubw mm2,mm1 ; mm2=data3-data4=tmp4 - psubw mm3,mm6 ; mm3=data2-data5=tmp5 - - ; -- Even part - - movq mm1,mm5 - movq mm6,mm0 - paddw mm5,mm7 ; mm5=tmp10 - paddw mm0,mm4 ; mm0=tmp11 - psubw mm1,mm7 ; mm1=tmp13 - psubw mm6,mm4 ; mm6=tmp12 - - movq mm7,mm5 - paddw mm5,mm0 ; mm5=tmp10+tmp11 - psubw mm7,mm0 ; mm7=tmp10-tmp11 - - psllw mm5,PASS1_BITS ; mm5=data0 - psllw mm7,PASS1_BITS ; mm7=data4 - - movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5 - movq MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm7 - - ; (Original) - ; z1 = (tmp12 + tmp13) * 0.541196100; - ; data2 = z1 + tmp13 * 0.765366865; - ; data6 = z1 + tmp12 * -1.847759065; - ; - ; (This implementation) - ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; - ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); - - movq mm4,mm1 ; mm1=tmp13 - movq mm0,mm1 - punpcklwd mm4,mm6 ; mm6=tmp12 - punpckhwd mm0,mm6 - movq mm1,mm4 - movq mm6,mm0 - pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=data2L - pmaddwd mm0,[GOTOFF(ebx,PW_F130_F054)] ; mm0=data2H - pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=data6L - pmaddwd mm6,[GOTOFF(ebx,PW_F054_MF130)] ; mm6=data6H - - paddd mm4,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd mm0,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad mm4,DESCALE_P1 - psrad mm0,DESCALE_P1 - paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd mm6,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad mm1,DESCALE_P1 - psrad mm6,DESCALE_P1 - - packssdw mm4,mm0 ; mm4=data2 - packssdw mm1,mm6 ; mm1=data6 - - movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 - movq MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm1 - - ; -- Odd part - - movq mm5, MMWORD [wk(0)] ; 
mm5=tmp6 - movq mm7, MMWORD [wk(1)] ; mm7=tmp7 - - movq mm0,mm2 ; mm2=tmp4 - movq mm6,mm3 ; mm3=tmp5 - paddw mm0,mm5 ; mm0=z3 - paddw mm6,mm7 ; mm6=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movq mm4,mm0 - movq mm1,mm0 - punpcklwd mm4,mm6 - punpckhwd mm1,mm6 - movq mm0,mm4 - movq mm6,mm1 - pmaddwd mm4,[GOTOFF(ebx,PW_MF078_F117)] ; mm4=z3L - pmaddwd mm1,[GOTOFF(ebx,PW_MF078_F117)] ; mm1=z3H - pmaddwd mm0,[GOTOFF(ebx,PW_F117_F078)] ; mm0=z4L - pmaddwd mm6,[GOTOFF(ebx,PW_F117_F078)] ; mm6=z4H - - movq MMWORD [wk(0)], mm4 ; wk(0)=z3L - movq MMWORD [wk(1)], mm1 ; wk(1)=z3H - - ; (Original) - ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; - ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; - ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; - ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; - ; - ; (This implementation) - ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; - ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; - ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); - ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); - ; data7 = tmp4 + z3; data5 = tmp5 + z4; - ; data3 = tmp6 + z3; data1 = tmp7 + z4; - - movq mm4,mm2 - movq mm1,mm2 - punpcklwd mm4,mm7 - punpckhwd mm1,mm7 - movq mm2,mm4 - movq mm7,mm1 - pmaddwd mm4,[GOTOFF(ebx,PW_MF060_MF089)] ; mm4=tmp4L - pmaddwd mm1,[GOTOFF(ebx,PW_MF060_MF089)] ; mm1=tmp4H - pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L - pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H - - paddd mm4, MMWORD [wk(0)] ; mm4=data7L - paddd mm1, MMWORD [wk(1)] ; mm1=data7H - paddd mm2,mm0 ; mm2=data1L - paddd mm7,mm6 ; 
mm7=data1H - - paddd mm4,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad mm4,DESCALE_P1 - psrad mm1,DESCALE_P1 - paddd mm2,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd mm7,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad mm2,DESCALE_P1 - psrad mm7,DESCALE_P1 - - packssdw mm4,mm1 ; mm4=data7 - packssdw mm2,mm7 ; mm2=data1 - - movq MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm4 - movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2 - - movq mm1,mm3 - movq mm7,mm3 - punpcklwd mm1,mm5 - punpckhwd mm7,mm5 - movq mm3,mm1 - movq mm5,mm7 - pmaddwd mm1,[GOTOFF(ebx,PW_MF050_MF256)] ; mm1=tmp5L - pmaddwd mm7,[GOTOFF(ebx,PW_MF050_MF256)] ; mm7=tmp5H - pmaddwd mm3,[GOTOFF(ebx,PW_MF256_F050)] ; mm3=tmp6L - pmaddwd mm5,[GOTOFF(ebx,PW_MF256_F050)] ; mm5=tmp6H - - paddd mm1,mm0 ; mm1=data5L - paddd mm7,mm6 ; mm7=data5H - paddd mm3, MMWORD [wk(0)] ; mm3=data3L - paddd mm5, MMWORD [wk(1)] ; mm5=data3H - - paddd mm1,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd mm7,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad mm1,DESCALE_P1 - psrad mm7,DESCALE_P1 - paddd mm3,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd mm5,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad mm3,DESCALE_P1 - psrad mm5,DESCALE_P1 - - packssdw mm1,mm7 ; mm1=data5 - packssdw mm3,mm5 ; mm3=data3 - - movq MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm1 - movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3 - - add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM - dec ecx - jnz near .rowloop - - ; ---- Pass 2: process columns. 
- - mov edx, POINTER [data(eax)] ; (DCTELEM *) - mov ecx, DCTSIZE/4 - alignx 16,7 -.columnloop: - - movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)] - movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)] - movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)] - movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)] - - ; mm0=(02 12 22 32), mm2=(42 52 62 72) - ; mm1=(03 13 23 33), mm3=(43 53 63 73) - - movq mm4,mm0 ; transpose coefficients(phase 1) - punpcklwd mm0,mm1 ; mm0=(02 03 12 13) - punpckhwd mm4,mm1 ; mm4=(22 23 32 33) - movq mm5,mm2 ; transpose coefficients(phase 1) - punpcklwd mm2,mm3 ; mm2=(42 43 52 53) - punpckhwd mm5,mm3 ; mm5=(62 63 72 73) - - movq mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)] - movq mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)] - movq mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)] - movq mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)] - - ; mm6=(00 10 20 30), mm1=(40 50 60 70) - ; mm7=(01 11 21 31), mm3=(41 51 61 71) - - movq MMWORD [wk(0)], mm4 ; wk(0)=(22 23 32 33) - movq MMWORD [wk(1)], mm2 ; wk(1)=(42 43 52 53) - - movq mm4,mm6 ; transpose coefficients(phase 1) - punpcklwd mm6,mm7 ; mm6=(00 01 10 11) - punpckhwd mm4,mm7 ; mm4=(20 21 30 31) - movq mm2,mm1 ; transpose coefficients(phase 1) - punpcklwd mm1,mm3 ; mm1=(40 41 50 51) - punpckhwd mm2,mm3 ; mm2=(60 61 70 71) - - movq mm7,mm6 ; transpose coefficients(phase 2) - punpckldq mm6,mm0 ; mm6=(00 01 02 03)=data0 - punpckhdq mm7,mm0 ; mm7=(10 11 12 13)=data1 - movq mm3,mm2 ; transpose coefficients(phase 2) - punpckldq mm2,mm5 ; mm2=(60 61 62 63)=data6 - punpckhdq mm3,mm5 ; mm3=(70 71 72 73)=data7 - - movq mm0,mm7 - movq mm5,mm6 - psubw mm7,mm2 ; mm7=data1-data6=tmp6 - psubw mm6,mm3 ; mm6=data0-data7=tmp7 - paddw mm0,mm2 ; mm0=data1+data6=tmp1 - paddw mm5,mm3 ; mm5=data0+data7=tmp0 - - movq mm2, MMWORD [wk(0)] ; mm2=(22 23 32 33) - movq mm3, MMWORD [wk(1)] ; mm3=(42 43 52 53) - movq MMWORD [wk(0)], mm7 ; wk(0)=tmp6 - movq MMWORD [wk(1)], mm6 ; wk(1)=tmp7 - - movq mm7,mm4 ; transpose 
coefficients(phase 2) - punpckldq mm4,mm2 ; mm4=(20 21 22 23)=data2 - punpckhdq mm7,mm2 ; mm7=(30 31 32 33)=data3 - movq mm6,mm1 ; transpose coefficients(phase 2) - punpckldq mm1,mm3 ; mm1=(40 41 42 43)=data4 - punpckhdq mm6,mm3 ; mm6=(50 51 52 53)=data5 - - movq mm2,mm7 - movq mm3,mm4 - paddw mm7,mm1 ; mm7=data3+data4=tmp3 - paddw mm4,mm6 ; mm4=data2+data5=tmp2 - psubw mm2,mm1 ; mm2=data3-data4=tmp4 - psubw mm3,mm6 ; mm3=data2-data5=tmp5 - - ; -- Even part - - movq mm1,mm5 - movq mm6,mm0 - paddw mm5,mm7 ; mm5=tmp10 - paddw mm0,mm4 ; mm0=tmp11 - psubw mm1,mm7 ; mm1=tmp13 - psubw mm6,mm4 ; mm6=tmp12 - - movq mm7,mm5 - paddw mm5,mm0 ; mm5=tmp10+tmp11 - psubw mm7,mm0 ; mm7=tmp10-tmp11 - - paddw mm5,[GOTOFF(ebx,PW_DESCALE_P2X)] - paddw mm7,[GOTOFF(ebx,PW_DESCALE_P2X)] - psraw mm5,PASS1_BITS ; mm5=data0 - psraw mm7,PASS1_BITS ; mm7=data4 - - movq MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5 - movq MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm7 - - ; (Original) - ; z1 = (tmp12 + tmp13) * 0.541196100; - ; data2 = z1 + tmp13 * 0.765366865; - ; data6 = z1 + tmp12 * -1.847759065; - ; - ; (This implementation) - ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; - ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); - - movq mm4,mm1 ; mm1=tmp13 - movq mm0,mm1 - punpcklwd mm4,mm6 ; mm6=tmp12 - punpckhwd mm0,mm6 - movq mm1,mm4 - movq mm6,mm0 - pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=data2L - pmaddwd mm0,[GOTOFF(ebx,PW_F130_F054)] ; mm0=data2H - pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=data6L - pmaddwd mm6,[GOTOFF(ebx,PW_F054_MF130)] ; mm6=data6H - - paddd mm4,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd mm0,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad mm4,DESCALE_P2 - psrad mm0,DESCALE_P2 - paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd mm6,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad mm1,DESCALE_P2 - psrad mm6,DESCALE_P2 - - packssdw mm4,mm0 ; mm4=data2 - packssdw mm1,mm6 ; mm1=data6 - - movq MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4 - movq MMWORD 
[MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm1 - - ; -- Odd part - - movq mm5, MMWORD [wk(0)] ; mm5=tmp6 - movq mm7, MMWORD [wk(1)] ; mm7=tmp7 - - movq mm0,mm2 ; mm2=tmp4 - movq mm6,mm3 ; mm3=tmp5 - paddw mm0,mm5 ; mm0=z3 - paddw mm6,mm7 ; mm6=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movq mm4,mm0 - movq mm1,mm0 - punpcklwd mm4,mm6 - punpckhwd mm1,mm6 - movq mm0,mm4 - movq mm6,mm1 - pmaddwd mm4,[GOTOFF(ebx,PW_MF078_F117)] ; mm4=z3L - pmaddwd mm1,[GOTOFF(ebx,PW_MF078_F117)] ; mm1=z3H - pmaddwd mm0,[GOTOFF(ebx,PW_F117_F078)] ; mm0=z4L - pmaddwd mm6,[GOTOFF(ebx,PW_F117_F078)] ; mm6=z4H - - movq MMWORD [wk(0)], mm4 ; wk(0)=z3L - movq MMWORD [wk(1)], mm1 ; wk(1)=z3H - - ; (Original) - ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; - ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; - ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; - ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; - ; - ; (This implementation) - ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; - ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; - ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); - ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); - ; data7 = tmp4 + z3; data5 = tmp5 + z4; - ; data3 = tmp6 + z3; data1 = tmp7 + z4; - - movq mm4,mm2 - movq mm1,mm2 - punpcklwd mm4,mm7 - punpckhwd mm1,mm7 - movq mm2,mm4 - movq mm7,mm1 - pmaddwd mm4,[GOTOFF(ebx,PW_MF060_MF089)] ; mm4=tmp4L - pmaddwd mm1,[GOTOFF(ebx,PW_MF060_MF089)] ; mm1=tmp4H - pmaddwd mm2,[GOTOFF(ebx,PW_MF089_F060)] ; mm2=tmp7L - pmaddwd mm7,[GOTOFF(ebx,PW_MF089_F060)] ; mm7=tmp7H - - paddd mm4, MMWORD [wk(0)] ; mm4=data7L - paddd 
mm1, MMWORD [wk(1)] ; mm1=data7H - paddd mm2,mm0 ; mm2=data1L - paddd mm7,mm6 ; mm7=data1H - - paddd mm4,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad mm4,DESCALE_P2 - psrad mm1,DESCALE_P2 - paddd mm2,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd mm7,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad mm2,DESCALE_P2 - psrad mm7,DESCALE_P2 - - packssdw mm4,mm1 ; mm4=data7 - packssdw mm2,mm7 ; mm2=data1 - - movq MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm4 - movq MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2 - - movq mm1,mm3 - movq mm7,mm3 - punpcklwd mm1,mm5 - punpckhwd mm7,mm5 - movq mm3,mm1 - movq mm5,mm7 - pmaddwd mm1,[GOTOFF(ebx,PW_MF050_MF256)] ; mm1=tmp5L - pmaddwd mm7,[GOTOFF(ebx,PW_MF050_MF256)] ; mm7=tmp5H - pmaddwd mm3,[GOTOFF(ebx,PW_MF256_F050)] ; mm3=tmp6L - pmaddwd mm5,[GOTOFF(ebx,PW_MF256_F050)] ; mm5=tmp6H - - paddd mm1,mm0 ; mm1=data5L - paddd mm7,mm6 ; mm7=data5H - paddd mm3, MMWORD [wk(0)] ; mm3=data3L - paddd mm5, MMWORD [wk(1)] ; mm5=data3H - - paddd mm1,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd mm7,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad mm1,DESCALE_P2 - psrad mm7,DESCALE_P2 - paddd mm3,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd mm5,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad mm3,DESCALE_P2 - psrad mm5,DESCALE_P2 - - packssdw mm1,mm7 ; mm1=data5 - packssdw mm3,mm5 ; mm3=data3 - - movq MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm1 - movq MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3 - - add edx, byte 4*SIZEOF_DCTELEM - dec ecx - jnz near .columnloop - - emms ; empty MMX state - -; pop edi ; unused -; pop esi ; unused -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - poppic ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jfdctint-sse2-64.asm b/simd/jfdctint-sse2-64.asm deleted file mode 100644 index 9a0ca0f..0000000 --- a/simd/jfdctint-sse2-64.asm +++ /dev/null @@ -1,621 +0,0 @@ -; -; jfdctint.asm - accurate integer FDCT (64-bit SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a slow-but-accurate integer implementation of the -; forward DCT (Discrete Cosine Transform). The following code is based -; directly on the IJG's original jfdctint.c; see the jfdctint.c for -; more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 13 -%define PASS1_BITS 2 - -%define DESCALE_P1 (CONST_BITS-PASS1_BITS) -%define DESCALE_P2 (CONST_BITS+PASS1_BITS) - -%if CONST_BITS == 13 -F_0_298 equ 2446 ; FIX(0.298631336) -F_0_390 equ 3196 ; FIX(0.390180644) -F_0_541 equ 4433 ; FIX(0.541196100) -F_0_765 equ 6270 ; FIX(0.765366865) -F_0_899 equ 7373 ; FIX(0.899976223) -F_1_175 equ 9633 ; FIX(1.175875602) -F_1_501 equ 12299 ; FIX(1.501321110) -F_1_847 equ 15137 ; FIX(1.847759065) -F_1_961 equ 16069 ; FIX(1.961570560) -F_2_053 equ 16819 ; FIX(2.053119869) -F_2_562 equ 20995 ; FIX(2.562915447) -F_3_072 equ 25172 ; FIX(3.072711026) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. 
-%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) -F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) -F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) -F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) -F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) -F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) -F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) -F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) -F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) -F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_fdct_islow_sse2) - -EXTN(jconst_fdct_islow_sse2): - -PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 -PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) -PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 -PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) -PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 -PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) -PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 -PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) -PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) -PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) -PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS-1) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Perform the forward DCT on one block of samples. 
-; -; GLOBAL(void) -; jsimd_fdct_islow_sse2 (DCTELEM *data) -; - -; r10 = DCTELEM *data - -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 6 - - align 16 - global EXTN(jsimd_fdct_islow_sse2) - -EXTN(jsimd_fdct_islow_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - - ; ---- Pass 1: process rows. - - mov rdx, r10 ; (DCTELEM *) - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)] - - ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) - ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) - - movdqa xmm4,xmm0 ; transpose coefficients(phase 1) - punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) - punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) - movdqa xmm5,xmm2 ; transpose coefficients(phase 1) - punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) - punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) - - movdqa xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)] - movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)] - - ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) - ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) - - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) - movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) - - movdqa xmm2,xmm6 ; transpose coefficients(phase 1) - punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) - punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) - movdqa xmm5,xmm1 ; transpose coefficients(phase 1) - punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) - 
punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) - - movdqa xmm7,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) - punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) - movdqa xmm3,xmm2 ; transpose coefficients(phase 2) - punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) - punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) - - movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) - movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=(42 52 62 72 43 53 63 73) - movdqa XMMWORD [wk(3)], xmm2 ; wk(3)=(44 54 64 74 45 55 65 75) - - movdqa xmm7,xmm0 ; transpose coefficients(phase 2) - punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) - punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) - movdqa xmm2,xmm4 ; transpose coefficients(phase 2) - punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) - punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) - - movdqa xmm1,xmm0 ; transpose coefficients(phase 3) - punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 - punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 - movdqa xmm5,xmm2 ; transpose coefficients(phase 3) - punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 - punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 - - movdqa xmm6,xmm1 - movdqa xmm3,xmm0 - psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 - psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 - paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 - paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 - - movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(42 52 62 72 43 53 63 73) - movdqa xmm5, XMMWORD [wk(3)] ; xmm5=(44 54 64 74 45 55 65 75) - movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 - - movdqa xmm1,xmm7 ; transpose coefficients(phase 3) - punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 - punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 - movdqa xmm0,xmm4 ; transpose coefficients(phase 3) - punpcklqdq 
xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 - punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 - - movdqa xmm2,xmm1 - movdqa xmm5,xmm7 - paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 - paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 - psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 - psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 - - ; -- Even part - - movdqa xmm4,xmm3 - movdqa xmm0,xmm6 - paddw xmm3,xmm1 ; xmm3=tmp10 - paddw xmm6,xmm7 ; xmm6=tmp11 - psubw xmm4,xmm1 ; xmm4=tmp13 - psubw xmm0,xmm7 ; xmm0=tmp12 - - movdqa xmm1,xmm3 - paddw xmm3,xmm6 ; xmm3=tmp10+tmp11 - psubw xmm1,xmm6 ; xmm1=tmp10-tmp11 - - psllw xmm3,PASS1_BITS ; xmm3=data0 - psllw xmm1,PASS1_BITS ; xmm1=data4 - - movdqa XMMWORD [wk(2)], xmm3 ; wk(2)=data0 - movdqa XMMWORD [wk(3)], xmm1 ; wk(3)=data4 - - ; (Original) - ; z1 = (tmp12 + tmp13) * 0.541196100; - ; data2 = z1 + tmp13 * 0.765366865; - ; data6 = z1 + tmp12 * -1.847759065; - ; - ; (This implementation) - ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; - ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); - - movdqa xmm7,xmm4 ; xmm4=tmp13 - movdqa xmm6,xmm4 - punpcklwd xmm7,xmm0 ; xmm0=tmp12 - punpckhwd xmm6,xmm0 - movdqa xmm4,xmm7 - movdqa xmm0,xmm6 - pmaddwd xmm7,[rel PW_F130_F054] ; xmm7=data2L - pmaddwd xmm6,[rel PW_F130_F054] ; xmm6=data2H - pmaddwd xmm4,[rel PW_F054_MF130] ; xmm4=data6L - pmaddwd xmm0,[rel PW_F054_MF130] ; xmm0=data6H - - paddd xmm7,[rel PD_DESCALE_P1] - paddd xmm6,[rel PD_DESCALE_P1] - psrad xmm7,DESCALE_P1 - psrad xmm6,DESCALE_P1 - paddd xmm4,[rel PD_DESCALE_P1] - paddd xmm0,[rel PD_DESCALE_P1] - psrad xmm4,DESCALE_P1 - psrad xmm0,DESCALE_P1 - - packssdw xmm7,xmm6 ; xmm7=data2 - packssdw xmm4,xmm0 ; xmm4=data6 - - movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=data2 - movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=data6 - - ; -- Odd part - - movdqa xmm3, XMMWORD [wk(0)] ; xmm3=tmp6 - movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp7 - - movdqa xmm6,xmm2 ; xmm2=tmp4 - movdqa xmm0,xmm5 ; xmm5=tmp5 - paddw xmm6,xmm3 ; 
xmm6=z3 - paddw xmm0,xmm1 ; xmm0=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movdqa xmm7,xmm6 - movdqa xmm4,xmm6 - punpcklwd xmm7,xmm0 - punpckhwd xmm4,xmm0 - movdqa xmm6,xmm7 - movdqa xmm0,xmm4 - pmaddwd xmm7,[rel PW_MF078_F117] ; xmm7=z3L - pmaddwd xmm4,[rel PW_MF078_F117] ; xmm4=z3H - pmaddwd xmm6,[rel PW_F117_F078] ; xmm6=z4L - pmaddwd xmm0,[rel PW_F117_F078] ; xmm0=z4H - - movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L - movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H - - ; (Original) - ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; - ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; - ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; - ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; - ; - ; (This implementation) - ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; - ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; - ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); - ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); - ; data7 = tmp4 + z3; data5 = tmp5 + z4; - ; data3 = tmp6 + z3; data1 = tmp7 + z4; - - movdqa xmm7,xmm2 - movdqa xmm4,xmm2 - punpcklwd xmm7,xmm1 - punpckhwd xmm4,xmm1 - movdqa xmm2,xmm7 - movdqa xmm1,xmm4 - pmaddwd xmm7,[rel PW_MF060_MF089] ; xmm7=tmp4L - pmaddwd xmm4,[rel PW_MF060_MF089] ; xmm4=tmp4H - pmaddwd xmm2,[rel PW_MF089_F060] ; xmm2=tmp7L - pmaddwd xmm1,[rel PW_MF089_F060] ; xmm1=tmp7H - - paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L - paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H - paddd xmm2,xmm6 ; xmm2=data1L - paddd xmm1,xmm0 ; xmm1=data1H - - paddd xmm7,[rel PD_DESCALE_P1] - paddd xmm4,[rel PD_DESCALE_P1] - psrad xmm7,DESCALE_P1 - psrad 
xmm4,DESCALE_P1 - paddd xmm2,[rel PD_DESCALE_P1] - paddd xmm1,[rel PD_DESCALE_P1] - psrad xmm2,DESCALE_P1 - psrad xmm1,DESCALE_P1 - - packssdw xmm7,xmm4 ; xmm7=data7 - packssdw xmm2,xmm1 ; xmm2=data1 - - movdqa xmm4,xmm5 - movdqa xmm1,xmm5 - punpcklwd xmm4,xmm3 - punpckhwd xmm1,xmm3 - movdqa xmm5,xmm4 - movdqa xmm3,xmm1 - pmaddwd xmm4,[rel PW_MF050_MF256] ; xmm4=tmp5L - pmaddwd xmm1,[rel PW_MF050_MF256] ; xmm1=tmp5H - pmaddwd xmm5,[rel PW_MF256_F050] ; xmm5=tmp6L - pmaddwd xmm3,[rel PW_MF256_F050] ; xmm3=tmp6H - - paddd xmm4,xmm6 ; xmm4=data5L - paddd xmm1,xmm0 ; xmm1=data5H - paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L - paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H - - paddd xmm4,[rel PD_DESCALE_P1] - paddd xmm1,[rel PD_DESCALE_P1] - psrad xmm4,DESCALE_P1 - psrad xmm1,DESCALE_P1 - paddd xmm5,[rel PD_DESCALE_P1] - paddd xmm3,[rel PD_DESCALE_P1] - psrad xmm5,DESCALE_P1 - psrad xmm3,DESCALE_P1 - - packssdw xmm4,xmm1 ; xmm4=data5 - packssdw xmm5,xmm3 ; xmm5=data3 - - ; ---- Pass 2: process columns. 
- - movdqa xmm6, XMMWORD [wk(2)] ; xmm6=col0 - movdqa xmm0, XMMWORD [wk(4)] ; xmm0=col2 - - ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72) - ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73) - - movdqa xmm1,xmm6 ; transpose coefficients(phase 1) - punpcklwd xmm6,xmm2 ; xmm6=(00 01 10 11 20 21 30 31) - punpckhwd xmm1,xmm2 ; xmm1=(40 41 50 51 60 61 70 71) - movdqa xmm3,xmm0 ; transpose coefficients(phase 1) - punpcklwd xmm0,xmm5 ; xmm0=(02 03 12 13 22 23 32 33) - punpckhwd xmm3,xmm5 ; xmm3=(42 43 52 53 62 63 72 73) - - movdqa xmm2, XMMWORD [wk(3)] ; xmm2=col4 - movdqa xmm5, XMMWORD [wk(5)] ; xmm5=col6 - - ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76) - ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77) - - movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=(02 03 12 13 22 23 32 33) - movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(42 43 52 53 62 63 72 73) - - movdqa xmm0,xmm2 ; transpose coefficients(phase 1) - punpcklwd xmm2,xmm4 ; xmm2=(04 05 14 15 24 25 34 35) - punpckhwd xmm0,xmm4 ; xmm0=(44 45 54 55 64 65 74 75) - movdqa xmm3,xmm5 ; transpose coefficients(phase 1) - punpcklwd xmm5,xmm7 ; xmm5=(06 07 16 17 26 27 36 37) - punpckhwd xmm3,xmm7 ; xmm3=(46 47 56 57 66 67 76 77) - - movdqa xmm4,xmm2 ; transpose coefficients(phase 2) - punpckldq xmm2,xmm5 ; xmm2=(04 05 06 07 14 15 16 17) - punpckhdq xmm4,xmm5 ; xmm4=(24 25 26 27 34 35 36 37) - movdqa xmm7,xmm0 ; transpose coefficients(phase 2) - punpckldq xmm0,xmm3 ; xmm0=(44 45 46 47 54 55 56 57) - punpckhdq xmm7,xmm3 ; xmm7=(64 65 66 67 74 75 76 77) - - movdqa xmm5, XMMWORD [wk(0)] ; xmm5=(02 03 12 13 22 23 32 33) - movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53 62 63 72 73) - movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=(24 25 26 27 34 35 36 37) - movdqa XMMWORD [wk(3)], xmm0 ; wk(3)=(44 45 46 47 54 55 56 57) - - movdqa xmm4,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm5 ; xmm6=(00 01 02 03 10 11 12 13) - punpckhdq xmm4,xmm5 ; xmm4=(20 21 22 23 30 31 32 33) - movdqa 
xmm0,xmm1 ; transpose coefficients(phase 2) - punpckldq xmm1,xmm3 ; xmm1=(40 41 42 43 50 51 52 53) - punpckhdq xmm0,xmm3 ; xmm0=(60 61 62 63 70 71 72 73) - - movdqa xmm5,xmm6 ; transpose coefficients(phase 3) - punpcklqdq xmm6,xmm2 ; xmm6=(00 01 02 03 04 05 06 07)=data0 - punpckhqdq xmm5,xmm2 ; xmm5=(10 11 12 13 14 15 16 17)=data1 - movdqa xmm3,xmm0 ; transpose coefficients(phase 3) - punpcklqdq xmm0,xmm7 ; xmm0=(60 61 62 63 64 65 66 67)=data6 - punpckhqdq xmm3,xmm7 ; xmm3=(70 71 72 73 74 75 76 77)=data7 - - movdqa xmm2,xmm5 - movdqa xmm7,xmm6 - psubw xmm5,xmm0 ; xmm5=data1-data6=tmp6 - psubw xmm6,xmm3 ; xmm6=data0-data7=tmp7 - paddw xmm2,xmm0 ; xmm2=data1+data6=tmp1 - paddw xmm7,xmm3 ; xmm7=data0+data7=tmp0 - - movdqa xmm0, XMMWORD [wk(2)] ; xmm0=(24 25 26 27 34 35 36 37) - movdqa xmm3, XMMWORD [wk(3)] ; xmm3=(44 45 46 47 54 55 56 57) - movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=tmp6 - movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 - - movdqa xmm5,xmm4 ; transpose coefficients(phase 3) - punpcklqdq xmm4,xmm0 ; xmm4=(20 21 22 23 24 25 26 27)=data2 - punpckhqdq xmm5,xmm0 ; xmm5=(30 31 32 33 34 35 36 37)=data3 - movdqa xmm6,xmm1 ; transpose coefficients(phase 3) - punpcklqdq xmm1,xmm3 ; xmm1=(40 41 42 43 44 45 46 47)=data4 - punpckhqdq xmm6,xmm3 ; xmm6=(50 51 52 53 54 55 56 57)=data5 - - movdqa xmm0,xmm5 - movdqa xmm3,xmm4 - paddw xmm5,xmm1 ; xmm5=data3+data4=tmp3 - paddw xmm4,xmm6 ; xmm4=data2+data5=tmp2 - psubw xmm0,xmm1 ; xmm0=data3-data4=tmp4 - psubw xmm3,xmm6 ; xmm3=data2-data5=tmp5 - - ; -- Even part - - movdqa xmm1,xmm7 - movdqa xmm6,xmm2 - paddw xmm7,xmm5 ; xmm7=tmp10 - paddw xmm2,xmm4 ; xmm2=tmp11 - psubw xmm1,xmm5 ; xmm1=tmp13 - psubw xmm6,xmm4 ; xmm6=tmp12 - - movdqa xmm5,xmm7 - paddw xmm7,xmm2 ; xmm7=tmp10+tmp11 - psubw xmm5,xmm2 ; xmm5=tmp10-tmp11 - - paddw xmm7,[rel PW_DESCALE_P2X] - paddw xmm5,[rel PW_DESCALE_P2X] - psraw xmm7,PASS1_BITS ; xmm7=data0 - psraw xmm5,PASS1_BITS ; xmm5=data4 - - movdqa XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm7 - movdqa XMMWORD 
[XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm5 - - ; (Original) - ; z1 = (tmp12 + tmp13) * 0.541196100; - ; data2 = z1 + tmp13 * 0.765366865; - ; data6 = z1 + tmp12 * -1.847759065; - ; - ; (This implementation) - ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; - ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); - - movdqa xmm4,xmm1 ; xmm1=tmp13 - movdqa xmm2,xmm1 - punpcklwd xmm4,xmm6 ; xmm6=tmp12 - punpckhwd xmm2,xmm6 - movdqa xmm1,xmm4 - movdqa xmm6,xmm2 - pmaddwd xmm4,[rel PW_F130_F054] ; xmm4=data2L - pmaddwd xmm2,[rel PW_F130_F054] ; xmm2=data2H - pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=data6L - pmaddwd xmm6,[rel PW_F054_MF130] ; xmm6=data6H - - paddd xmm4,[rel PD_DESCALE_P2] - paddd xmm2,[rel PD_DESCALE_P2] - psrad xmm4,DESCALE_P2 - psrad xmm2,DESCALE_P2 - paddd xmm1,[rel PD_DESCALE_P2] - paddd xmm6,[rel PD_DESCALE_P2] - psrad xmm1,DESCALE_P2 - psrad xmm6,DESCALE_P2 - - packssdw xmm4,xmm2 ; xmm4=data2 - packssdw xmm1,xmm6 ; xmm1=data6 - - movdqa XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm4 - movdqa XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm1 - - ; -- Odd part - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp6 - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 - - movdqa xmm2,xmm0 ; xmm0=tmp4 - movdqa xmm6,xmm3 ; xmm3=tmp5 - paddw xmm2,xmm7 ; xmm2=z3 - paddw xmm6,xmm5 ; xmm6=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movdqa xmm4,xmm2 - movdqa xmm1,xmm2 - punpcklwd xmm4,xmm6 - punpckhwd xmm1,xmm6 - movdqa xmm2,xmm4 - movdqa xmm6,xmm1 - pmaddwd xmm4,[rel PW_MF078_F117] ; xmm4=z3L - pmaddwd xmm1,[rel PW_MF078_F117] ; xmm1=z3H - pmaddwd xmm2,[rel PW_F117_F078] ; xmm2=z4L - pmaddwd xmm6,[rel PW_F117_F078] ; xmm6=z4H - - movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L - movdqa XMMWORD [wk(1)], xmm1 ; 
wk(1)=z3H - - ; (Original) - ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; - ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; - ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; - ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; - ; - ; (This implementation) - ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; - ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; - ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); - ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); - ; data7 = tmp4 + z3; data5 = tmp5 + z4; - ; data3 = tmp6 + z3; data1 = tmp7 + z4; - - movdqa xmm4,xmm0 - movdqa xmm1,xmm0 - punpcklwd xmm4,xmm5 - punpckhwd xmm1,xmm5 - movdqa xmm0,xmm4 - movdqa xmm5,xmm1 - pmaddwd xmm4,[rel PW_MF060_MF089] ; xmm4=tmp4L - pmaddwd xmm1,[rel PW_MF060_MF089] ; xmm1=tmp4H - pmaddwd xmm0,[rel PW_MF089_F060] ; xmm0=tmp7L - pmaddwd xmm5,[rel PW_MF089_F060] ; xmm5=tmp7H - - paddd xmm4, XMMWORD [wk(0)] ; xmm4=data7L - paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H - paddd xmm0,xmm2 ; xmm0=data1L - paddd xmm5,xmm6 ; xmm5=data1H - - paddd xmm4,[rel PD_DESCALE_P2] - paddd xmm1,[rel PD_DESCALE_P2] - psrad xmm4,DESCALE_P2 - psrad xmm1,DESCALE_P2 - paddd xmm0,[rel PD_DESCALE_P2] - paddd xmm5,[rel PD_DESCALE_P2] - psrad xmm0,DESCALE_P2 - psrad xmm5,DESCALE_P2 - - packssdw xmm4,xmm1 ; xmm4=data7 - packssdw xmm0,xmm5 ; xmm0=data1 - - movdqa XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm4 - movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm0 - - movdqa xmm1,xmm3 - movdqa xmm5,xmm3 - punpcklwd xmm1,xmm7 - punpckhwd xmm5,xmm7 - movdqa xmm3,xmm1 - movdqa xmm7,xmm5 - pmaddwd xmm1,[rel PW_MF050_MF256] ; xmm1=tmp5L - pmaddwd xmm5,[rel PW_MF050_MF256] ; xmm5=tmp5H - pmaddwd xmm3,[rel PW_MF256_F050] ; xmm3=tmp6L - pmaddwd xmm7,[rel PW_MF256_F050] ; xmm7=tmp6H - - paddd xmm1,xmm2 ; xmm1=data5L - paddd xmm5,xmm6 ; xmm5=data5H 
- paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L - paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H - - paddd xmm1,[rel PD_DESCALE_P2] - paddd xmm5,[rel PD_DESCALE_P2] - psrad xmm1,DESCALE_P2 - psrad xmm5,DESCALE_P2 - paddd xmm3,[rel PD_DESCALE_P2] - paddd xmm7,[rel PD_DESCALE_P2] - psrad xmm3,DESCALE_P2 - psrad xmm7,DESCALE_P2 - - packssdw xmm1,xmm5 ; xmm1=data5 - packssdw xmm3,xmm7 ; xmm3=data3 - - movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1 - movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3 - - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jfdctint-sse2.asm b/simd/jfdctint-sse2.asm deleted file mode 100644 index db9d0bb..0000000 --- a/simd/jfdctint-sse2.asm +++ /dev/null @@ -1,633 +0,0 @@ -; -; jfdctint.asm - accurate integer FDCT (SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a slow-but-accurate integer implementation of the -; forward DCT (Discrete Cosine Transform). The following code is based -; directly on the IJG's original jfdctint.c; see the jfdctint.c for -; more details. 
-; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 13 -%define PASS1_BITS 2 - -%define DESCALE_P1 (CONST_BITS-PASS1_BITS) -%define DESCALE_P2 (CONST_BITS+PASS1_BITS) - -%if CONST_BITS == 13 -F_0_298 equ 2446 ; FIX(0.298631336) -F_0_390 equ 3196 ; FIX(0.390180644) -F_0_541 equ 4433 ; FIX(0.541196100) -F_0_765 equ 6270 ; FIX(0.765366865) -F_0_899 equ 7373 ; FIX(0.899976223) -F_1_175 equ 9633 ; FIX(1.175875602) -F_1_501 equ 12299 ; FIX(1.501321110) -F_1_847 equ 15137 ; FIX(1.847759065) -F_1_961 equ 16069 ; FIX(1.961570560) -F_2_053 equ 16819 ; FIX(2.053119869) -F_2_562 equ 20995 ; FIX(2.562915447) -F_3_072 equ 25172 ; FIX(3.072711026) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. -%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) -F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) -F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) -F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) -F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) -F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) -F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) -F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) -F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) -F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_fdct_islow_sse2) - -EXTN(jconst_fdct_islow_sse2): - -PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 -PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) -PW_MF078_F117 times 4 dw 
(F_1_175-F_1_961), F_1_175 -PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) -PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 -PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) -PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 -PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) -PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) -PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) -PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS-1) - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform the forward DCT on one block of samples. -; -; GLOBAL(void) -; jsimd_fdct_islow_sse2 (DCTELEM *data) -; - -%define data(b) (b)+8 ; DCTELEM *data - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 6 - - align 16 - global EXTN(jsimd_fdct_islow_sse2) - -EXTN(jsimd_fdct_islow_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic ebx -; push ecx ; unused -; push edx ; need not be preserved -; push esi ; unused -; push edi ; unused - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process rows. 
- - mov edx, POINTER [data(eax)] ; (DCTELEM *) - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)] - movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)] - movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)] - movdqa xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)] - - ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) - ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) - - movdqa xmm4,xmm0 ; transpose coefficients(phase 1) - punpcklwd xmm0,xmm1 ; xmm0=(00 10 01 11 02 12 03 13) - punpckhwd xmm4,xmm1 ; xmm4=(04 14 05 15 06 16 07 17) - movdqa xmm5,xmm2 ; transpose coefficients(phase 1) - punpcklwd xmm2,xmm3 ; xmm2=(20 30 21 31 22 32 23 33) - punpckhwd xmm5,xmm3 ; xmm5=(24 34 25 35 26 36 27 37) - - movdqa xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)] - movdqa xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)] - movdqa xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)] - movdqa xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)] - - ; xmm6=( 4 12 20 28 36 44 52 60), xmm1=( 6 14 22 30 38 46 54 62) - ; xmm7=( 5 13 21 29 37 45 53 61), xmm3=( 7 15 23 31 39 47 55 63) - - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) - movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) - - movdqa xmm2,xmm6 ; transpose coefficients(phase 1) - punpcklwd xmm6,xmm7 ; xmm6=(40 50 41 51 42 52 43 53) - punpckhwd xmm2,xmm7 ; xmm2=(44 54 45 55 46 56 47 57) - movdqa xmm5,xmm1 ; transpose coefficients(phase 1) - punpcklwd xmm1,xmm3 ; xmm1=(60 70 61 71 62 72 63 73) - punpckhwd xmm5,xmm3 ; xmm5=(64 74 65 75 66 76 67 77) - - movdqa xmm7,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm1 ; xmm6=(40 50 60 70 41 51 61 71) - punpckhdq xmm7,xmm1 ; xmm7=(42 52 62 72 43 53 63 73) - movdqa xmm3,xmm2 ; transpose coefficients(phase 2) - punpckldq xmm2,xmm5 ; xmm2=(44 54 64 74 45 55 65 75) - punpckhdq xmm3,xmm5 ; xmm3=(46 56 66 76 47 57 67 77) - - movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) - movdqa xmm5, XMMWORD [wk(1)] 
; xmm5=(24 34 25 35 26 36 27 37) - movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=(42 52 62 72 43 53 63 73) - movdqa XMMWORD [wk(3)], xmm2 ; wk(3)=(44 54 64 74 45 55 65 75) - - movdqa xmm7,xmm0 ; transpose coefficients(phase 2) - punpckldq xmm0,xmm1 ; xmm0=(00 10 20 30 01 11 21 31) - punpckhdq xmm7,xmm1 ; xmm7=(02 12 22 32 03 13 23 33) - movdqa xmm2,xmm4 ; transpose coefficients(phase 2) - punpckldq xmm4,xmm5 ; xmm4=(04 14 24 34 05 15 25 35) - punpckhdq xmm2,xmm5 ; xmm2=(06 16 26 36 07 17 27 37) - - movdqa xmm1,xmm0 ; transpose coefficients(phase 3) - punpcklqdq xmm0,xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 - punpckhqdq xmm1,xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 - movdqa xmm5,xmm2 ; transpose coefficients(phase 3) - punpcklqdq xmm2,xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 - punpckhqdq xmm5,xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 - - movdqa xmm6,xmm1 - movdqa xmm3,xmm0 - psubw xmm1,xmm2 ; xmm1=data1-data6=tmp6 - psubw xmm0,xmm5 ; xmm0=data0-data7=tmp7 - paddw xmm6,xmm2 ; xmm6=data1+data6=tmp1 - paddw xmm3,xmm5 ; xmm3=data0+data7=tmp0 - - movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(42 52 62 72 43 53 63 73) - movdqa xmm5, XMMWORD [wk(3)] ; xmm5=(44 54 64 74 45 55 65 75) - movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 - - movdqa xmm1,xmm7 ; transpose coefficients(phase 3) - punpcklqdq xmm7,xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 - punpckhqdq xmm1,xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 - movdqa xmm0,xmm4 ; transpose coefficients(phase 3) - punpcklqdq xmm4,xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 - punpckhqdq xmm0,xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 - - movdqa xmm2,xmm1 - movdqa xmm5,xmm7 - paddw xmm1,xmm4 ; xmm1=data3+data4=tmp3 - paddw xmm7,xmm0 ; xmm7=data2+data5=tmp2 - psubw xmm2,xmm4 ; xmm2=data3-data4=tmp4 - psubw xmm5,xmm0 ; xmm5=data2-data5=tmp5 - - ; -- Even part - - movdqa xmm4,xmm3 - movdqa xmm0,xmm6 - paddw xmm3,xmm1 ; xmm3=tmp10 - paddw xmm6,xmm7 ; xmm6=tmp11 - psubw xmm4,xmm1 ; xmm4=tmp13 - psubw 
xmm0,xmm7 ; xmm0=tmp12 - - movdqa xmm1,xmm3 - paddw xmm3,xmm6 ; xmm3=tmp10+tmp11 - psubw xmm1,xmm6 ; xmm1=tmp10-tmp11 - - psllw xmm3,PASS1_BITS ; xmm3=data0 - psllw xmm1,PASS1_BITS ; xmm1=data4 - - movdqa XMMWORD [wk(2)], xmm3 ; wk(2)=data0 - movdqa XMMWORD [wk(3)], xmm1 ; wk(3)=data4 - - ; (Original) - ; z1 = (tmp12 + tmp13) * 0.541196100; - ; data2 = z1 + tmp13 * 0.765366865; - ; data6 = z1 + tmp12 * -1.847759065; - ; - ; (This implementation) - ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; - ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); - - movdqa xmm7,xmm4 ; xmm4=tmp13 - movdqa xmm6,xmm4 - punpcklwd xmm7,xmm0 ; xmm0=tmp12 - punpckhwd xmm6,xmm0 - movdqa xmm4,xmm7 - movdqa xmm0,xmm6 - pmaddwd xmm7,[GOTOFF(ebx,PW_F130_F054)] ; xmm7=data2L - pmaddwd xmm6,[GOTOFF(ebx,PW_F130_F054)] ; xmm6=data2H - pmaddwd xmm4,[GOTOFF(ebx,PW_F054_MF130)] ; xmm4=data6L - pmaddwd xmm0,[GOTOFF(ebx,PW_F054_MF130)] ; xmm0=data6H - - paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad xmm7,DESCALE_P1 - psrad xmm6,DESCALE_P1 - paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd xmm0,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad xmm4,DESCALE_P1 - psrad xmm0,DESCALE_P1 - - packssdw xmm7,xmm6 ; xmm7=data2 - packssdw xmm4,xmm0 ; xmm4=data6 - - movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=data2 - movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=data6 - - ; -- Odd part - - movdqa xmm3, XMMWORD [wk(0)] ; xmm3=tmp6 - movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp7 - - movdqa xmm6,xmm2 ; xmm2=tmp4 - movdqa xmm0,xmm5 ; xmm5=tmp5 - paddw xmm6,xmm3 ; xmm6=z3 - paddw xmm0,xmm1 ; xmm0=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movdqa xmm7,xmm6 - movdqa xmm4,xmm6 - punpcklwd xmm7,xmm0 - punpckhwd xmm4,xmm0 - movdqa xmm6,xmm7 
- movdqa xmm0,xmm4 - pmaddwd xmm7,[GOTOFF(ebx,PW_MF078_F117)] ; xmm7=z3L - pmaddwd xmm4,[GOTOFF(ebx,PW_MF078_F117)] ; xmm4=z3H - pmaddwd xmm6,[GOTOFF(ebx,PW_F117_F078)] ; xmm6=z4L - pmaddwd xmm0,[GOTOFF(ebx,PW_F117_F078)] ; xmm0=z4H - - movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L - movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H - - ; (Original) - ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; - ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; - ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; - ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; - ; - ; (This implementation) - ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; - ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; - ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); - ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); - ; data7 = tmp4 + z3; data5 = tmp5 + z4; - ; data3 = tmp6 + z3; data1 = tmp7 + z4; - - movdqa xmm7,xmm2 - movdqa xmm4,xmm2 - punpcklwd xmm7,xmm1 - punpckhwd xmm4,xmm1 - movdqa xmm2,xmm7 - movdqa xmm1,xmm4 - pmaddwd xmm7,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm7=tmp4L - pmaddwd xmm4,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm4=tmp4H - pmaddwd xmm2,[GOTOFF(ebx,PW_MF089_F060)] ; xmm2=tmp7L - pmaddwd xmm1,[GOTOFF(ebx,PW_MF089_F060)] ; xmm1=tmp7H - - paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L - paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H - paddd xmm2,xmm6 ; xmm2=data1L - paddd xmm1,xmm0 ; xmm1=data1H - - paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad xmm7,DESCALE_P1 - psrad xmm4,DESCALE_P1 - paddd xmm2,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad xmm2,DESCALE_P1 - psrad xmm1,DESCALE_P1 - - packssdw xmm7,xmm4 ; xmm7=data7 - packssdw xmm2,xmm1 ; xmm2=data1 - - movdqa xmm4,xmm5 - movdqa xmm1,xmm5 - punpcklwd xmm4,xmm3 - punpckhwd xmm1,xmm3 - movdqa xmm5,xmm4 - movdqa xmm3,xmm1 - 
pmaddwd xmm4,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm4=tmp5L - pmaddwd xmm1,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm1=tmp5H - pmaddwd xmm5,[GOTOFF(ebx,PW_MF256_F050)] ; xmm5=tmp6L - pmaddwd xmm3,[GOTOFF(ebx,PW_MF256_F050)] ; xmm3=tmp6H - - paddd xmm4,xmm6 ; xmm4=data5L - paddd xmm1,xmm0 ; xmm1=data5H - paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L - paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H - - paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad xmm4,DESCALE_P1 - psrad xmm1,DESCALE_P1 - paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P1)] - paddd xmm3,[GOTOFF(ebx,PD_DESCALE_P1)] - psrad xmm5,DESCALE_P1 - psrad xmm3,DESCALE_P1 - - packssdw xmm4,xmm1 ; xmm4=data5 - packssdw xmm5,xmm3 ; xmm5=data3 - - ; ---- Pass 2: process columns. - -; mov edx, POINTER [data(eax)] ; (DCTELEM *) - - movdqa xmm6, XMMWORD [wk(2)] ; xmm6=col0 - movdqa xmm0, XMMWORD [wk(4)] ; xmm0=col2 - - ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72) - ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73) - - movdqa xmm1,xmm6 ; transpose coefficients(phase 1) - punpcklwd xmm6,xmm2 ; xmm6=(00 01 10 11 20 21 30 31) - punpckhwd xmm1,xmm2 ; xmm1=(40 41 50 51 60 61 70 71) - movdqa xmm3,xmm0 ; transpose coefficients(phase 1) - punpcklwd xmm0,xmm5 ; xmm0=(02 03 12 13 22 23 32 33) - punpckhwd xmm3,xmm5 ; xmm3=(42 43 52 53 62 63 72 73) - - movdqa xmm2, XMMWORD [wk(3)] ; xmm2=col4 - movdqa xmm5, XMMWORD [wk(5)] ; xmm5=col6 - - ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76) - ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77) - - movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=(02 03 12 13 22 23 32 33) - movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(42 43 52 53 62 63 72 73) - - movdqa xmm0,xmm2 ; transpose coefficients(phase 1) - punpcklwd xmm2,xmm4 ; xmm2=(04 05 14 15 24 25 34 35) - punpckhwd xmm0,xmm4 ; xmm0=(44 45 54 55 64 65 74 75) - movdqa xmm3,xmm5 ; transpose coefficients(phase 1) - punpcklwd xmm5,xmm7 ; xmm5=(06 07 16 17 26 27 36 37) - punpckhwd xmm3,xmm7 ; 
xmm3=(46 47 56 57 66 67 76 77) - - movdqa xmm4,xmm2 ; transpose coefficients(phase 2) - punpckldq xmm2,xmm5 ; xmm2=(04 05 06 07 14 15 16 17) - punpckhdq xmm4,xmm5 ; xmm4=(24 25 26 27 34 35 36 37) - movdqa xmm7,xmm0 ; transpose coefficients(phase 2) - punpckldq xmm0,xmm3 ; xmm0=(44 45 46 47 54 55 56 57) - punpckhdq xmm7,xmm3 ; xmm7=(64 65 66 67 74 75 76 77) - - movdqa xmm5, XMMWORD [wk(0)] ; xmm5=(02 03 12 13 22 23 32 33) - movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53 62 63 72 73) - movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=(24 25 26 27 34 35 36 37) - movdqa XMMWORD [wk(3)], xmm0 ; wk(3)=(44 45 46 47 54 55 56 57) - - movdqa xmm4,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm5 ; xmm6=(00 01 02 03 10 11 12 13) - punpckhdq xmm4,xmm5 ; xmm4=(20 21 22 23 30 31 32 33) - movdqa xmm0,xmm1 ; transpose coefficients(phase 2) - punpckldq xmm1,xmm3 ; xmm1=(40 41 42 43 50 51 52 53) - punpckhdq xmm0,xmm3 ; xmm0=(60 61 62 63 70 71 72 73) - - movdqa xmm5,xmm6 ; transpose coefficients(phase 3) - punpcklqdq xmm6,xmm2 ; xmm6=(00 01 02 03 04 05 06 07)=data0 - punpckhqdq xmm5,xmm2 ; xmm5=(10 11 12 13 14 15 16 17)=data1 - movdqa xmm3,xmm0 ; transpose coefficients(phase 3) - punpcklqdq xmm0,xmm7 ; xmm0=(60 61 62 63 64 65 66 67)=data6 - punpckhqdq xmm3,xmm7 ; xmm3=(70 71 72 73 74 75 76 77)=data7 - - movdqa xmm2,xmm5 - movdqa xmm7,xmm6 - psubw xmm5,xmm0 ; xmm5=data1-data6=tmp6 - psubw xmm6,xmm3 ; xmm6=data0-data7=tmp7 - paddw xmm2,xmm0 ; xmm2=data1+data6=tmp1 - paddw xmm7,xmm3 ; xmm7=data0+data7=tmp0 - - movdqa xmm0, XMMWORD [wk(2)] ; xmm0=(24 25 26 27 34 35 36 37) - movdqa xmm3, XMMWORD [wk(3)] ; xmm3=(44 45 46 47 54 55 56 57) - movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=tmp6 - movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 - - movdqa xmm5,xmm4 ; transpose coefficients(phase 3) - punpcklqdq xmm4,xmm0 ; xmm4=(20 21 22 23 24 25 26 27)=data2 - punpckhqdq xmm5,xmm0 ; xmm5=(30 31 32 33 34 35 36 37)=data3 - movdqa xmm6,xmm1 ; transpose coefficients(phase 3) - punpcklqdq xmm1,xmm3 ; xmm1=(40 41 
42 43 44 45 46 47)=data4 - punpckhqdq xmm6,xmm3 ; xmm6=(50 51 52 53 54 55 56 57)=data5 - - movdqa xmm0,xmm5 - movdqa xmm3,xmm4 - paddw xmm5,xmm1 ; xmm5=data3+data4=tmp3 - paddw xmm4,xmm6 ; xmm4=data2+data5=tmp2 - psubw xmm0,xmm1 ; xmm0=data3-data4=tmp4 - psubw xmm3,xmm6 ; xmm3=data2-data5=tmp5 - - ; -- Even part - - movdqa xmm1,xmm7 - movdqa xmm6,xmm2 - paddw xmm7,xmm5 ; xmm7=tmp10 - paddw xmm2,xmm4 ; xmm2=tmp11 - psubw xmm1,xmm5 ; xmm1=tmp13 - psubw xmm6,xmm4 ; xmm6=tmp12 - - movdqa xmm5,xmm7 - paddw xmm7,xmm2 ; xmm7=tmp10+tmp11 - psubw xmm5,xmm2 ; xmm5=tmp10-tmp11 - - paddw xmm7,[GOTOFF(ebx,PW_DESCALE_P2X)] - paddw xmm5,[GOTOFF(ebx,PW_DESCALE_P2X)] - psraw xmm7,PASS1_BITS ; xmm7=data0 - psraw xmm5,PASS1_BITS ; xmm5=data4 - - movdqa XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm7 - movdqa XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm5 - - ; (Original) - ; z1 = (tmp12 + tmp13) * 0.541196100; - ; data2 = z1 + tmp13 * 0.765366865; - ; data6 = z1 + tmp12 * -1.847759065; - ; - ; (This implementation) - ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; - ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); - - movdqa xmm4,xmm1 ; xmm1=tmp13 - movdqa xmm2,xmm1 - punpcklwd xmm4,xmm6 ; xmm6=tmp12 - punpckhwd xmm2,xmm6 - movdqa xmm1,xmm4 - movdqa xmm6,xmm2 - pmaddwd xmm4,[GOTOFF(ebx,PW_F130_F054)] ; xmm4=data2L - pmaddwd xmm2,[GOTOFF(ebx,PW_F130_F054)] ; xmm2=data2H - pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=data6L - pmaddwd xmm6,[GOTOFF(ebx,PW_F054_MF130)] ; xmm6=data6H - - paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd xmm2,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad xmm4,DESCALE_P2 - psrad xmm2,DESCALE_P2 - paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad xmm1,DESCALE_P2 - psrad xmm6,DESCALE_P2 - - packssdw xmm4,xmm2 ; xmm4=data2 - packssdw xmm1,xmm6 ; xmm1=data6 - - movdqa XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm4 - movdqa XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm1 - - ; -- Odd part 
- - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp6 - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 - - movdqa xmm2,xmm0 ; xmm0=tmp4 - movdqa xmm6,xmm3 ; xmm3=tmp5 - paddw xmm2,xmm7 ; xmm2=z3 - paddw xmm6,xmm5 ; xmm6=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movdqa xmm4,xmm2 - movdqa xmm1,xmm2 - punpcklwd xmm4,xmm6 - punpckhwd xmm1,xmm6 - movdqa xmm2,xmm4 - movdqa xmm6,xmm1 - pmaddwd xmm4,[GOTOFF(ebx,PW_MF078_F117)] ; xmm4=z3L - pmaddwd xmm1,[GOTOFF(ebx,PW_MF078_F117)] ; xmm1=z3H - pmaddwd xmm2,[GOTOFF(ebx,PW_F117_F078)] ; xmm2=z4L - pmaddwd xmm6,[GOTOFF(ebx,PW_F117_F078)] ; xmm6=z4H - - movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L - movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=z3H - - ; (Original) - ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; - ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; - ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; - ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; - ; - ; (This implementation) - ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; - ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; - ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); - ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); - ; data7 = tmp4 + z3; data5 = tmp5 + z4; - ; data3 = tmp6 + z3; data1 = tmp7 + z4; - - movdqa xmm4,xmm0 - movdqa xmm1,xmm0 - punpcklwd xmm4,xmm5 - punpckhwd xmm1,xmm5 - movdqa xmm0,xmm4 - movdqa xmm5,xmm1 - pmaddwd xmm4,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm4=tmp4L - pmaddwd xmm1,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm1=tmp4H - pmaddwd xmm0,[GOTOFF(ebx,PW_MF089_F060)] ; xmm0=tmp7L - pmaddwd xmm5,[GOTOFF(ebx,PW_MF089_F060)] ; xmm5=tmp7H - - paddd xmm4, 
XMMWORD [wk(0)] ; xmm4=data7L - paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H - paddd xmm0,xmm2 ; xmm0=data1L - paddd xmm5,xmm6 ; xmm5=data1H - - paddd xmm4,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad xmm4,DESCALE_P2 - psrad xmm1,DESCALE_P2 - paddd xmm0,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad xmm0,DESCALE_P2 - psrad xmm5,DESCALE_P2 - - packssdw xmm4,xmm1 ; xmm4=data7 - packssdw xmm0,xmm5 ; xmm0=data1 - - movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm4 - movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm0 - - movdqa xmm1,xmm3 - movdqa xmm5,xmm3 - punpcklwd xmm1,xmm7 - punpckhwd xmm5,xmm7 - movdqa xmm3,xmm1 - movdqa xmm7,xmm5 - pmaddwd xmm1,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm1=tmp5L - pmaddwd xmm5,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm5=tmp5H - pmaddwd xmm3,[GOTOFF(ebx,PW_MF256_F050)] ; xmm3=tmp6L - pmaddwd xmm7,[GOTOFF(ebx,PW_MF256_F050)] ; xmm7=tmp6H - - paddd xmm1,xmm2 ; xmm1=data5L - paddd xmm5,xmm6 ; xmm5=data5H - paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L - paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H - - paddd xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad xmm1,DESCALE_P2 - psrad xmm5,DESCALE_P2 - paddd xmm3,[GOTOFF(ebx,PD_DESCALE_P2)] - paddd xmm7,[GOTOFF(ebx,PD_DESCALE_P2)] - psrad xmm3,DESCALE_P2 - psrad xmm7,DESCALE_P2 - - packssdw xmm1,xmm5 ; xmm1=data5 - packssdw xmm3,xmm7 ; xmm3=data3 - - movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm1 - movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm3 - -; pop edi ; unused -; pop esi ; unused -; pop edx ; need not be preserved -; pop ecx ; unused - poppic ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jidctflt-3dn.asm b/simd/jidctflt-3dn.asm deleted file mode 100644 index 99356f2..0000000 --- a/simd/jidctflt-3dn.asm +++ /dev/null @@ -1,451 +0,0 @@ -; -; jidctflt.asm - floating-point IDCT (3DNow! & MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a floating-point implementation of the inverse DCT -; (Discrete Cosine Transform). The following code is based directly on -; the IJG's original jidctflt.c; see the jidctflt.c for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_idct_float_3dnow) - -EXTN(jconst_idct_float_3dnow): - -PD_1_414 times 2 dd 1.414213562373095048801689 -PD_1_847 times 2 dd 1.847759065022573512256366 -PD_1_082 times 2 dd 1.082392200292393968799446 -PD_2_613 times 2 dd 2.613125929752753055713286 -PD_RNDINT_MAGIC times 2 dd 100663296.0 ; (float)(0x00C00000 << 3) -PB_CENTERJSAMP times 8 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform dequantization and inverse DCT on one block of coefficients. 
-; -; GLOBAL(void) -; jsimd_idct_float_3dnow (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; void *dct_table -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 2 -%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT - ; FAST_FLOAT workspace[DCTSIZE2] - - align 16 - global EXTN(jsimd_idct_float_3dnow) - -EXTN(jsimd_idct_float_3dnow): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [workspace] - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input, store into work array. 
- -; mov eax, [original_ebp] - mov edx, POINTER [dct_table(eax)] ; quantptr - mov esi, JCOEFPTR [coef_block(eax)] ; inptr - lea edi, [workspace] ; FAST_FLOAT *wsptr - mov ecx, DCTSIZE/2 ; ctr - alignx 16,7 -.columnloop: -%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - jnz short .columnDCT - - pushpic ebx ; save GOT address - mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] - mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] - or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] - or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] - or eax,ebx - poppic ebx ; restore GOT address - jnz short .columnDCT - - ; -- AC terms all zero - - movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] - - punpcklwd mm0,mm0 - psrad mm0,(DWORD_BIT-WORD_BIT) - pi2fd mm0,mm0 - - pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movq mm1,mm0 - punpckldq mm0,mm0 - punpckhdq mm1,mm1 - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm0 - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm0 - movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm0 - movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1 - movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm1 - movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1 - movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1 - jmp near .nextcolumn - alignx 16,7 -%endif -.columnDCT: - - ; -- Even part - - movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)] - movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)] - movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)] - - punpcklwd mm0,mm0 - punpcklwd mm1,mm1 - psrad mm0,(DWORD_BIT-WORD_BIT) - psrad mm1,(DWORD_BIT-WORD_BIT) - pi2fd mm0,mm0 - pi2fd mm1,mm1 - - pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - pfmul mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - 
punpcklwd mm2,mm2 - punpcklwd mm3,mm3 - psrad mm2,(DWORD_BIT-WORD_BIT) - psrad mm3,(DWORD_BIT-WORD_BIT) - pi2fd mm2,mm2 - pi2fd mm3,mm3 - - pfmul mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - pfmul mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movq mm4,mm0 - movq mm5,mm1 - pfsub mm0,mm2 ; mm0=tmp11 - pfsub mm1,mm3 - pfadd mm4,mm2 ; mm4=tmp10 - pfadd mm5,mm3 ; mm5=tmp13 - - pfmul mm1,[GOTOFF(ebx,PD_1_414)] - pfsub mm1,mm5 ; mm1=tmp12 - - movq mm6,mm4 - movq mm7,mm0 - pfsub mm4,mm5 ; mm4=tmp3 - pfsub mm0,mm1 ; mm0=tmp2 - pfadd mm6,mm5 ; mm6=tmp0 - pfadd mm7,mm1 ; mm7=tmp1 - - movq MMWORD [wk(1)], mm4 ; tmp3 - movq MMWORD [wk(0)], mm0 ; tmp2 - - ; -- Odd part - - movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)] - movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)] - movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)] - - punpcklwd mm2,mm2 - punpcklwd mm3,mm3 - psrad mm2,(DWORD_BIT-WORD_BIT) - psrad mm3,(DWORD_BIT-WORD_BIT) - pi2fd mm2,mm2 - pi2fd mm3,mm3 - - pfmul mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - pfmul mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - punpcklwd mm5,mm5 - punpcklwd mm1,mm1 - psrad mm5,(DWORD_BIT-WORD_BIT) - psrad mm1,(DWORD_BIT-WORD_BIT) - pi2fd mm5,mm5 - pi2fd mm1,mm1 - - pfmul mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - pfmul mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movq mm4,mm2 - movq mm0,mm5 - pfadd mm2,mm1 ; mm2=z11 - pfadd mm5,mm3 ; mm5=z13 - pfsub mm4,mm1 ; mm4=z12 - pfsub mm0,mm3 ; mm0=z10 - - movq mm1,mm2 - pfsub mm2,mm5 - pfadd mm1,mm5 ; mm1=tmp7 - - pfmul mm2,[GOTOFF(ebx,PD_1_414)] ; mm2=tmp11 - - movq mm3,mm0 - pfadd mm0,mm4 - pfmul mm0,[GOTOFF(ebx,PD_1_847)] ; mm0=z5 - pfmul mm3,[GOTOFF(ebx,PD_2_613)] ; mm3=(z10 * 2.613125930) - pfmul mm4,[GOTOFF(ebx,PD_1_082)] ; mm4=(z12 * 1.082392200) - pfsubr mm3,mm0 ; mm3=tmp12 - pfsub mm4,mm0 ; mm4=tmp10 - - ; -- Final output stage - - pfsub mm3,mm1 ; mm3=tmp6 - movq mm5,mm6 - 
movq mm0,mm7 - pfadd mm6,mm1 ; mm6=data0=(00 01) - pfadd mm7,mm3 ; mm7=data1=(10 11) - pfsub mm5,mm1 ; mm5=data7=(70 71) - pfsub mm0,mm3 ; mm0=data6=(60 61) - pfsub mm2,mm3 ; mm2=tmp5 - - movq mm1,mm6 ; transpose coefficients - punpckldq mm6,mm7 ; mm6=(00 10) - punpckhdq mm1,mm7 ; mm1=(01 11) - movq mm3,mm0 ; transpose coefficients - punpckldq mm0,mm5 ; mm0=(60 70) - punpckhdq mm3,mm5 ; mm3=(61 71) - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm6 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1 - movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 - movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm3 - - movq mm7, MMWORD [wk(0)] ; mm7=tmp2 - movq mm5, MMWORD [wk(1)] ; mm5=tmp3 - - pfadd mm4,mm2 ; mm4=tmp4 - movq mm6,mm7 - movq mm1,mm5 - pfadd mm7,mm2 ; mm7=data2=(20 21) - pfadd mm5,mm4 ; mm5=data4=(40 41) - pfsub mm6,mm2 ; mm6=data5=(50 51) - pfsub mm1,mm4 ; mm1=data3=(30 31) - - movq mm0,mm7 ; transpose coefficients - punpckldq mm7,mm1 ; mm7=(20 30) - punpckhdq mm0,mm1 ; mm0=(21 31) - movq mm3,mm5 ; transpose coefficients - punpckldq mm5,mm6 ; mm5=(40 50) - punpckhdq mm3,mm6 ; mm3=(41 51) - - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm7 - movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm0 - movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5 - movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm3 - -.nextcolumn: - add esi, byte 2*SIZEOF_JCOEF ; coef_block - add edx, byte 2*SIZEOF_FLOAT_MULT_TYPE ; quantptr - add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr - dec ecx ; ctr - jnz near .columnloop - - ; -- Prefetch the next coefficient block - - prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] - prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] - prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] - prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows from work array, store into output array. 
- - mov eax, [original_ebp] - lea esi, [workspace] ; FAST_FLOAT *wsptr - mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(eax)] - mov ecx, DCTSIZE/2 ; ctr - alignx 16,7 -.rowloop: - - ; -- Even part - - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] - movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] - movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] - movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] - - movq mm4,mm0 - movq mm5,mm1 - pfsub mm0,mm2 ; mm0=tmp11 - pfsub mm1,mm3 - pfadd mm4,mm2 ; mm4=tmp10 - pfadd mm5,mm3 ; mm5=tmp13 - - pfmul mm1,[GOTOFF(ebx,PD_1_414)] - pfsub mm1,mm5 ; mm1=tmp12 - - movq mm6,mm4 - movq mm7,mm0 - pfsub mm4,mm5 ; mm4=tmp3 - pfsub mm0,mm1 ; mm0=tmp2 - pfadd mm6,mm5 ; mm6=tmp0 - pfadd mm7,mm1 ; mm7=tmp1 - - movq MMWORD [wk(1)], mm4 ; tmp3 - movq MMWORD [wk(0)], mm0 ; tmp2 - - ; -- Odd part - - movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] - movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] - movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] - movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] - - movq mm4,mm2 - movq mm0,mm5 - pfadd mm2,mm1 ; mm2=z11 - pfadd mm5,mm3 ; mm5=z13 - pfsub mm4,mm1 ; mm4=z12 - pfsub mm0,mm3 ; mm0=z10 - - movq mm1,mm2 - pfsub mm2,mm5 - pfadd mm1,mm5 ; mm1=tmp7 - - pfmul mm2,[GOTOFF(ebx,PD_1_414)] ; mm2=tmp11 - - movq mm3,mm0 - pfadd mm0,mm4 - pfmul mm0,[GOTOFF(ebx,PD_1_847)] ; mm0=z5 - pfmul mm3,[GOTOFF(ebx,PD_2_613)] ; mm3=(z10 * 2.613125930) - pfmul mm4,[GOTOFF(ebx,PD_1_082)] ; mm4=(z12 * 1.082392200) - pfsubr mm3,mm0 ; mm3=tmp12 - pfsub mm4,mm0 ; mm4=tmp10 - - ; -- Final output stage - - pfsub mm3,mm1 ; mm3=tmp6 - movq mm5,mm6 - movq mm0,mm7 - pfadd mm6,mm1 ; mm6=data0=(00 10) - pfadd mm7,mm3 ; mm7=data1=(01 11) - pfsub mm5,mm1 ; mm5=data7=(07 17) - pfsub mm0,mm3 ; mm0=data6=(06 16) - pfsub mm2,mm3 ; mm2=tmp5 - - movq mm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; mm1=[PD_RNDINT_MAGIC] - pcmpeqd mm3,mm3 - psrld mm3,WORD_BIT ; mm3={0xFFFF 0x0000 
0xFFFF 0x0000} - - pfadd mm6,mm1 ; mm6=roundint(data0/8)=(00 ** 10 **) - pfadd mm7,mm1 ; mm7=roundint(data1/8)=(01 ** 11 **) - pfadd mm0,mm1 ; mm0=roundint(data6/8)=(06 ** 16 **) - pfadd mm5,mm1 ; mm5=roundint(data7/8)=(07 ** 17 **) - - pand mm6,mm3 ; mm6=(00 -- 10 --) - pslld mm7,WORD_BIT ; mm7=(-- 01 -- 11) - pand mm0,mm3 ; mm0=(06 -- 16 --) - pslld mm5,WORD_BIT ; mm5=(-- 07 -- 17) - por mm6,mm7 ; mm6=(00 01 10 11) - por mm0,mm5 ; mm0=(06 07 16 17) - - movq mm1, MMWORD [wk(0)] ; mm1=tmp2 - movq mm3, MMWORD [wk(1)] ; mm3=tmp3 - - pfadd mm4,mm2 ; mm4=tmp4 - movq mm7,mm1 - movq mm5,mm3 - pfadd mm1,mm2 ; mm1=data2=(02 12) - pfadd mm3,mm4 ; mm3=data4=(04 14) - pfsub mm7,mm2 ; mm7=data5=(05 15) - pfsub mm5,mm4 ; mm5=data3=(03 13) - - movq mm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; mm2=[PD_RNDINT_MAGIC] - pcmpeqd mm4,mm4 - psrld mm4,WORD_BIT ; mm4={0xFFFF 0x0000 0xFFFF 0x0000} - - pfadd mm3,mm2 ; mm3=roundint(data4/8)=(04 ** 14 **) - pfadd mm7,mm2 ; mm7=roundint(data5/8)=(05 ** 15 **) - pfadd mm1,mm2 ; mm1=roundint(data2/8)=(02 ** 12 **) - pfadd mm5,mm2 ; mm5=roundint(data3/8)=(03 ** 13 **) - - pand mm3,mm4 ; mm3=(04 -- 14 --) - pslld mm7,WORD_BIT ; mm7=(-- 05 -- 15) - pand mm1,mm4 ; mm1=(02 -- 12 --) - pslld mm5,WORD_BIT ; mm5=(-- 03 -- 13) - por mm3,mm7 ; mm3=(04 05 14 15) - por mm1,mm5 ; mm1=(02 03 12 13) - - movq mm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm2=[PB_CENTERJSAMP] - - packsswb mm6,mm3 ; mm6=(00 01 10 11 04 05 14 15) - packsswb mm1,mm0 ; mm1=(02 03 12 13 06 07 16 17) - paddb mm6,mm2 - paddb mm1,mm2 - - movq mm4,mm6 ; transpose coefficients(phase 2) - punpcklwd mm6,mm1 ; mm6=(00 01 02 03 10 11 12 13) - punpckhwd mm4,mm1 ; mm4=(04 05 06 07 14 15 16 17) - - movq mm7,mm6 ; transpose coefficients(phase 3) - punpckldq mm6,mm4 ; mm6=(00 01 02 03 04 05 06 07) - punpckhdq mm7,mm4 ; mm7=(10 11 12 13 14 15 16 17) - - pushpic ebx ; save GOT address - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6 - 
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7 - - poppic ebx ; restore GOT address - - add esi, byte 2*SIZEOF_FAST_FLOAT ; wsptr - add edi, byte 2*SIZEOF_JSAMPROW - dec ecx ; ctr - jnz near .rowloop - - femms ; empty MMX/3DNow! state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jidctflt-sse.asm b/simd/jidctflt-sse.asm deleted file mode 100644 index 4d4af2f..0000000 --- a/simd/jidctflt-sse.asm +++ /dev/null @@ -1,571 +0,0 @@ -; -; jidctflt.asm - floating-point IDCT (SSE & MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a floating-point implementation of the inverse DCT -; (Discrete Cosine Transform). The following code is based directly on -; the IJG's original jidctflt.c; see the jidctflt.c for more details. 
-; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) - shufps %1,%2,0x44 -%endmacro - -%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) - shufps %1,%2,0xEE -%endmacro - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_idct_float_sse) - -EXTN(jconst_idct_float_sse): - -PD_1_414 times 4 dd 1.414213562373095048801689 -PD_1_847 times 4 dd 1.847759065022573512256366 -PD_1_082 times 4 dd 1.082392200292393968799446 -PD_M2_613 times 4 dd -2.613125929752753055713286 -PD_0_125 times 4 dd 0.125 ; 1/8 -PB_CENTERJSAMP times 8 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform dequantization and inverse DCT on one block of coefficients. -; -; GLOBAL(void) -; jsimd_idct_float_sse (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; void *dct_table -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 -%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT - ; FAST_FLOAT workspace[DCTSIZE2] - - align 16 - global EXTN(jsimd_idct_float_sse) - -EXTN(jsimd_idct_float_sse): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [workspace] - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input, store 
into work array. - -; mov eax, [original_ebp] - mov edx, POINTER [dct_table(eax)] ; quantptr - mov esi, JCOEFPTR [coef_block(eax)] ; inptr - lea edi, [workspace] ; FAST_FLOAT *wsptr - mov ecx, DCTSIZE/4 ; ctr - alignx 16,7 -.columnloop: -%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - jnz near .columnDCT - - movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - por mm1,mm0 - packsswb mm1,mm1 - movd eax,mm1 - test eax,eax - jnz short .columnDCT - - ; -- AC terms all zero - - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - - punpckhwd mm1,mm0 ; mm1=(** 02 ** 03) - punpcklwd mm0,mm0 ; mm0=(00 00 01 01) - psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in0H=(02 03) - psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in0L=(00 01) - cvtpi2ps xmm3,mm1 ; xmm3=(02 03 ** **) - cvtpi2ps xmm0,mm0 ; xmm0=(00 01 ** **) - movlhps xmm0,xmm3 ; xmm0=in0=(00 01 02 03) - - mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movaps xmm1,xmm0 - movaps xmm2,xmm0 - movaps xmm3,xmm0 - - shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) - shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) - shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) - shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) - - movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2 - movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2 - movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3 - movaps XMMWORD 
[XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 - jmp near .nextcolumn - alignx 16,7 -%endif -.columnDCT: - - ; -- Even part - - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - - punpckhwd mm4,mm0 ; mm4=(** 02 ** 03) - punpcklwd mm0,mm0 ; mm0=(00 00 01 01) - punpckhwd mm5,mm1 ; mm5=(** 22 ** 23) - punpcklwd mm1,mm1 ; mm1=(20 20 21 21) - - psrad mm4,(DWORD_BIT-WORD_BIT) ; mm4=in0H=(02 03) - psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in0L=(00 01) - cvtpi2ps xmm4,mm4 ; xmm4=(02 03 ** **) - cvtpi2ps xmm0,mm0 ; xmm0=(00 01 ** **) - psrad mm5,(DWORD_BIT-WORD_BIT) ; mm5=in2H=(22 23) - psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in2L=(20 21) - cvtpi2ps xmm5,mm5 ; xmm5=(22 23 ** **) - cvtpi2ps xmm1,mm1 ; xmm1=(20 21 ** **) - - punpckhwd mm6,mm2 ; mm6=(** 42 ** 43) - punpcklwd mm2,mm2 ; mm2=(40 40 41 41) - punpckhwd mm7,mm3 ; mm7=(** 62 ** 63) - punpcklwd mm3,mm3 ; mm3=(60 60 61 61) - - psrad mm6,(DWORD_BIT-WORD_BIT) ; mm6=in4H=(42 43) - psrad mm2,(DWORD_BIT-WORD_BIT) ; mm2=in4L=(40 41) - cvtpi2ps xmm6,mm6 ; xmm6=(42 43 ** **) - cvtpi2ps xmm2,mm2 ; xmm2=(40 41 ** **) - psrad mm7,(DWORD_BIT-WORD_BIT) ; mm7=in6H=(62 63) - psrad mm3,(DWORD_BIT-WORD_BIT) ; mm3=in6L=(60 61) - cvtpi2ps xmm7,mm7 ; xmm7=(62 63 ** **) - cvtpi2ps xmm3,mm3 ; xmm3=(60 61 ** **) - - movlhps xmm0,xmm4 ; xmm0=in0=(00 01 02 03) - movlhps xmm1,xmm5 ; xmm1=in2=(20 21 22 23) - mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movlhps xmm2,xmm6 ; xmm2=in4=(40 41 42 43) - movlhps xmm3,xmm7 ; xmm3=in6=(60 61 62 63) - mulps xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movaps xmm4,xmm0 - movaps xmm5,xmm1 - subps xmm0,xmm2 ; xmm0=tmp11 - subps xmm1,xmm3 - addps xmm4,xmm2 ; xmm4=tmp10 - addps xmm5,xmm3 ; xmm5=tmp13 - - 
mulps xmm1,[GOTOFF(ebx,PD_1_414)] - subps xmm1,xmm5 ; xmm1=tmp12 - - movaps xmm6,xmm4 - movaps xmm7,xmm0 - subps xmm4,xmm5 ; xmm4=tmp3 - subps xmm0,xmm1 ; xmm0=tmp2 - addps xmm6,xmm5 ; xmm6=tmp0 - addps xmm7,xmm1 ; xmm7=tmp1 - - movaps XMMWORD [wk(1)], xmm4 ; tmp3 - movaps XMMWORD [wk(0)], xmm0 ; tmp2 - - ; -- Odd part - - movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - - punpckhwd mm6,mm4 ; mm6=(** 12 ** 13) - punpcklwd mm4,mm4 ; mm4=(10 10 11 11) - punpckhwd mm2,mm0 ; mm2=(** 32 ** 33) - punpcklwd mm0,mm0 ; mm0=(30 30 31 31) - - psrad mm6,(DWORD_BIT-WORD_BIT) ; mm6=in1H=(12 13) - psrad mm4,(DWORD_BIT-WORD_BIT) ; mm4=in1L=(10 11) - cvtpi2ps xmm4,mm6 ; xmm4=(12 13 ** **) - cvtpi2ps xmm2,mm4 ; xmm2=(10 11 ** **) - psrad mm2,(DWORD_BIT-WORD_BIT) ; mm2=in3H=(32 33) - psrad mm0,(DWORD_BIT-WORD_BIT) ; mm0=in3L=(30 31) - cvtpi2ps xmm0,mm2 ; xmm0=(32 33 ** **) - cvtpi2ps xmm3,mm0 ; xmm3=(30 31 ** **) - - punpckhwd mm7,mm5 ; mm7=(** 52 ** 53) - punpcklwd mm5,mm5 ; mm5=(50 50 51 51) - punpckhwd mm3,mm1 ; mm3=(** 72 ** 73) - punpcklwd mm1,mm1 ; mm1=(70 70 71 71) - - movlhps xmm2,xmm4 ; xmm2=in1=(10 11 12 13) - movlhps xmm3,xmm0 ; xmm3=in3=(30 31 32 33) - - psrad mm7,(DWORD_BIT-WORD_BIT) ; mm7=in5H=(52 53) - psrad mm5,(DWORD_BIT-WORD_BIT) ; mm5=in5L=(50 51) - cvtpi2ps xmm4,mm7 ; xmm4=(52 53 ** **) - cvtpi2ps xmm5,mm5 ; xmm5=(50 51 ** **) - psrad mm3,(DWORD_BIT-WORD_BIT) ; mm3=in7H=(72 73) - psrad mm1,(DWORD_BIT-WORD_BIT) ; mm1=in7L=(70 71) - cvtpi2ps xmm0,mm3 ; xmm0=(72 73 ** **) - cvtpi2ps xmm1,mm1 ; xmm1=(70 71 ** **) - - mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movlhps xmm5,xmm4 ; xmm5=in5=(50 51 52 53) - movlhps xmm1,xmm0 ; xmm1=in7=(70 71 72 73) - mulps xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm1, 
XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movaps xmm4,xmm2 - movaps xmm0,xmm5 - addps xmm2,xmm1 ; xmm2=z11 - addps xmm5,xmm3 ; xmm5=z13 - subps xmm4,xmm1 ; xmm4=z12 - subps xmm0,xmm3 ; xmm0=z10 - - movaps xmm1,xmm2 - subps xmm2,xmm5 - addps xmm1,xmm5 ; xmm1=tmp7 - - mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 - - movaps xmm3,xmm0 - addps xmm0,xmm4 - mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 - mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) - mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) - addps xmm3,xmm0 ; xmm3=tmp12 - subps xmm4,xmm0 ; xmm4=tmp10 - - ; -- Final output stage - - subps xmm3,xmm1 ; xmm3=tmp6 - movaps xmm5,xmm6 - movaps xmm0,xmm7 - addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) - addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) - subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) - subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) - subps xmm2,xmm3 ; xmm2=tmp5 - - movaps xmm1,xmm6 ; transpose coefficients(phase 1) - unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) - unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) - movaps xmm3,xmm0 ; transpose coefficients(phase 1) - unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) - unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) - - movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 - movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 - - movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) - movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) - - addps xmm4,xmm2 ; xmm4=tmp4 - movaps xmm0,xmm7 - movaps xmm3,xmm5 - addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) - addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) - subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) - subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) - - movaps xmm2,xmm7 ; transpose coefficients(phase 1) - unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) - unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) - movaps xmm4,xmm5 ; transpose coefficients(phase 1) - unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) - unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) - - movaps xmm3,xmm6 ; transpose coefficients(phase 2) - unpcklps2 xmm6,xmm7 ; xmm6=(00 10 
20 30) - unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) - movaps xmm0,xmm1 ; transpose coefficients(phase 2) - unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) - unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) - - movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) - movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) - - movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6 - movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 - movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0 - - movaps xmm6,xmm5 ; transpose coefficients(phase 2) - unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) - unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) - movaps xmm3,xmm4 ; transpose coefficients(phase 2) - unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) - unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) - - movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 - movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4 - movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 - -.nextcolumn: - add esi, byte 4*SIZEOF_JCOEF ; coef_block - add edx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr - add edi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr - dec ecx ; ctr - jnz near .columnloop - - ; -- Prefetch the next coefficient block - - prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] - prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] - prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] - prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows from work array, store into output array. 
- - mov eax, [original_ebp] - lea esi, [workspace] ; FAST_FLOAT *wsptr - mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(eax)] - mov ecx, DCTSIZE/4 ; ctr - alignx 16,7 -.rowloop: - - ; -- Even part - - movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] - - movaps xmm4,xmm0 - movaps xmm5,xmm1 - subps xmm0,xmm2 ; xmm0=tmp11 - subps xmm1,xmm3 - addps xmm4,xmm2 ; xmm4=tmp10 - addps xmm5,xmm3 ; xmm5=tmp13 - - mulps xmm1,[GOTOFF(ebx,PD_1_414)] - subps xmm1,xmm5 ; xmm1=tmp12 - - movaps xmm6,xmm4 - movaps xmm7,xmm0 - subps xmm4,xmm5 ; xmm4=tmp3 - subps xmm0,xmm1 ; xmm0=tmp2 - addps xmm6,xmm5 ; xmm6=tmp0 - addps xmm7,xmm1 ; xmm7=tmp1 - - movaps XMMWORD [wk(1)], xmm4 ; tmp3 - movaps XMMWORD [wk(0)], xmm0 ; tmp2 - - ; -- Odd part - - movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] - - movaps xmm4,xmm2 - movaps xmm0,xmm5 - addps xmm2,xmm1 ; xmm2=z11 - addps xmm5,xmm3 ; xmm5=z13 - subps xmm4,xmm1 ; xmm4=z12 - subps xmm0,xmm3 ; xmm0=z10 - - movaps xmm1,xmm2 - subps xmm2,xmm5 - addps xmm1,xmm5 ; xmm1=tmp7 - - mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 - - movaps xmm3,xmm0 - addps xmm0,xmm4 - mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 - mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) - mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) - addps xmm3,xmm0 ; xmm3=tmp12 - subps xmm4,xmm0 ; xmm4=tmp10 - - ; -- Final output stage - - subps xmm3,xmm1 ; xmm3=tmp6 - movaps xmm5,xmm6 - movaps xmm0,xmm7 - addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) - addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) - subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) - subps 
xmm0,xmm3 ; xmm0=data6=(06 16 26 36) - subps xmm2,xmm3 ; xmm2=tmp5 - - movaps xmm1,[GOTOFF(ebx,PD_0_125)] ; xmm1=[PD_0_125] - - mulps xmm6,xmm1 ; descale(1/8) - mulps xmm7,xmm1 ; descale(1/8) - mulps xmm5,xmm1 ; descale(1/8) - mulps xmm0,xmm1 ; descale(1/8) - - movhlps xmm3,xmm6 - movhlps xmm1,xmm7 - cvtps2pi mm0,xmm6 ; round to int32, mm0=data0L=(00 10) - cvtps2pi mm1,xmm7 ; round to int32, mm1=data1L=(01 11) - cvtps2pi mm2,xmm3 ; round to int32, mm2=data0H=(20 30) - cvtps2pi mm3,xmm1 ; round to int32, mm3=data1H=(21 31) - packssdw mm0,mm2 ; mm0=data0=(00 10 20 30) - packssdw mm1,mm3 ; mm1=data1=(01 11 21 31) - - movhlps xmm6,xmm5 - movhlps xmm7,xmm0 - cvtps2pi mm4,xmm5 ; round to int32, mm4=data7L=(07 17) - cvtps2pi mm5,xmm0 ; round to int32, mm5=data6L=(06 16) - cvtps2pi mm6,xmm6 ; round to int32, mm6=data7H=(27 37) - cvtps2pi mm7,xmm7 ; round to int32, mm7=data6H=(26 36) - packssdw mm4,mm6 ; mm4=data7=(07 17 27 37) - packssdw mm5,mm7 ; mm5=data6=(06 16 26 36) - - packsswb mm0,mm5 ; mm0=(00 10 20 30 06 16 26 36) - packsswb mm1,mm4 ; mm1=(01 11 21 31 07 17 27 37) - - movaps xmm3, XMMWORD [wk(0)] ; xmm3=tmp2 - movaps xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 - - movaps xmm6,[GOTOFF(ebx,PD_0_125)] ; xmm6=[PD_0_125] - - addps xmm4,xmm2 ; xmm4=tmp4 - movaps xmm5,xmm3 - movaps xmm0,xmm1 - addps xmm3,xmm2 ; xmm3=data2=(02 12 22 32) - addps xmm1,xmm4 ; xmm1=data4=(04 14 24 34) - subps xmm5,xmm2 ; xmm5=data5=(05 15 25 35) - subps xmm0,xmm4 ; xmm0=data3=(03 13 23 33) - - mulps xmm3,xmm6 ; descale(1/8) - mulps xmm1,xmm6 ; descale(1/8) - mulps xmm5,xmm6 ; descale(1/8) - mulps xmm0,xmm6 ; descale(1/8) - - movhlps xmm7,xmm3 - movhlps xmm2,xmm1 - cvtps2pi mm2,xmm3 ; round to int32, mm2=data2L=(02 12) - cvtps2pi mm3,xmm1 ; round to int32, mm3=data4L=(04 14) - cvtps2pi mm6,xmm7 ; round to int32, mm6=data2H=(22 32) - cvtps2pi mm7,xmm2 ; round to int32, mm7=data4H=(24 34) - packssdw mm2,mm6 ; mm2=data2=(02 12 22 32) - packssdw mm3,mm7 ; mm3=data4=(04 14 24 34) - - movhlps xmm4,xmm5 - 
movhlps xmm6,xmm0 - cvtps2pi mm5,xmm5 ; round to int32, mm5=data5L=(05 15) - cvtps2pi mm4,xmm0 ; round to int32, mm4=data3L=(03 13) - cvtps2pi mm6,xmm4 ; round to int32, mm6=data5H=(25 35) - cvtps2pi mm7,xmm6 ; round to int32, mm7=data3H=(23 33) - packssdw mm5,mm6 ; mm5=data5=(05 15 25 35) - packssdw mm4,mm7 ; mm4=data3=(03 13 23 33) - - movq mm6,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm6=[PB_CENTERJSAMP] - - packsswb mm2,mm3 ; mm2=(02 12 22 32 04 14 24 34) - packsswb mm4,mm5 ; mm4=(03 13 23 33 05 15 25 35) - - paddb mm0,mm6 - paddb mm1,mm6 - paddb mm2,mm6 - paddb mm4,mm6 - - movq mm7,mm0 ; transpose coefficients(phase 1) - punpcklbw mm0,mm1 ; mm0=(00 01 10 11 20 21 30 31) - punpckhbw mm7,mm1 ; mm7=(06 07 16 17 26 27 36 37) - movq mm3,mm2 ; transpose coefficients(phase 1) - punpcklbw mm2,mm4 ; mm2=(02 03 12 13 22 23 32 33) - punpckhbw mm3,mm4 ; mm3=(04 05 14 15 24 25 34 35) - - movq mm5,mm0 ; transpose coefficients(phase 2) - punpcklwd mm0,mm2 ; mm0=(00 01 02 03 10 11 12 13) - punpckhwd mm5,mm2 ; mm5=(20 21 22 23 30 31 32 33) - movq mm6,mm3 ; transpose coefficients(phase 2) - punpcklwd mm3,mm7 ; mm3=(04 05 06 07 14 15 16 17) - punpckhwd mm6,mm7 ; mm6=(24 25 26 27 34 35 36 37) - - movq mm1,mm0 ; transpose coefficients(phase 3) - punpckldq mm0,mm3 ; mm0=(00 01 02 03 04 05 06 07) - punpckhdq mm1,mm3 ; mm1=(10 11 12 13 14 15 16 17) - movq mm4,mm5 ; transpose coefficients(phase 3) - punpckldq mm5,mm6 ; mm5=(20 21 22 23 24 25 26 27) - punpckhdq mm4,mm6 ; mm4=(30 31 32 33 34 35 36 37) - - pushpic ebx ; save GOT address - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0 - movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1 - mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] - mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] - movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5 - movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4 - - poppic ebx ; restore GOT address - - add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr - add edi, byte 
4*SIZEOF_JSAMPROW - dec ecx ; ctr - jnz near .rowloop - - emms ; empty MMX state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jidctflt-sse2-64.asm b/simd/jidctflt-sse2-64.asm deleted file mode 100644 index bdda05d..0000000 --- a/simd/jidctflt-sse2-64.asm +++ /dev/null @@ -1,482 +0,0 @@ -; -; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a floating-point implementation of the inverse DCT -; (Discrete Cosine Transform). The following code is based directly on -; the IJG's original jidctflt.c; see the jidctflt.c for more details. 
-; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) - shufps %1,%2,0x44 -%endmacro - -%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) - shufps %1,%2,0xEE -%endmacro - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_idct_float_sse2) - -EXTN(jconst_idct_float_sse2): - -PD_1_414 times 4 dd 1.414213562373095048801689 -PD_1_847 times 4 dd 1.847759065022573512256366 -PD_1_082 times 4 dd 1.082392200292393968799446 -PD_M2_613 times 4 dd -2.613125929752753055713286 -PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) -PB_CENTERJSAMP times 16 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Perform dequantization and inverse DCT on one block of coefficients. -; -; GLOBAL(void) -; jsimd_idct_float_sse2 (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -; r10 = void *dct_table -; r11 = JCOEFPTR coef_block -; r12 = JSAMPARRAY output_buf -; r13 = JDIMENSION output_col - -%define original_rbp rbp+0 -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 -%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT - ; FAST_FLOAT workspace[DCTSIZE2] - - align 16 - global EXTN(jsimd_idct_float_sse2) - -EXTN(jsimd_idct_float_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [workspace] - collect_args - push rbx - - ; ---- Pass 1: process columns from input, store into work array. 
- - mov rdx, r10 ; quantptr - mov rsi, r11 ; inptr - lea rdi, [workspace] ; FAST_FLOAT *wsptr - mov rcx, DCTSIZE/4 ; ctr -.columnloop: -%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE - mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] - jnz near .columnDCT - - movq xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)] - movq xmm2, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)] - movq xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)] - movq xmm4, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)] - movq xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)] - movq xmm6, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)] - movq xmm7, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)] - por xmm1,xmm2 - por xmm3,xmm4 - por xmm5,xmm6 - por xmm1,xmm3 - por xmm5,xmm7 - por xmm1,xmm5 - packsswb xmm1,xmm1 - movd eax,xmm1 - test rax,rax - jnz short .columnDCT - - ; -- AC terms all zero - - movq xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)] - - punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) - psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) - cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) - - mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] - - movaps xmm1,xmm0 - movaps xmm2,xmm0 - movaps xmm3,xmm0 - - shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) - shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) - shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) - shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) - - movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm2 - movaps XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm2 - movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm3 - movaps XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3 - jmp near .nextcolumn -%endif -.columnDCT: - - ; -- Even part - - movq xmm0, XMM_MMWORD 
[MMBLOCK(0,0,rsi,SIZEOF_JCOEF)] - movq xmm1, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)] - movq xmm2, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)] - movq xmm3, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)] - - punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) - punpcklwd xmm1,xmm1 ; xmm1=(20 20 21 21 22 22 23 23) - psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) - psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in2=(20 21 22 23) - cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) - cvtdq2ps xmm1,xmm1 ; xmm1=in2=(20 21 22 23) - - punpcklwd xmm2,xmm2 ; xmm2=(40 40 41 41 42 42 43 43) - punpcklwd xmm3,xmm3 ; xmm3=(60 60 61 61 62 62 63 63) - psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in4=(40 41 42 43) - psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in6=(60 61 62 63) - cvtdq2ps xmm2,xmm2 ; xmm2=in4=(40 41 42 43) - cvtdq2ps xmm3,xmm3 ; xmm3=in6=(60 61 62 63) - - mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] - - movaps xmm4,xmm0 - movaps xmm5,xmm1 - subps xmm0,xmm2 ; xmm0=tmp11 - subps xmm1,xmm3 - addps xmm4,xmm2 ; xmm4=tmp10 - addps xmm5,xmm3 ; xmm5=tmp13 - - mulps xmm1,[rel PD_1_414] - subps xmm1,xmm5 ; xmm1=tmp12 - - movaps xmm6,xmm4 - movaps xmm7,xmm0 - subps xmm4,xmm5 ; xmm4=tmp3 - subps xmm0,xmm1 ; xmm0=tmp2 - addps xmm6,xmm5 ; xmm6=tmp0 - addps xmm7,xmm1 ; xmm7=tmp1 - - movaps XMMWORD [wk(1)], xmm4 ; tmp3 - movaps XMMWORD [wk(0)], xmm0 ; tmp2 - - ; -- Odd part - - movq xmm2, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)] - movq xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)] - movq xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)] - movq xmm1, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)] - - punpcklwd xmm2,xmm2 ; xmm2=(10 10 11 11 12 12 13 13) - punpcklwd xmm3,xmm3 ; xmm3=(30 30 31 31 32 32 33 33) - psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in1=(10 11 12 13) - psrad 
xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in3=(30 31 32 33) - cvtdq2ps xmm2,xmm2 ; xmm2=in1=(10 11 12 13) - cvtdq2ps xmm3,xmm3 ; xmm3=in3=(30 31 32 33) - - punpcklwd xmm5,xmm5 ; xmm5=(50 50 51 51 52 52 53 53) - punpcklwd xmm1,xmm1 ; xmm1=(70 70 71 71 72 72 73 73) - psrad xmm5,(DWORD_BIT-WORD_BIT) ; xmm5=in5=(50 51 52 53) - psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in7=(70 71 72 73) - cvtdq2ps xmm5,xmm5 ; xmm5=in5=(50 51 52 53) - cvtdq2ps xmm1,xmm1 ; xmm1=in7=(70 71 72 73) - - mulps xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FLOAT_MULT_TYPE)] - - movaps xmm4,xmm2 - movaps xmm0,xmm5 - addps xmm2,xmm1 ; xmm2=z11 - addps xmm5,xmm3 ; xmm5=z13 - subps xmm4,xmm1 ; xmm4=z12 - subps xmm0,xmm3 ; xmm0=z10 - - movaps xmm1,xmm2 - subps xmm2,xmm5 - addps xmm1,xmm5 ; xmm1=tmp7 - - mulps xmm2,[rel PD_1_414] ; xmm2=tmp11 - - movaps xmm3,xmm0 - addps xmm0,xmm4 - mulps xmm0,[rel PD_1_847] ; xmm0=z5 - mulps xmm3,[rel PD_M2_613] ; xmm3=(z10 * -2.613125930) - mulps xmm4,[rel PD_1_082] ; xmm4=(z12 * 1.082392200) - addps xmm3,xmm0 ; xmm3=tmp12 - subps xmm4,xmm0 ; xmm4=tmp10 - - ; -- Final output stage - - subps xmm3,xmm1 ; xmm3=tmp6 - movaps xmm5,xmm6 - movaps xmm0,xmm7 - addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) - addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) - subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) - subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) - subps xmm2,xmm3 ; xmm2=tmp5 - - movaps xmm1,xmm6 ; transpose coefficients(phase 1) - unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) - unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) - movaps xmm3,xmm0 ; transpose coefficients(phase 1) - unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) - unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) - - movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 - movaps xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 - - movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) - movaps XMMWORD [wk(1)], 
xmm3 ; wk(1)=(62 72 63 73) - - addps xmm4,xmm2 ; xmm4=tmp4 - movaps xmm0,xmm7 - movaps xmm3,xmm5 - addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) - addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) - subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) - subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) - - movaps xmm2,xmm7 ; transpose coefficients(phase 1) - unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) - unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) - movaps xmm4,xmm5 ; transpose coefficients(phase 1) - unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) - unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) - - movaps xmm3,xmm6 ; transpose coefficients(phase 2) - unpcklps2 xmm6,xmm7 ; xmm6=(00 10 20 30) - unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) - movaps xmm0,xmm1 ; transpose coefficients(phase 2) - unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) - unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) - - movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) - movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) - - movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm6 - movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3 - movaps XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0 - - movaps xmm6,xmm5 ; transpose coefficients(phase 2) - unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) - unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) - movaps xmm3,xmm4 ; transpose coefficients(phase 2) - unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) - unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) - - movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6 - movaps XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm4 - movaps XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3 - -.nextcolumn: - add rsi, byte 4*SIZEOF_JCOEF ; coef_block - add rdx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr - add rdi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr - dec rcx ; ctr - jnz near .columnloop - - ; -- Prefetch the next coefficient block - - prefetchnta [rsi + 
(DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] - prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] - prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] - prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows from work array, store into output array. - - mov rax, [original_rbp] - lea rsi, [workspace] ; FAST_FLOAT *wsptr - mov rdi, r12 ; (JSAMPROW *) - mov eax, r13d - mov rcx, DCTSIZE/4 ; ctr -.rowloop: - - ; -- Even part - - movaps xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_FAST_FLOAT)] - movaps xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_FAST_FLOAT)] - - movaps xmm4,xmm0 - movaps xmm5,xmm1 - subps xmm0,xmm2 ; xmm0=tmp11 - subps xmm1,xmm3 - addps xmm4,xmm2 ; xmm4=tmp10 - addps xmm5,xmm3 ; xmm5=tmp13 - - mulps xmm1,[rel PD_1_414] - subps xmm1,xmm5 ; xmm1=tmp12 - - movaps xmm6,xmm4 - movaps xmm7,xmm0 - subps xmm4,xmm5 ; xmm4=tmp3 - subps xmm0,xmm1 ; xmm0=tmp2 - addps xmm6,xmm5 ; xmm6=tmp0 - addps xmm7,xmm1 ; xmm7=tmp1 - - movaps XMMWORD [wk(1)], xmm4 ; tmp3 - movaps XMMWORD [wk(0)], xmm0 ; tmp2 - - ; -- Odd part - - movaps xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_FAST_FLOAT)] - movaps xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_FAST_FLOAT)] - - movaps xmm4,xmm2 - movaps xmm0,xmm5 - addps xmm2,xmm1 ; xmm2=z11 - addps xmm5,xmm3 ; xmm5=z13 - subps xmm4,xmm1 ; xmm4=z12 - subps xmm0,xmm3 ; xmm0=z10 - - movaps xmm1,xmm2 - subps xmm2,xmm5 - addps xmm1,xmm5 ; xmm1=tmp7 - - mulps xmm2,[rel PD_1_414] ; xmm2=tmp11 - - movaps xmm3,xmm0 - addps xmm0,xmm4 - mulps xmm0,[rel PD_1_847] ; xmm0=z5 - mulps xmm3,[rel PD_M2_613] ; xmm3=(z10 * -2.613125930) - mulps xmm4,[rel PD_1_082] ; xmm4=(z12 * 1.082392200) - addps xmm3,xmm0 ; xmm3=tmp12 - subps xmm4,xmm0 ; xmm4=tmp10 - - ; -- Final output stage - - subps xmm3,xmm1 ; xmm3=tmp6 - movaps 
xmm5,xmm6 - movaps xmm0,xmm7 - addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) - addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) - subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) - subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36) - subps xmm2,xmm3 ; xmm2=tmp5 - - movaps xmm1,[rel PD_RNDINT_MAGIC] ; xmm1=[rel PD_RNDINT_MAGIC] - pcmpeqd xmm3,xmm3 - psrld xmm3,WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..} - - addps xmm6,xmm1 ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **) - addps xmm7,xmm1 ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **) - addps xmm0,xmm1 ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **) - addps xmm5,xmm1 ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **) - - pand xmm6,xmm3 ; xmm6=(00 -- 10 -- 20 -- 30 --) - pslld xmm7,WORD_BIT ; xmm7=(-- 01 -- 11 -- 21 -- 31) - pand xmm0,xmm3 ; xmm0=(06 -- 16 -- 26 -- 36 --) - pslld xmm5,WORD_BIT ; xmm5=(-- 07 -- 17 -- 27 -- 37) - por xmm6,xmm7 ; xmm6=(00 01 10 11 20 21 30 31) - por xmm0,xmm5 ; xmm0=(06 07 16 17 26 27 36 37) - - movaps xmm1, XMMWORD [wk(0)] ; xmm1=tmp2 - movaps xmm3, XMMWORD [wk(1)] ; xmm3=tmp3 - - addps xmm4,xmm2 ; xmm4=tmp4 - movaps xmm7,xmm1 - movaps xmm5,xmm3 - addps xmm1,xmm2 ; xmm1=data2=(02 12 22 32) - addps xmm3,xmm4 ; xmm3=data4=(04 14 24 34) - subps xmm7,xmm2 ; xmm7=data5=(05 15 25 35) - subps xmm5,xmm4 ; xmm5=data3=(03 13 23 33) - - movaps xmm2,[rel PD_RNDINT_MAGIC] ; xmm2=[rel PD_RNDINT_MAGIC] - pcmpeqd xmm4,xmm4 - psrld xmm4,WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..} - - addps xmm3,xmm2 ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **) - addps xmm7,xmm2 ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **) - addps xmm1,xmm2 ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **) - addps xmm5,xmm2 ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **) - - pand xmm3,xmm4 ; xmm3=(04 -- 14 -- 24 -- 34 --) - pslld xmm7,WORD_BIT ; xmm7=(-- 05 -- 15 -- 25 -- 35) - pand xmm1,xmm4 ; xmm1=(02 -- 12 -- 22 -- 32 --) - pslld xmm5,WORD_BIT ; xmm5=(-- 03 -- 13 -- 23 -- 33) - por xmm3,xmm7 ; xmm3=(04 05 14 15 24 25 34 
35) - por xmm1,xmm5 ; xmm1=(02 03 12 13 22 23 32 33) - - movdqa xmm2,[rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP] - - packsswb xmm6,xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35) - packsswb xmm1,xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37) - paddb xmm6,xmm2 - paddb xmm1,xmm2 - - movdqa xmm4,xmm6 ; transpose coefficients(phase 2) - punpcklwd xmm6,xmm1 ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) - punpckhwd xmm4,xmm1 ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) - - movdqa xmm7,xmm6 ; transpose coefficients(phase 3) - punpckldq xmm6,xmm4 ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) - punpckhdq xmm7,xmm4 ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) - - pshufd xmm5,xmm6,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) - pshufd xmm3,xmm7,0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) - - mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] - mov rbx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] - movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 - movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7 - mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] - mov rbx, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] - movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5 - movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3 - - add rsi, byte 4*SIZEOF_FAST_FLOAT ; wsptr - add rdi, byte 4*SIZEOF_JSAMPROW - dec rcx ; ctr - jnz near .rowloop - - pop rbx - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jidctflt-sse2.asm b/simd/jidctflt-sse2.asm deleted file mode 100644 index a15a9c1..0000000 --- a/simd/jidctflt-sse2.asm +++ /dev/null @@ -1,497 +0,0 @@ -; -; jidctflt.asm - floating-point IDCT (SSE & SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a floating-point implementation of the inverse DCT -; (Discrete Cosine Transform). The following code is based directly on -; the IJG's original jidctflt.c; see the jidctflt.c for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) - shufps %1,%2,0x44 -%endmacro - -%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) - shufps %1,%2,0xEE -%endmacro - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_idct_float_sse2) - -EXTN(jconst_idct_float_sse2): - -PD_1_414 times 4 dd 1.414213562373095048801689 -PD_1_847 times 4 dd 1.847759065022573512256366 -PD_1_082 times 4 dd 1.082392200292393968799446 -PD_M2_613 times 4 dd -2.613125929752753055713286 -PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) -PB_CENTERJSAMP times 16 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform dequantization and inverse DCT on one block of coefficients. 
-; -; GLOBAL(void) -; jsimd_idct_float_sse2 (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; void *dct_table -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 -%define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT - ; FAST_FLOAT workspace[DCTSIZE2] - - align 16 - global EXTN(jsimd_idct_float_sse2) - -EXTN(jsimd_idct_float_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [workspace] - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input, store into work array. 
- -; mov eax, [original_ebp] - mov edx, POINTER [dct_table(eax)] ; quantptr - mov esi, JCOEFPTR [coef_block(eax)] ; inptr - lea edi, [workspace] ; FAST_FLOAT *wsptr - mov ecx, DCTSIZE/4 ; ctr - alignx 16,7 -.columnloop: -%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - jnz near .columnDCT - - movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq xmm2, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - movq xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - movq xmm4, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] - movq xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movq xmm6, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - movq xmm7, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - por xmm1,xmm2 - por xmm3,xmm4 - por xmm5,xmm6 - por xmm1,xmm3 - por xmm5,xmm7 - por xmm1,xmm5 - packsswb xmm1,xmm1 - movd eax,xmm1 - test eax,eax - jnz short .columnDCT - - ; -- AC terms all zero - - movq xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - - punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) - psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) - cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) - - mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movaps xmm1,xmm0 - movaps xmm2,xmm0 - movaps xmm3,xmm0 - - shufps xmm0,xmm0,0x00 ; xmm0=(00 00 00 00) - shufps xmm1,xmm1,0x55 ; xmm1=(01 01 01 01) - shufps xmm2,xmm2,0xAA ; xmm2=(02 02 02 02) - shufps xmm3,xmm3,0xFF ; xmm3=(03 03 03 03) - - movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2 - movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2 - movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3 - movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 - jmp near 
.nextcolumn - alignx 16,7 -%endif -.columnDCT: - - ; -- Even part - - movq xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movq xmm1, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - movq xmm2, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] - movq xmm3, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - - punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) - punpcklwd xmm1,xmm1 ; xmm1=(20 20 21 21 22 22 23 23) - psrad xmm0,(DWORD_BIT-WORD_BIT) ; xmm0=in0=(00 01 02 03) - psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in2=(20 21 22 23) - cvtdq2ps xmm0,xmm0 ; xmm0=in0=(00 01 02 03) - cvtdq2ps xmm1,xmm1 ; xmm1=in2=(20 21 22 23) - - punpcklwd xmm2,xmm2 ; xmm2=(40 40 41 41 42 42 43 43) - punpcklwd xmm3,xmm3 ; xmm3=(60 60 61 61 62 62 63 63) - psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in4=(40 41 42 43) - psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in6=(60 61 62 63) - cvtdq2ps xmm2,xmm2 ; xmm2=in4=(40 41 42 43) - cvtdq2ps xmm3,xmm3 ; xmm3=in6=(60 61 62 63) - - mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movaps xmm4,xmm0 - movaps xmm5,xmm1 - subps xmm0,xmm2 ; xmm0=tmp11 - subps xmm1,xmm3 - addps xmm4,xmm2 ; xmm4=tmp10 - addps xmm5,xmm3 ; xmm5=tmp13 - - mulps xmm1,[GOTOFF(ebx,PD_1_414)] - subps xmm1,xmm5 ; xmm1=tmp12 - - movaps xmm6,xmm4 - movaps xmm7,xmm0 - subps xmm4,xmm5 ; xmm4=tmp3 - subps xmm0,xmm1 ; xmm0=tmp2 - addps xmm6,xmm5 ; xmm6=tmp0 - addps xmm7,xmm1 ; xmm7=tmp1 - - movaps XMMWORD [wk(1)], xmm4 ; tmp3 - movaps XMMWORD [wk(0)], xmm0 ; tmp2 - - ; -- Odd part - - movq xmm2, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - movq xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movq xmm1, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - - punpcklwd xmm2,xmm2 ; xmm2=(10 10 11 11 12 12 13 13) - punpcklwd xmm3,xmm3 ; xmm3=(30 30 31 31 
32 32 33 33) - psrad xmm2,(DWORD_BIT-WORD_BIT) ; xmm2=in1=(10 11 12 13) - psrad xmm3,(DWORD_BIT-WORD_BIT) ; xmm3=in3=(30 31 32 33) - cvtdq2ps xmm2,xmm2 ; xmm2=in1=(10 11 12 13) - cvtdq2ps xmm3,xmm3 ; xmm3=in3=(30 31 32 33) - - punpcklwd xmm5,xmm5 ; xmm5=(50 50 51 51 52 52 53 53) - punpcklwd xmm1,xmm1 ; xmm1=(70 70 71 71 72 72 73 73) - psrad xmm5,(DWORD_BIT-WORD_BIT) ; xmm5=in5=(50 51 52 53) - psrad xmm1,(DWORD_BIT-WORD_BIT) ; xmm1=in7=(70 71 72 73) - cvtdq2ps xmm5,xmm5 ; xmm5=in5=(50 51 52 53) - cvtdq2ps xmm1,xmm1 ; xmm1=in7=(70 71 72 73) - - mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - mulps xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] - - movaps xmm4,xmm2 - movaps xmm0,xmm5 - addps xmm2,xmm1 ; xmm2=z11 - addps xmm5,xmm3 ; xmm5=z13 - subps xmm4,xmm1 ; xmm4=z12 - subps xmm0,xmm3 ; xmm0=z10 - - movaps xmm1,xmm2 - subps xmm2,xmm5 - addps xmm1,xmm5 ; xmm1=tmp7 - - mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 - - movaps xmm3,xmm0 - addps xmm0,xmm4 - mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 - mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) - mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) - addps xmm3,xmm0 ; xmm3=tmp12 - subps xmm4,xmm0 ; xmm4=tmp10 - - ; -- Final output stage - - subps xmm3,xmm1 ; xmm3=tmp6 - movaps xmm5,xmm6 - movaps xmm0,xmm7 - addps xmm6,xmm1 ; xmm6=data0=(00 01 02 03) - addps xmm7,xmm3 ; xmm7=data1=(10 11 12 13) - subps xmm5,xmm1 ; xmm5=data7=(70 71 72 73) - subps xmm0,xmm3 ; xmm0=data6=(60 61 62 63) - subps xmm2,xmm3 ; xmm2=tmp5 - - movaps xmm1,xmm6 ; transpose coefficients(phase 1) - unpcklps xmm6,xmm7 ; xmm6=(00 10 01 11) - unpckhps xmm1,xmm7 ; xmm1=(02 12 03 13) - movaps xmm3,xmm0 ; transpose coefficients(phase 1) - unpcklps xmm0,xmm5 ; xmm0=(60 70 61 71) - unpckhps xmm3,xmm5 ; xmm3=(62 72 63 73) - - movaps xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 - movaps 
xmm5, XMMWORD [wk(1)] ; xmm5=tmp3 - - movaps XMMWORD [wk(0)], xmm0 ; wk(0)=(60 70 61 71) - movaps XMMWORD [wk(1)], xmm3 ; wk(1)=(62 72 63 73) - - addps xmm4,xmm2 ; xmm4=tmp4 - movaps xmm0,xmm7 - movaps xmm3,xmm5 - addps xmm7,xmm2 ; xmm7=data2=(20 21 22 23) - addps xmm5,xmm4 ; xmm5=data4=(40 41 42 43) - subps xmm0,xmm2 ; xmm0=data5=(50 51 52 53) - subps xmm3,xmm4 ; xmm3=data3=(30 31 32 33) - - movaps xmm2,xmm7 ; transpose coefficients(phase 1) - unpcklps xmm7,xmm3 ; xmm7=(20 30 21 31) - unpckhps xmm2,xmm3 ; xmm2=(22 32 23 33) - movaps xmm4,xmm5 ; transpose coefficients(phase 1) - unpcklps xmm5,xmm0 ; xmm5=(40 50 41 51) - unpckhps xmm4,xmm0 ; xmm4=(42 52 43 53) - - movaps xmm3,xmm6 ; transpose coefficients(phase 2) - unpcklps2 xmm6,xmm7 ; xmm6=(00 10 20 30) - unpckhps2 xmm3,xmm7 ; xmm3=(01 11 21 31) - movaps xmm0,xmm1 ; transpose coefficients(phase 2) - unpcklps2 xmm1,xmm2 ; xmm1=(02 12 22 32) - unpckhps2 xmm0,xmm2 ; xmm0=(03 13 23 33) - - movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71) - movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73) - - movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6 - movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 - movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1 - movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0 - - movaps xmm6,xmm5 ; transpose coefficients(phase 2) - unpcklps2 xmm5,xmm7 ; xmm5=(40 50 60 70) - unpckhps2 xmm6,xmm7 ; xmm6=(41 51 61 71) - movaps xmm3,xmm4 ; transpose coefficients(phase 2) - unpcklps2 xmm4,xmm2 ; xmm4=(42 52 62 72) - unpckhps2 xmm3,xmm2 ; xmm3=(43 53 63 73) - - movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5 - movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 - movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4 - movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3 - -.nextcolumn: - add esi, byte 4*SIZEOF_JCOEF ; coef_block - add edx, byte 4*SIZEOF_FLOAT_MULT_TYPE ; quantptr - add edi, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; wsptr - dec ecx ; 
ctr - jnz near .columnloop - - ; -- Prefetch the next coefficient block - - prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32] - prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32] - prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32] - prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows from work array, store into output array. - - mov eax, [original_ebp] - lea esi, [workspace] ; FAST_FLOAT *wsptr - mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(eax)] - mov ecx, DCTSIZE/4 ; ctr - alignx 16,7 -.rowloop: - - ; -- Even part - - movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)] - - movaps xmm4,xmm0 - movaps xmm5,xmm1 - subps xmm0,xmm2 ; xmm0=tmp11 - subps xmm1,xmm3 - addps xmm4,xmm2 ; xmm4=tmp10 - addps xmm5,xmm3 ; xmm5=tmp13 - - mulps xmm1,[GOTOFF(ebx,PD_1_414)] - subps xmm1,xmm5 ; xmm1=tmp12 - - movaps xmm6,xmm4 - movaps xmm7,xmm0 - subps xmm4,xmm5 ; xmm4=tmp3 - subps xmm0,xmm1 ; xmm0=tmp2 - addps xmm6,xmm5 ; xmm6=tmp0 - addps xmm7,xmm1 ; xmm7=tmp1 - - movaps XMMWORD [wk(1)], xmm4 ; tmp3 - movaps XMMWORD [wk(0)], xmm0 ; tmp2 - - ; -- Odd part - - movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)] - - movaps xmm4,xmm2 - movaps xmm0,xmm5 - addps xmm2,xmm1 ; xmm2=z11 - addps xmm5,xmm3 ; xmm5=z13 - subps xmm4,xmm1 ; xmm4=z12 - subps xmm0,xmm3 ; xmm0=z10 - - movaps xmm1,xmm2 - subps xmm2,xmm5 - addps xmm1,xmm5 ; xmm1=tmp7 - - mulps xmm2,[GOTOFF(ebx,PD_1_414)] ; xmm2=tmp11 - - movaps xmm3,xmm0 - addps xmm0,xmm4 - mulps xmm0,[GOTOFF(ebx,PD_1_847)] ; xmm0=z5 - mulps xmm3,[GOTOFF(ebx,PD_M2_613)] ; xmm3=(z10 * -2.613125930) 
- mulps xmm4,[GOTOFF(ebx,PD_1_082)] ; xmm4=(z12 * 1.082392200) - addps xmm3,xmm0 ; xmm3=tmp12 - subps xmm4,xmm0 ; xmm4=tmp10 - - ; -- Final output stage - - subps xmm3,xmm1 ; xmm3=tmp6 - movaps xmm5,xmm6 - movaps xmm0,xmm7 - addps xmm6,xmm1 ; xmm6=data0=(00 10 20 30) - addps xmm7,xmm3 ; xmm7=data1=(01 11 21 31) - subps xmm5,xmm1 ; xmm5=data7=(07 17 27 37) - subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36) - subps xmm2,xmm3 ; xmm2=tmp5 - - movaps xmm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; xmm1=[PD_RNDINT_MAGIC] - pcmpeqd xmm3,xmm3 - psrld xmm3,WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..} - - addps xmm6,xmm1 ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **) - addps xmm7,xmm1 ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **) - addps xmm0,xmm1 ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **) - addps xmm5,xmm1 ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **) - - pand xmm6,xmm3 ; xmm6=(00 -- 10 -- 20 -- 30 --) - pslld xmm7,WORD_BIT ; xmm7=(-- 01 -- 11 -- 21 -- 31) - pand xmm0,xmm3 ; xmm0=(06 -- 16 -- 26 -- 36 --) - pslld xmm5,WORD_BIT ; xmm5=(-- 07 -- 17 -- 27 -- 37) - por xmm6,xmm7 ; xmm6=(00 01 10 11 20 21 30 31) - por xmm0,xmm5 ; xmm0=(06 07 16 17 26 27 36 37) - - movaps xmm1, XMMWORD [wk(0)] ; xmm1=tmp2 - movaps xmm3, XMMWORD [wk(1)] ; xmm3=tmp3 - - addps xmm4,xmm2 ; xmm4=tmp4 - movaps xmm7,xmm1 - movaps xmm5,xmm3 - addps xmm1,xmm2 ; xmm1=data2=(02 12 22 32) - addps xmm3,xmm4 ; xmm3=data4=(04 14 24 34) - subps xmm7,xmm2 ; xmm7=data5=(05 15 25 35) - subps xmm5,xmm4 ; xmm5=data3=(03 13 23 33) - - movaps xmm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)] ; xmm2=[PD_RNDINT_MAGIC] - pcmpeqd xmm4,xmm4 - psrld xmm4,WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..} - - addps xmm3,xmm2 ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **) - addps xmm7,xmm2 ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **) - addps xmm1,xmm2 ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **) - addps xmm5,xmm2 ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **) - - pand xmm3,xmm4 ; xmm3=(04 -- 14 -- 24 -- 34 --) - 
pslld xmm7,WORD_BIT ; xmm7=(-- 05 -- 15 -- 25 -- 35) - pand xmm1,xmm4 ; xmm1=(02 -- 12 -- 22 -- 32 --) - pslld xmm5,WORD_BIT ; xmm5=(-- 03 -- 13 -- 23 -- 33) - por xmm3,xmm7 ; xmm3=(04 05 14 15 24 25 34 35) - por xmm1,xmm5 ; xmm1=(02 03 12 13 22 23 32 33) - - movdqa xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm2=[PB_CENTERJSAMP] - - packsswb xmm6,xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35) - packsswb xmm1,xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37) - paddb xmm6,xmm2 - paddb xmm1,xmm2 - - movdqa xmm4,xmm6 ; transpose coefficients(phase 2) - punpcklwd xmm6,xmm1 ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) - punpckhwd xmm4,xmm1 ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) - - movdqa xmm7,xmm6 ; transpose coefficients(phase 3) - punpckldq xmm6,xmm4 ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) - punpckhdq xmm7,xmm4 ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) - - pshufd xmm5,xmm6,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) - pshufd xmm3,xmm7,0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) - - pushpic ebx ; save GOT address - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] - movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 - movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm7 - mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] - movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5 - movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3 - - poppic ebx ; restore GOT address - - add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr - add edi, byte 4*SIZEOF_JSAMPROW - dec ecx ; ctr - jnz near .rowloop - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jidctfst-mmx.asm b/simd/jidctfst-mmx.asm deleted file mode 100644 index 6e95bfb..0000000 --- a/simd/jidctfst-mmx.asm +++ /dev/null @@ -1,499 +0,0 @@ -; -; jidctfst.asm - fast integer IDCT (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a fast, not so accurate integer implementation of -; the inverse DCT (Discrete Cosine Transform). The following code is -; based directly on the IJG's original jidctfst.c; see the jidctfst.c -; for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 8 ; 14 is also OK. -%define PASS1_BITS 2 - -%if IFAST_SCALE_BITS != PASS1_BITS -%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." -%endif - -%if CONST_BITS == 8 -F_1_082 equ 277 ; FIX(1.082392200) -F_1_414 equ 362 ; FIX(1.414213562) -F_1_847 equ 473 ; FIX(1.847759065) -F_2_613 equ 669 ; FIX(2.613125930) -F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. 
-%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) -F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) -F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - -; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) -; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) - -%define PRE_MULTIPLY_SCALE_BITS 2 -%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) - - alignz 16 - global EXTN(jconst_idct_ifast_mmx) - -EXTN(jconst_idct_ifast_mmx): - -PW_F1414 times 4 dw F_1_414 << CONST_SHIFT -PW_F1847 times 4 dw F_1_847 << CONST_SHIFT -PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT -PW_F1082 times 4 dw F_1_082 << CONST_SHIFT -PB_CENTERJSAMP times 8 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform dequantization and inverse DCT on one block of coefficients. 
-; -; GLOBAL(void) -; jsimd_idct_ifast_mmx (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; jpeg_component_info *compptr -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 2 -%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF - ; JCOEF workspace[DCTSIZE2] - - align 16 - global EXTN(jsimd_idct_ifast_mmx) - -EXTN(jsimd_idct_ifast_mmx): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [workspace] - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input, store into work array. 
- -; mov eax, [original_ebp] - mov edx, POINTER [dct_table(eax)] ; quantptr - mov esi, JCOEFPTR [coef_block(eax)] ; inptr - lea edi, [workspace] ; JCOEF *wsptr - mov ecx, DCTSIZE/4 ; ctr - alignx 16,7 -.columnloop: -%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - jnz short .columnDCT - - movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - por mm1,mm0 - packsswb mm1,mm1 - movd eax,mm1 - test eax,eax - jnz short .columnDCT - - ; -- AC terms all zero - - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] - - movq mm2,mm0 ; mm0=in0=(00 01 02 03) - punpcklwd mm0,mm0 ; mm0=(00 00 01 01) - punpckhwd mm2,mm2 ; mm2=(02 02 03 03) - - movq mm1,mm0 - punpckldq mm0,mm0 ; mm0=(00 00 00 00) - punpckhdq mm1,mm1 ; mm1=(01 01 01 01) - movq mm3,mm2 - punpckldq mm2,mm2 ; mm2=(02 02 02 02) - punpckhdq mm3,mm3 ; mm3=(03 03 03 03) - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 - movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1 - movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 - movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 - movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 - movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3 - jmp near .nextcolumn - alignx 16,7 -%endif -.columnDCT: - - ; -- Even part - - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] - pmullw mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] - movq mm2, 
MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] - pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] - - movq mm4,mm0 - movq mm5,mm1 - psubw mm0,mm2 ; mm0=tmp11 - psubw mm1,mm3 - paddw mm4,mm2 ; mm4=tmp10 - paddw mm5,mm3 ; mm5=tmp13 - - psllw mm1,PRE_MULTIPLY_SCALE_BITS - pmulhw mm1,[GOTOFF(ebx,PW_F1414)] - psubw mm1,mm5 ; mm1=tmp12 - - movq mm6,mm4 - movq mm7,mm0 - psubw mm4,mm5 ; mm4=tmp3 - psubw mm0,mm1 ; mm0=tmp2 - paddw mm6,mm5 ; mm6=tmp0 - paddw mm7,mm1 ; mm7=tmp1 - - movq MMWORD [wk(1)], mm4 ; wk(1)=tmp3 - movq MMWORD [wk(0)], mm0 ; wk(0)=tmp2 - - ; -- Odd part - - movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - pmullw mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] - pmullw mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] - movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - pmullw mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] - pmullw mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] - - movq mm4,mm2 - movq mm0,mm5 - psubw mm2,mm1 ; mm2=z12 - psubw mm5,mm3 ; mm5=z10 - paddw mm4,mm1 ; mm4=z11 - paddw mm0,mm3 ; mm0=z13 - - movq mm1,mm5 ; mm1=z10(unscaled) - psllw mm2,PRE_MULTIPLY_SCALE_BITS - psllw mm5,PRE_MULTIPLY_SCALE_BITS - - movq mm3,mm4 - psubw mm4,mm0 - paddw mm3,mm0 ; mm3=tmp7 - - psllw mm4,PRE_MULTIPLY_SCALE_BITS - pmulhw mm4,[GOTOFF(ebx,PW_F1414)] ; mm4=tmp11 - - ; To avoid overflow... 
- ; - ; (Original) - ; tmp12 = -2.613125930 * z10 + z5; - ; - ; (This implementation) - ; tmp12 = (-1.613125930 - 1) * z10 + z5; - ; = -1.613125930 * z10 - z10 + z5; - - movq mm0,mm5 - paddw mm5,mm2 - pmulhw mm5,[GOTOFF(ebx,PW_F1847)] ; mm5=z5 - pmulhw mm0,[GOTOFF(ebx,PW_MF1613)] - pmulhw mm2,[GOTOFF(ebx,PW_F1082)] - psubw mm0,mm1 - psubw mm2,mm5 ; mm2=tmp10 - paddw mm0,mm5 ; mm0=tmp12 - - ; -- Final output stage - - psubw mm0,mm3 ; mm0=tmp6 - movq mm1,mm6 - movq mm5,mm7 - paddw mm6,mm3 ; mm6=data0=(00 01 02 03) - paddw mm7,mm0 ; mm7=data1=(10 11 12 13) - psubw mm1,mm3 ; mm1=data7=(70 71 72 73) - psubw mm5,mm0 ; mm5=data6=(60 61 62 63) - psubw mm4,mm0 ; mm4=tmp5 - - movq mm3,mm6 ; transpose coefficients(phase 1) - punpcklwd mm6,mm7 ; mm6=(00 10 01 11) - punpckhwd mm3,mm7 ; mm3=(02 12 03 13) - movq mm0,mm5 ; transpose coefficients(phase 1) - punpcklwd mm5,mm1 ; mm5=(60 70 61 71) - punpckhwd mm0,mm1 ; mm0=(62 72 63 73) - - movq mm7, MMWORD [wk(0)] ; mm7=tmp2 - movq mm1, MMWORD [wk(1)] ; mm1=tmp3 - - movq MMWORD [wk(0)], mm5 ; wk(0)=(60 70 61 71) - movq MMWORD [wk(1)], mm0 ; wk(1)=(62 72 63 73) - - paddw mm2,mm4 ; mm2=tmp4 - movq mm5,mm7 - movq mm0,mm1 - paddw mm7,mm4 ; mm7=data2=(20 21 22 23) - paddw mm1,mm2 ; mm1=data4=(40 41 42 43) - psubw mm5,mm4 ; mm5=data5=(50 51 52 53) - psubw mm0,mm2 ; mm0=data3=(30 31 32 33) - - movq mm4,mm7 ; transpose coefficients(phase 1) - punpcklwd mm7,mm0 ; mm7=(20 30 21 31) - punpckhwd mm4,mm0 ; mm4=(22 32 23 33) - movq mm2,mm1 ; transpose coefficients(phase 1) - punpcklwd mm1,mm5 ; mm1=(40 50 41 51) - punpckhwd mm2,mm5 ; mm2=(42 52 43 53) - - movq mm0,mm6 ; transpose coefficients(phase 2) - punpckldq mm6,mm7 ; mm6=(00 10 20 30) - punpckhdq mm0,mm7 ; mm0=(01 11 21 31) - movq mm5,mm3 ; transpose coefficients(phase 2) - punpckldq mm3,mm4 ; mm3=(02 12 22 32) - punpckhdq mm5,mm4 ; mm5=(03 13 23 33) - - movq mm7, MMWORD [wk(0)] ; mm7=(60 70 61 71) - movq mm4, MMWORD [wk(1)] ; mm4=(62 72 63 73) - - movq MMWORD 
[MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm6 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0 - movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm3 - movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5 - - movq mm6,mm1 ; transpose coefficients(phase 2) - punpckldq mm1,mm7 ; mm1=(40 50 60 70) - punpckhdq mm6,mm7 ; mm6=(41 51 61 71) - movq mm0,mm2 ; transpose coefficients(phase 2) - punpckldq mm2,mm4 ; mm2=(42 52 62 72) - punpckhdq mm0,mm4 ; mm0=(43 53 63 73) - - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1 - movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm6 - movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 - movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm0 - -.nextcolumn: - add esi, byte 4*SIZEOF_JCOEF ; coef_block - add edx, byte 4*SIZEOF_IFAST_MULT_TYPE ; quantptr - add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr - dec ecx ; ctr - jnz near .columnloop - - ; ---- Pass 2: process rows from work array, store into output array. - - mov eax, [original_ebp] - lea esi, [workspace] ; JCOEF *wsptr - mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(eax)] - mov ecx, DCTSIZE/4 ; ctr - alignx 16,7 -.rowloop: - - ; -- Even part - - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - - movq mm4,mm0 - movq mm5,mm1 - psubw mm0,mm2 ; mm0=tmp11 - psubw mm1,mm3 - paddw mm4,mm2 ; mm4=tmp10 - paddw mm5,mm3 ; mm5=tmp13 - - psllw mm1,PRE_MULTIPLY_SCALE_BITS - pmulhw mm1,[GOTOFF(ebx,PW_F1414)] - psubw mm1,mm5 ; mm1=tmp12 - - movq mm6,mm4 - movq mm7,mm0 - psubw mm4,mm5 ; mm4=tmp3 - psubw mm0,mm1 ; mm0=tmp2 - paddw mm6,mm5 ; mm6=tmp0 - paddw mm7,mm1 ; mm7=tmp1 - - movq MMWORD [wk(1)], mm4 ; wk(1)=tmp3 - movq MMWORD [wk(0)], mm0 ; wk(0)=tmp2 - - ; -- Odd part - - movq mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - movq mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - 
movq mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - - movq mm4,mm2 - movq mm0,mm5 - psubw mm2,mm1 ; mm2=z12 - psubw mm5,mm3 ; mm5=z10 - paddw mm4,mm1 ; mm4=z11 - paddw mm0,mm3 ; mm0=z13 - - movq mm1,mm5 ; mm1=z10(unscaled) - psllw mm2,PRE_MULTIPLY_SCALE_BITS - psllw mm5,PRE_MULTIPLY_SCALE_BITS - - movq mm3,mm4 - psubw mm4,mm0 - paddw mm3,mm0 ; mm3=tmp7 - - psllw mm4,PRE_MULTIPLY_SCALE_BITS - pmulhw mm4,[GOTOFF(ebx,PW_F1414)] ; mm4=tmp11 - - ; To avoid overflow... - ; - ; (Original) - ; tmp12 = -2.613125930 * z10 + z5; - ; - ; (This implementation) - ; tmp12 = (-1.613125930 - 1) * z10 + z5; - ; = -1.613125930 * z10 - z10 + z5; - - movq mm0,mm5 - paddw mm5,mm2 - pmulhw mm5,[GOTOFF(ebx,PW_F1847)] ; mm5=z5 - pmulhw mm0,[GOTOFF(ebx,PW_MF1613)] - pmulhw mm2,[GOTOFF(ebx,PW_F1082)] - psubw mm0,mm1 - psubw mm2,mm5 ; mm2=tmp10 - paddw mm0,mm5 ; mm0=tmp12 - - ; -- Final output stage - - psubw mm0,mm3 ; mm0=tmp6 - movq mm1,mm6 - movq mm5,mm7 - paddw mm6,mm3 ; mm6=data0=(00 10 20 30) - paddw mm7,mm0 ; mm7=data1=(01 11 21 31) - psraw mm6,(PASS1_BITS+3) ; descale - psraw mm7,(PASS1_BITS+3) ; descale - psubw mm1,mm3 ; mm1=data7=(07 17 27 37) - psubw mm5,mm0 ; mm5=data6=(06 16 26 36) - psraw mm1,(PASS1_BITS+3) ; descale - psraw mm5,(PASS1_BITS+3) ; descale - psubw mm4,mm0 ; mm4=tmp5 - - packsswb mm6,mm5 ; mm6=(00 10 20 30 06 16 26 36) - packsswb mm7,mm1 ; mm7=(01 11 21 31 07 17 27 37) - - movq mm3, MMWORD [wk(0)] ; mm3=tmp2 - movq mm0, MMWORD [wk(1)] ; mm0=tmp3 - - paddw mm2,mm4 ; mm2=tmp4 - movq mm5,mm3 - movq mm1,mm0 - paddw mm3,mm4 ; mm3=data2=(02 12 22 32) - paddw mm0,mm2 ; mm0=data4=(04 14 24 34) - psraw mm3,(PASS1_BITS+3) ; descale - psraw mm0,(PASS1_BITS+3) ; descale - psubw mm5,mm4 ; mm5=data5=(05 15 25 35) - psubw mm1,mm2 ; mm1=data3=(03 13 23 33) - psraw mm5,(PASS1_BITS+3) ; descale - psraw mm1,(PASS1_BITS+3) ; descale - - movq mm4,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm4=[PB_CENTERJSAMP] - - packsswb mm3,mm0 ; mm3=(02 12 22 32 04 14 24 34) - packsswb mm1,mm5 ; mm1=(03 13 23 33 
05 15 25 35) - - paddb mm6,mm4 - paddb mm7,mm4 - paddb mm3,mm4 - paddb mm1,mm4 - - movq mm2,mm6 ; transpose coefficients(phase 1) - punpcklbw mm6,mm7 ; mm6=(00 01 10 11 20 21 30 31) - punpckhbw mm2,mm7 ; mm2=(06 07 16 17 26 27 36 37) - movq mm0,mm3 ; transpose coefficients(phase 1) - punpcklbw mm3,mm1 ; mm3=(02 03 12 13 22 23 32 33) - punpckhbw mm0,mm1 ; mm0=(04 05 14 15 24 25 34 35) - - movq mm5,mm6 ; transpose coefficients(phase 2) - punpcklwd mm6,mm3 ; mm6=(00 01 02 03 10 11 12 13) - punpckhwd mm5,mm3 ; mm5=(20 21 22 23 30 31 32 33) - movq mm4,mm0 ; transpose coefficients(phase 2) - punpcklwd mm0,mm2 ; mm0=(04 05 06 07 14 15 16 17) - punpckhwd mm4,mm2 ; mm4=(24 25 26 27 34 35 36 37) - - movq mm7,mm6 ; transpose coefficients(phase 3) - punpckldq mm6,mm0 ; mm6=(00 01 02 03 04 05 06 07) - punpckhdq mm7,mm0 ; mm7=(10 11 12 13 14 15 16 17) - movq mm1,mm5 ; transpose coefficients(phase 3) - punpckldq mm5,mm4 ; mm5=(20 21 22 23 24 25 26 27) - punpckhdq mm1,mm4 ; mm1=(30 31 32 33 34 35 36 37) - - pushpic ebx ; save GOT address - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6 - movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7 - mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] - mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] - movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5 - movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1 - - poppic ebx ; restore GOT address - - add esi, byte 4*SIZEOF_JCOEF ; wsptr - add edi, byte 4*SIZEOF_JSAMPROW - dec ecx ; ctr - jnz near .rowloop - - emms ; empty MMX state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jidctfst-sse2-64.asm b/simd/jidctfst-sse2-64.asm deleted file mode 100644 index 4884642..0000000 --- a/simd/jidctfst-sse2-64.asm +++ /dev/null @@ -1,491 +0,0 @@ -; -; jidctfst.asm - fast integer IDCT (64-bit SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a fast, not so accurate integer implementation of -; the inverse DCT (Discrete Cosine Transform). The following code is -; based directly on the IJG's original jidctfst.c; see the jidctfst.c -; for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 8 ; 14 is also OK. -%define PASS1_BITS 2 - -%if IFAST_SCALE_BITS != PASS1_BITS -%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." -%endif - -%if CONST_BITS == 8 -F_1_082 equ 277 ; FIX(1.082392200) -F_1_414 equ 362 ; FIX(1.414213562) -F_1_847 equ 473 ; FIX(1.847759065) -F_2_613 equ 669 ; FIX(2.613125930) -F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. 
-%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) -F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) -F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - -; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) -; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) - -%define PRE_MULTIPLY_SCALE_BITS 2 -%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) - - alignz 16 - global EXTN(jconst_idct_ifast_sse2) - -EXTN(jconst_idct_ifast_sse2): - -PW_F1414 times 8 dw F_1_414 << CONST_SHIFT -PW_F1847 times 8 dw F_1_847 << CONST_SHIFT -PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT -PW_F1082 times 8 dw F_1_082 << CONST_SHIFT -PB_CENTERJSAMP times 16 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Perform dequantization and inverse DCT on one block of coefficients. -; -; GLOBAL(void) -; jsimd_idct_ifast_sse2 (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -; r10 = jpeg_component_info *compptr -; r11 = JCOEFPTR coef_block -; r12 = JSAMPARRAY output_buf -; r13 = JDIMENSION output_col - -%define original_rbp rbp+0 -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_idct_ifast_sse2) - -EXTN(jsimd_idct_ifast_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - - ; ---- Pass 1: process columns from input. 
- - mov rdx, r10 ; quantptr - mov rsi, r11 ; inptr - -%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 - mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] - jnz near .columnDCT - - movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] - por xmm1,xmm0 - packsswb xmm1,xmm1 - packsswb xmm1,xmm1 - movd eax,xmm1 - test rax,rax - jnz short .columnDCT - - ; -- AC terms all zero - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - - movdqa xmm7,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) - punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) - punpckhwd xmm7,xmm7 ; xmm7=(04 04 05 05 06 06 07 07) - - pshufd xmm6,xmm0,0x00 ; xmm6=col0=(00 00 00 00 00 00 00 00) - pshufd xmm2,xmm0,0x55 ; xmm2=col1=(01 01 01 01 01 01 01 01) - pshufd xmm5,xmm0,0xAA ; xmm5=col2=(02 02 02 02 02 02 02 02) - pshufd xmm0,xmm0,0xFF ; xmm0=col3=(03 03 03 03 03 03 03 03) - pshufd xmm1,xmm7,0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04) - pshufd xmm4,xmm7,0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05) - pshufd xmm3,xmm7,0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06) - pshufd xmm7,xmm7,0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07) - - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1 - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3 - jmp near .column_end -%endif -.columnDCT: - - ; -- Even part - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_IFAST_MULT_TYPE)] - pmullw xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_IFAST_MULT_TYPE)] - movdqa xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD 
[XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] - pmullw xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_IFAST_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_IFAST_MULT_TYPE)] - - movdqa xmm4,xmm0 - movdqa xmm5,xmm1 - psubw xmm0,xmm2 ; xmm0=tmp11 - psubw xmm1,xmm3 - paddw xmm4,xmm2 ; xmm4=tmp10 - paddw xmm5,xmm3 ; xmm5=tmp13 - - psllw xmm1,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm1,[rel PW_F1414] - psubw xmm1,xmm5 ; xmm1=tmp12 - - movdqa xmm6,xmm4 - movdqa xmm7,xmm0 - psubw xmm4,xmm5 ; xmm4=tmp3 - psubw xmm0,xmm1 ; xmm0=tmp2 - paddw xmm6,xmm5 ; xmm6=tmp0 - paddw xmm7,xmm1 ; xmm7=tmp1 - - movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=tmp3 - movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=tmp2 - - ; -- Odd part - - movdqa xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] - pmullw xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_IFAST_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_IFAST_MULT_TYPE)] - movdqa xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] - pmullw xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_IFAST_MULT_TYPE)] - pmullw xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_IFAST_MULT_TYPE)] - - movdqa xmm4,xmm2 - movdqa xmm0,xmm5 - psubw xmm2,xmm1 ; xmm2=z12 - psubw xmm5,xmm3 ; xmm5=z10 - paddw xmm4,xmm1 ; xmm4=z11 - paddw xmm0,xmm3 ; xmm0=z13 - - movdqa xmm1,xmm5 ; xmm1=z10(unscaled) - psllw xmm2,PRE_MULTIPLY_SCALE_BITS - psllw xmm5,PRE_MULTIPLY_SCALE_BITS - - movdqa xmm3,xmm4 - psubw xmm4,xmm0 - paddw xmm3,xmm0 ; xmm3=tmp7 - - psllw xmm4,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm4,[rel PW_F1414] ; xmm4=tmp11 - - ; To avoid overflow... 
- ; - ; (Original) - ; tmp12 = -2.613125930 * z10 + z5; - ; - ; (This implementation) - ; tmp12 = (-1.613125930 - 1) * z10 + z5; - ; = -1.613125930 * z10 - z10 + z5; - - movdqa xmm0,xmm5 - paddw xmm5,xmm2 - pmulhw xmm5,[rel PW_F1847] ; xmm5=z5 - pmulhw xmm0,[rel PW_MF1613] - pmulhw xmm2,[rel PW_F1082] - psubw xmm0,xmm1 - psubw xmm2,xmm5 ; xmm2=tmp10 - paddw xmm0,xmm5 ; xmm0=tmp12 - - ; -- Final output stage - - psubw xmm0,xmm3 ; xmm0=tmp6 - movdqa xmm1,xmm6 - movdqa xmm5,xmm7 - paddw xmm6,xmm3 ; xmm6=data0=(00 01 02 03 04 05 06 07) - paddw xmm7,xmm0 ; xmm7=data1=(10 11 12 13 14 15 16 17) - psubw xmm1,xmm3 ; xmm1=data7=(70 71 72 73 74 75 76 77) - psubw xmm5,xmm0 ; xmm5=data6=(60 61 62 63 64 65 66 67) - psubw xmm4,xmm0 ; xmm4=tmp5 - - movdqa xmm3,xmm6 ; transpose coefficients(phase 1) - punpcklwd xmm6,xmm7 ; xmm6=(00 10 01 11 02 12 03 13) - punpckhwd xmm3,xmm7 ; xmm3=(04 14 05 15 06 16 07 17) - movdqa xmm0,xmm5 ; transpose coefficients(phase 1) - punpcklwd xmm5,xmm1 ; xmm5=(60 70 61 71 62 72 63 73) - punpckhwd xmm0,xmm1 ; xmm0=(64 74 65 75 66 76 67 77) - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 - movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 - - movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(60 70 61 71 62 72 63 73) - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(64 74 65 75 66 76 67 77) - - paddw xmm2,xmm4 ; xmm2=tmp4 - movdqa xmm5,xmm7 - movdqa xmm0,xmm1 - paddw xmm7,xmm4 ; xmm7=data2=(20 21 22 23 24 25 26 27) - paddw xmm1,xmm2 ; xmm1=data4=(40 41 42 43 44 45 46 47) - psubw xmm5,xmm4 ; xmm5=data5=(50 51 52 53 54 55 56 57) - psubw xmm0,xmm2 ; xmm0=data3=(30 31 32 33 34 35 36 37) - - movdqa xmm4,xmm7 ; transpose coefficients(phase 1) - punpcklwd xmm7,xmm0 ; xmm7=(20 30 21 31 22 32 23 33) - punpckhwd xmm4,xmm0 ; xmm4=(24 34 25 35 26 36 27 37) - movdqa xmm2,xmm1 ; transpose coefficients(phase 1) - punpcklwd xmm1,xmm5 ; xmm1=(40 50 41 51 42 52 43 53) - punpckhwd xmm2,xmm5 ; xmm2=(44 54 45 55 46 56 47 57) - - movdqa xmm0,xmm3 ; transpose coefficients(phase 2) - punpckldq xmm3,xmm4 ; 
xmm3=(04 14 24 34 05 15 25 35) - punpckhdq xmm0,xmm4 ; xmm0=(06 16 26 36 07 17 27 37) - movdqa xmm5,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm7 ; xmm6=(00 10 20 30 01 11 21 31) - punpckhdq xmm5,xmm7 ; xmm5=(02 12 22 32 03 13 23 33) - - movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(60 70 61 71 62 72 63 73) - movdqa xmm7, XMMWORD [wk(1)] ; xmm7=(64 74 65 75 66 76 67 77) - - movdqa XMMWORD [wk(0)], xmm3 ; wk(0)=(04 14 24 34 05 15 25 35) - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(06 16 26 36 07 17 27 37) - - movdqa xmm3,xmm1 ; transpose coefficients(phase 2) - punpckldq xmm1,xmm4 ; xmm1=(40 50 60 70 41 51 61 71) - punpckhdq xmm3,xmm4 ; xmm3=(42 52 62 72 43 53 63 73) - movdqa xmm0,xmm2 ; transpose coefficients(phase 2) - punpckldq xmm2,xmm7 ; xmm2=(44 54 64 74 45 55 65 75) - punpckhdq xmm0,xmm7 ; xmm0=(46 56 66 76 47 57 67 77) - - movdqa xmm4,xmm6 ; transpose coefficients(phase 3) - punpcklqdq xmm6,xmm1 ; xmm6=col0=(00 10 20 30 40 50 60 70) - punpckhqdq xmm4,xmm1 ; xmm4=col1=(01 11 21 31 41 51 61 71) - movdqa xmm7,xmm5 ; transpose coefficients(phase 3) - punpcklqdq xmm5,xmm3 ; xmm5=col2=(02 12 22 32 42 52 62 72) - punpckhqdq xmm7,xmm3 ; xmm7=col3=(03 13 23 33 43 53 63 73) - - movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(04 14 24 34 05 15 25 35) - movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(06 16 26 36 07 17 27 37) - - movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=col1 - movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=col3 - - movdqa xmm4,xmm1 ; transpose coefficients(phase 3) - punpcklqdq xmm1,xmm2 ; xmm1=col4=(04 14 24 34 44 54 64 74) - punpckhqdq xmm4,xmm2 ; xmm4=col5=(05 15 25 35 45 55 65 75) - movdqa xmm7,xmm3 ; transpose coefficients(phase 3) - punpcklqdq xmm3,xmm0 ; xmm3=col6=(06 16 26 36 46 56 66 76) - punpckhqdq xmm7,xmm0 ; xmm7=col7=(07 17 27 37 47 57 67 77) -.column_end: - - ; -- Prefetch the next coefficient block - - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] - prefetchnta [rsi + 
DCTSIZE2*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows from work array, store into output array. - - mov rax, [original_rbp] - mov rdi, r12 ; (JSAMPROW *) - mov eax, r13d - - ; -- Even part - - ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 - - movdqa xmm2,xmm6 - movdqa xmm0,xmm5 - psubw xmm6,xmm1 ; xmm6=tmp11 - psubw xmm5,xmm3 - paddw xmm2,xmm1 ; xmm2=tmp10 - paddw xmm0,xmm3 ; xmm0=tmp13 - - psllw xmm5,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm5,[rel PW_F1414] - psubw xmm5,xmm0 ; xmm5=tmp12 - - movdqa xmm1,xmm2 - movdqa xmm3,xmm6 - psubw xmm2,xmm0 ; xmm2=tmp3 - psubw xmm6,xmm5 ; xmm6=tmp2 - paddw xmm1,xmm0 ; xmm1=tmp0 - paddw xmm3,xmm5 ; xmm3=tmp1 - - movdqa xmm0, XMMWORD [wk(0)] ; xmm0=col1 - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=col3 - - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp3 - movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp2 - - ; -- Odd part - - ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7 - - movdqa xmm2,xmm0 - movdqa xmm6,xmm4 - psubw xmm0,xmm7 ; xmm0=z12 - psubw xmm4,xmm5 ; xmm4=z10 - paddw xmm2,xmm7 ; xmm2=z11 - paddw xmm6,xmm5 ; xmm6=z13 - - movdqa xmm7,xmm4 ; xmm7=z10(unscaled) - psllw xmm0,PRE_MULTIPLY_SCALE_BITS - psllw xmm4,PRE_MULTIPLY_SCALE_BITS - - movdqa xmm5,xmm2 - psubw xmm2,xmm6 - paddw xmm5,xmm6 ; xmm5=tmp7 - - psllw xmm2,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm2,[rel PW_F1414] ; xmm2=tmp11 - - ; To avoid overflow... 
- ; - ; (Original) - ; tmp12 = -2.613125930 * z10 + z5; - ; - ; (This implementation) - ; tmp12 = (-1.613125930 - 1) * z10 + z5; - ; = -1.613125930 * z10 - z10 + z5; - - movdqa xmm6,xmm4 - paddw xmm4,xmm0 - pmulhw xmm4,[rel PW_F1847] ; xmm4=z5 - pmulhw xmm6,[rel PW_MF1613] - pmulhw xmm0,[rel PW_F1082] - psubw xmm6,xmm7 - psubw xmm0,xmm4 ; xmm0=tmp10 - paddw xmm6,xmm4 ; xmm6=tmp12 - - ; -- Final output stage - - psubw xmm6,xmm5 ; xmm6=tmp6 - movdqa xmm7,xmm1 - movdqa xmm4,xmm3 - paddw xmm1,xmm5 ; xmm1=data0=(00 10 20 30 40 50 60 70) - paddw xmm3,xmm6 ; xmm3=data1=(01 11 21 31 41 51 61 71) - psraw xmm1,(PASS1_BITS+3) ; descale - psraw xmm3,(PASS1_BITS+3) ; descale - psubw xmm7,xmm5 ; xmm7=data7=(07 17 27 37 47 57 67 77) - psubw xmm4,xmm6 ; xmm4=data6=(06 16 26 36 46 56 66 76) - psraw xmm7,(PASS1_BITS+3) ; descale - psraw xmm4,(PASS1_BITS+3) ; descale - psubw xmm2,xmm6 ; xmm2=tmp5 - - packsswb xmm1,xmm4 ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) - packsswb xmm3,xmm7 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) - - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp2 - movdqa xmm6, XMMWORD [wk(0)] ; xmm6=tmp3 - - paddw xmm0,xmm2 ; xmm0=tmp4 - movdqa xmm4,xmm5 - movdqa xmm7,xmm6 - paddw xmm5,xmm2 ; xmm5=data2=(02 12 22 32 42 52 62 72) - paddw xmm6,xmm0 ; xmm6=data4=(04 14 24 34 44 54 64 74) - psraw xmm5,(PASS1_BITS+3) ; descale - psraw xmm6,(PASS1_BITS+3) ; descale - psubw xmm4,xmm2 ; xmm4=data5=(05 15 25 35 45 55 65 75) - psubw xmm7,xmm0 ; xmm7=data3=(03 13 23 33 43 53 63 73) - psraw xmm4,(PASS1_BITS+3) ; descale - psraw xmm7,(PASS1_BITS+3) ; descale - - movdqa xmm2,[rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP] - - packsswb xmm5,xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) - packsswb xmm7,xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) - - paddb xmm1,xmm2 - paddb xmm3,xmm2 - paddb xmm5,xmm2 - paddb xmm7,xmm2 - - movdqa xmm0,xmm1 ; transpose coefficients(phase 1) - punpcklbw xmm1,xmm3 ; xmm1=(00 01 10 11 20 21 30 31 40 41 
50 51 60 61 70 71) - punpckhbw xmm0,xmm3 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) - movdqa xmm6,xmm5 ; transpose coefficients(phase 1) - punpcklbw xmm5,xmm7 ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) - punpckhbw xmm6,xmm7 ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) - - movdqa xmm4,xmm1 ; transpose coefficients(phase 2) - punpcklwd xmm1,xmm5 ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) - punpckhwd xmm4,xmm5 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) - movdqa xmm2,xmm6 ; transpose coefficients(phase 2) - punpcklwd xmm6,xmm0 ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) - punpckhwd xmm2,xmm0 ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) - - movdqa xmm3,xmm1 ; transpose coefficients(phase 3) - punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) - punpckhdq xmm3,xmm6 ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) - movdqa xmm7,xmm4 ; transpose coefficients(phase 3) - punpckldq xmm4,xmm2 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) - punpckhdq xmm7,xmm2 ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) - - pshufd xmm5,xmm1,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) - pshufd xmm0,xmm3,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) - pshufd xmm6,xmm4,0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) - pshufd xmm2,xmm7,0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) - - mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] - movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1 - movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 - mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW] - movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 - movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7 - - mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] - movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5 - 
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0 - mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW] - movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 - movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2 - - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jidctfst-sse2.asm b/simd/jidctfst-sse2.asm deleted file mode 100644 index f591e55..0000000 --- a/simd/jidctfst-sse2.asm +++ /dev/null @@ -1,501 +0,0 @@ -; -; jidctfst.asm - fast integer IDCT (SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a fast, not so accurate integer implementation of -; the inverse DCT (Discrete Cosine Transform). The following code is -; based directly on the IJG's original jidctfst.c; see the jidctfst.c -; for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 8 ; 14 is also OK. -%define PASS1_BITS 2 - -%if IFAST_SCALE_BITS != PASS1_BITS -%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." 
-%endif - -%if CONST_BITS == 8 -F_1_082 equ 277 ; FIX(1.082392200) -F_1_414 equ 362 ; FIX(1.414213562) -F_1_847 equ 473 ; FIX(1.847759065) -F_2_613 equ 669 ; FIX(2.613125930) -F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. -%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_1_082 equ DESCALE(1162209775,30-CONST_BITS) ; FIX(1.082392200) -F_1_414 equ DESCALE(1518500249,30-CONST_BITS) ; FIX(1.414213562) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_2_613 equ DESCALE(2805822602,30-CONST_BITS) ; FIX(2.613125930) -F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - -; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) -; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) - -%define PRE_MULTIPLY_SCALE_BITS 2 -%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) - - alignz 16 - global EXTN(jconst_idct_ifast_sse2) - -EXTN(jconst_idct_ifast_sse2): - -PW_F1414 times 8 dw F_1_414 << CONST_SHIFT -PW_F1847 times 8 dw F_1_847 << CONST_SHIFT -PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT -PW_F1082 times 8 dw F_1_082 << CONST_SHIFT -PB_CENTERJSAMP times 16 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform dequantization and inverse DCT on one block of coefficients. 
-; -; GLOBAL(void) -; jsimd_idct_ifast_sse2 (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; jpeg_component_info *compptr -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_idct_ifast_sse2) - -EXTN(jsimd_idct_ifast_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic ebx -; push ecx ; unused -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input. - -; mov eax, [original_ebp] - mov edx, POINTER [dct_table(eax)] ; quantptr - mov esi, JCOEFPTR [coef_block(eax)] ; inptr - -%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - jnz near .columnDCT - - movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] - por xmm1,xmm0 - packsswb xmm1,xmm1 - packsswb xmm1,xmm1 - movd eax,xmm1 - test eax,eax - jnz short .columnDCT - - ; -- AC terms all zero - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - movdqa xmm7,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) - punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) - punpckhwd xmm7,xmm7 ; xmm7=(04 04 05 05 06 
06 07 07) - - pshufd xmm6,xmm0,0x00 ; xmm6=col0=(00 00 00 00 00 00 00 00) - pshufd xmm2,xmm0,0x55 ; xmm2=col1=(01 01 01 01 01 01 01 01) - pshufd xmm5,xmm0,0xAA ; xmm5=col2=(02 02 02 02 02 02 02 02) - pshufd xmm0,xmm0,0xFF ; xmm0=col3=(03 03 03 03 03 03 03 03) - pshufd xmm1,xmm7,0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04) - pshufd xmm4,xmm7,0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05) - pshufd xmm3,xmm7,0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06) - pshufd xmm7,xmm7,0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07) - - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1 - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3 - jmp near .column_end - alignx 16,7 -%endif -.columnDCT: - - ; -- Even part - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)] - pmullw xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)] - movdqa xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] - pmullw xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)] - - movdqa xmm4,xmm0 - movdqa xmm5,xmm1 - psubw xmm0,xmm2 ; xmm0=tmp11 - psubw xmm1,xmm3 - paddw xmm4,xmm2 ; xmm4=tmp10 - paddw xmm5,xmm3 ; xmm5=tmp13 - - psllw xmm1,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm1,[GOTOFF(ebx,PW_F1414)] - psubw xmm1,xmm5 ; xmm1=tmp12 - - movdqa xmm6,xmm4 - movdqa xmm7,xmm0 - psubw xmm4,xmm5 ; xmm4=tmp3 - psubw xmm0,xmm1 ; xmm0=tmp2 - paddw xmm6,xmm5 ; xmm6=tmp0 - paddw xmm7,xmm1 ; xmm7=tmp1 - - movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=tmp3 - movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=tmp2 - - ; -- Odd part - - movdqa xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] - pmullw xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)] - movdqa xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] - 
movdqa xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] - pmullw xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)] - pmullw xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)] - - movdqa xmm4,xmm2 - movdqa xmm0,xmm5 - psubw xmm2,xmm1 ; xmm2=z12 - psubw xmm5,xmm3 ; xmm5=z10 - paddw xmm4,xmm1 ; xmm4=z11 - paddw xmm0,xmm3 ; xmm0=z13 - - movdqa xmm1,xmm5 ; xmm1=z10(unscaled) - psllw xmm2,PRE_MULTIPLY_SCALE_BITS - psllw xmm5,PRE_MULTIPLY_SCALE_BITS - - movdqa xmm3,xmm4 - psubw xmm4,xmm0 - paddw xmm3,xmm0 ; xmm3=tmp7 - - psllw xmm4,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm4,[GOTOFF(ebx,PW_F1414)] ; xmm4=tmp11 - - ; To avoid overflow... - ; - ; (Original) - ; tmp12 = -2.613125930 * z10 + z5; - ; - ; (This implementation) - ; tmp12 = (-1.613125930 - 1) * z10 + z5; - ; = -1.613125930 * z10 - z10 + z5; - - movdqa xmm0,xmm5 - paddw xmm5,xmm2 - pmulhw xmm5,[GOTOFF(ebx,PW_F1847)] ; xmm5=z5 - pmulhw xmm0,[GOTOFF(ebx,PW_MF1613)] - pmulhw xmm2,[GOTOFF(ebx,PW_F1082)] - psubw xmm0,xmm1 - psubw xmm2,xmm5 ; xmm2=tmp10 - paddw xmm0,xmm5 ; xmm0=tmp12 - - ; -- Final output stage - - psubw xmm0,xmm3 ; xmm0=tmp6 - movdqa xmm1,xmm6 - movdqa xmm5,xmm7 - paddw xmm6,xmm3 ; xmm6=data0=(00 01 02 03 04 05 06 07) - paddw xmm7,xmm0 ; xmm7=data1=(10 11 12 13 14 15 16 17) - psubw xmm1,xmm3 ; xmm1=data7=(70 71 72 73 74 75 76 77) - psubw xmm5,xmm0 ; xmm5=data6=(60 61 62 63 64 65 66 67) - psubw xmm4,xmm0 ; xmm4=tmp5 - - movdqa xmm3,xmm6 ; transpose coefficients(phase 1) - punpcklwd xmm6,xmm7 ; xmm6=(00 10 01 11 02 12 03 13) - punpckhwd xmm3,xmm7 ; xmm3=(04 14 05 15 06 16 07 17) - movdqa xmm0,xmm5 ; transpose coefficients(phase 1) - punpcklwd xmm5,xmm1 ; xmm5=(60 70 61 71 62 72 63 73) - punpckhwd xmm0,xmm1 ; xmm0=(64 74 65 75 66 76 67 77) - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 - movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 - - movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(60 70 61 71 62 72 63 73) - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(64 74 65 75 66 76 67 77) - - paddw xmm2,xmm4 ; xmm2=tmp4 - movdqa 
xmm5,xmm7 - movdqa xmm0,xmm1 - paddw xmm7,xmm4 ; xmm7=data2=(20 21 22 23 24 25 26 27) - paddw xmm1,xmm2 ; xmm1=data4=(40 41 42 43 44 45 46 47) - psubw xmm5,xmm4 ; xmm5=data5=(50 51 52 53 54 55 56 57) - psubw xmm0,xmm2 ; xmm0=data3=(30 31 32 33 34 35 36 37) - - movdqa xmm4,xmm7 ; transpose coefficients(phase 1) - punpcklwd xmm7,xmm0 ; xmm7=(20 30 21 31 22 32 23 33) - punpckhwd xmm4,xmm0 ; xmm4=(24 34 25 35 26 36 27 37) - movdqa xmm2,xmm1 ; transpose coefficients(phase 1) - punpcklwd xmm1,xmm5 ; xmm1=(40 50 41 51 42 52 43 53) - punpckhwd xmm2,xmm5 ; xmm2=(44 54 45 55 46 56 47 57) - - movdqa xmm0,xmm3 ; transpose coefficients(phase 2) - punpckldq xmm3,xmm4 ; xmm3=(04 14 24 34 05 15 25 35) - punpckhdq xmm0,xmm4 ; xmm0=(06 16 26 36 07 17 27 37) - movdqa xmm5,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm7 ; xmm6=(00 10 20 30 01 11 21 31) - punpckhdq xmm5,xmm7 ; xmm5=(02 12 22 32 03 13 23 33) - - movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(60 70 61 71 62 72 63 73) - movdqa xmm7, XMMWORD [wk(1)] ; xmm7=(64 74 65 75 66 76 67 77) - - movdqa XMMWORD [wk(0)], xmm3 ; wk(0)=(04 14 24 34 05 15 25 35) - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(06 16 26 36 07 17 27 37) - - movdqa xmm3,xmm1 ; transpose coefficients(phase 2) - punpckldq xmm1,xmm4 ; xmm1=(40 50 60 70 41 51 61 71) - punpckhdq xmm3,xmm4 ; xmm3=(42 52 62 72 43 53 63 73) - movdqa xmm0,xmm2 ; transpose coefficients(phase 2) - punpckldq xmm2,xmm7 ; xmm2=(44 54 64 74 45 55 65 75) - punpckhdq xmm0,xmm7 ; xmm0=(46 56 66 76 47 57 67 77) - - movdqa xmm4,xmm6 ; transpose coefficients(phase 3) - punpcklqdq xmm6,xmm1 ; xmm6=col0=(00 10 20 30 40 50 60 70) - punpckhqdq xmm4,xmm1 ; xmm4=col1=(01 11 21 31 41 51 61 71) - movdqa xmm7,xmm5 ; transpose coefficients(phase 3) - punpcklqdq xmm5,xmm3 ; xmm5=col2=(02 12 22 32 42 52 62 72) - punpckhqdq xmm7,xmm3 ; xmm7=col3=(03 13 23 33 43 53 63 73) - - movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(04 14 24 34 05 15 25 35) - movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(06 16 26 36 07 17 27 37) - - movdqa 
XMMWORD [wk(0)], xmm4 ; wk(0)=col1 - movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=col3 - - movdqa xmm4,xmm1 ; transpose coefficients(phase 3) - punpcklqdq xmm1,xmm2 ; xmm1=col4=(04 14 24 34 44 54 64 74) - punpckhqdq xmm4,xmm2 ; xmm4=col5=(05 15 25 35 45 55 65 75) - movdqa xmm7,xmm3 ; transpose coefficients(phase 3) - punpcklqdq xmm3,xmm0 ; xmm3=col6=(06 16 26 36 46 56 66 76) - punpckhqdq xmm7,xmm0 ; xmm7=col7=(07 17 27 37 47 57 67 77) -.column_end: - - ; -- Prefetch the next coefficient block - - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows from work array, store into output array. - - mov eax, [original_ebp] - mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(eax)] - - ; -- Even part - - ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 - - movdqa xmm2,xmm6 - movdqa xmm0,xmm5 - psubw xmm6,xmm1 ; xmm6=tmp11 - psubw xmm5,xmm3 - paddw xmm2,xmm1 ; xmm2=tmp10 - paddw xmm0,xmm3 ; xmm0=tmp13 - - psllw xmm5,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm5,[GOTOFF(ebx,PW_F1414)] - psubw xmm5,xmm0 ; xmm5=tmp12 - - movdqa xmm1,xmm2 - movdqa xmm3,xmm6 - psubw xmm2,xmm0 ; xmm2=tmp3 - psubw xmm6,xmm5 ; xmm6=tmp2 - paddw xmm1,xmm0 ; xmm1=tmp0 - paddw xmm3,xmm5 ; xmm3=tmp1 - - movdqa xmm0, XMMWORD [wk(0)] ; xmm0=col1 - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=col3 - - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp3 - movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp2 - - ; -- Odd part - - ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7 - - movdqa xmm2,xmm0 - movdqa xmm6,xmm4 - psubw xmm0,xmm7 ; xmm0=z12 - psubw xmm4,xmm5 ; xmm4=z10 - paddw xmm2,xmm7 ; xmm2=z11 - paddw xmm6,xmm5 ; xmm6=z13 - - movdqa xmm7,xmm4 ; xmm7=z10(unscaled) - psllw xmm0,PRE_MULTIPLY_SCALE_BITS - psllw xmm4,PRE_MULTIPLY_SCALE_BITS - - movdqa xmm5,xmm2 - psubw xmm2,xmm6 - paddw xmm5,xmm6 ; xmm5=tmp7 - - psllw 
xmm2,PRE_MULTIPLY_SCALE_BITS - pmulhw xmm2,[GOTOFF(ebx,PW_F1414)] ; xmm2=tmp11 - - ; To avoid overflow... - ; - ; (Original) - ; tmp12 = -2.613125930 * z10 + z5; - ; - ; (This implementation) - ; tmp12 = (-1.613125930 - 1) * z10 + z5; - ; = -1.613125930 * z10 - z10 + z5; - - movdqa xmm6,xmm4 - paddw xmm4,xmm0 - pmulhw xmm4,[GOTOFF(ebx,PW_F1847)] ; xmm4=z5 - pmulhw xmm6,[GOTOFF(ebx,PW_MF1613)] - pmulhw xmm0,[GOTOFF(ebx,PW_F1082)] - psubw xmm6,xmm7 - psubw xmm0,xmm4 ; xmm0=tmp10 - paddw xmm6,xmm4 ; xmm6=tmp12 - - ; -- Final output stage - - psubw xmm6,xmm5 ; xmm6=tmp6 - movdqa xmm7,xmm1 - movdqa xmm4,xmm3 - paddw xmm1,xmm5 ; xmm1=data0=(00 10 20 30 40 50 60 70) - paddw xmm3,xmm6 ; xmm3=data1=(01 11 21 31 41 51 61 71) - psraw xmm1,(PASS1_BITS+3) ; descale - psraw xmm3,(PASS1_BITS+3) ; descale - psubw xmm7,xmm5 ; xmm7=data7=(07 17 27 37 47 57 67 77) - psubw xmm4,xmm6 ; xmm4=data6=(06 16 26 36 46 56 66 76) - psraw xmm7,(PASS1_BITS+3) ; descale - psraw xmm4,(PASS1_BITS+3) ; descale - psubw xmm2,xmm6 ; xmm2=tmp5 - - packsswb xmm1,xmm4 ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) - packsswb xmm3,xmm7 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) - - movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp2 - movdqa xmm6, XMMWORD [wk(0)] ; xmm6=tmp3 - - paddw xmm0,xmm2 ; xmm0=tmp4 - movdqa xmm4,xmm5 - movdqa xmm7,xmm6 - paddw xmm5,xmm2 ; xmm5=data2=(02 12 22 32 42 52 62 72) - paddw xmm6,xmm0 ; xmm6=data4=(04 14 24 34 44 54 64 74) - psraw xmm5,(PASS1_BITS+3) ; descale - psraw xmm6,(PASS1_BITS+3) ; descale - psubw xmm4,xmm2 ; xmm4=data5=(05 15 25 35 45 55 65 75) - psubw xmm7,xmm0 ; xmm7=data3=(03 13 23 33 43 53 63 73) - psraw xmm4,(PASS1_BITS+3) ; descale - psraw xmm7,(PASS1_BITS+3) ; descale - - movdqa xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm2=[PB_CENTERJSAMP] - - packsswb xmm5,xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) - packsswb xmm7,xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) - - paddb xmm1,xmm2 - paddb xmm3,xmm2 - paddb xmm5,xmm2 
- paddb xmm7,xmm2 - - movdqa xmm0,xmm1 ; transpose coefficients(phase 1) - punpcklbw xmm1,xmm3 ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) - punpckhbw xmm0,xmm3 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) - movdqa xmm6,xmm5 ; transpose coefficients(phase 1) - punpcklbw xmm5,xmm7 ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) - punpckhbw xmm6,xmm7 ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) - - movdqa xmm4,xmm1 ; transpose coefficients(phase 2) - punpcklwd xmm1,xmm5 ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) - punpckhwd xmm4,xmm5 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) - movdqa xmm2,xmm6 ; transpose coefficients(phase 2) - punpcklwd xmm6,xmm0 ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) - punpckhwd xmm2,xmm0 ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) - - movdqa xmm3,xmm1 ; transpose coefficients(phase 3) - punpckldq xmm1,xmm6 ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) - punpckhdq xmm3,xmm6 ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) - movdqa xmm7,xmm4 ; transpose coefficients(phase 3) - punpckldq xmm4,xmm2 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) - punpckhdq xmm7,xmm2 ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) - - pshufd xmm5,xmm1,0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) - pshufd xmm0,xmm3,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) - pshufd xmm6,xmm4,0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) - pshufd xmm2,xmm7,0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] - movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm1 - movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 - mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW] - movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 - movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7 - - mov 
edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] - movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5 - movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0 - mov edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] - movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 - movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm2 - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; unused - poppic ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jidctint-mmx.asm b/simd/jidctint-mmx.asm deleted file mode 100644 index 5bd1981..0000000 --- a/simd/jidctint-mmx.asm +++ /dev/null @@ -1,851 +0,0 @@ -; -; jidctint.asm - accurate integer IDCT (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a slow-but-accurate integer implementation of the -; inverse DCT (Discrete Cosine Transform). The following code is based -; directly on the IJG's original jidctint.c; see the jidctint.c for -; more details. 
-; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 13 -%define PASS1_BITS 2 - -%define DESCALE_P1 (CONST_BITS-PASS1_BITS) -%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) - -%if CONST_BITS == 13 -F_0_298 equ 2446 ; FIX(0.298631336) -F_0_390 equ 3196 ; FIX(0.390180644) -F_0_541 equ 4433 ; FIX(0.541196100) -F_0_765 equ 6270 ; FIX(0.765366865) -F_0_899 equ 7373 ; FIX(0.899976223) -F_1_175 equ 9633 ; FIX(1.175875602) -F_1_501 equ 12299 ; FIX(1.501321110) -F_1_847 equ 15137 ; FIX(1.847759065) -F_1_961 equ 16069 ; FIX(1.961570560) -F_2_053 equ 16819 ; FIX(2.053119869) -F_2_562 equ 20995 ; FIX(2.562915447) -F_3_072 equ 25172 ; FIX(3.072711026) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. -%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) -F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) -F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) -F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) -F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) -F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) -F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) -F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) -F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) -F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_idct_islow_mmx) - -EXTN(jconst_idct_islow_mmx): - -PW_F130_F054 times 2 dw (F_0_541+F_0_765), F_0_541 -PW_F054_MF130 times 2 dw F_0_541, (F_0_541-F_1_847) -PW_MF078_F117 times 2 dw 
(F_1_175-F_1_961), F_1_175 -PW_F117_F078 times 2 dw F_1_175, (F_1_175-F_0_390) -PW_MF060_MF089 times 2 dw (F_0_298-F_0_899),-F_0_899 -PW_MF089_F060 times 2 dw -F_0_899, (F_1_501-F_0_899) -PW_MF050_MF256 times 2 dw (F_2_053-F_2_562),-F_2_562 -PW_MF256_F050 times 2 dw -F_2_562, (F_3_072-F_2_562) -PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1-1) -PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2-1) -PB_CENTERJSAMP times 8 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform dequantization and inverse DCT on one block of coefficients. -; -; GLOBAL(void) -; jsimd_idct_islow_mmx (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; jpeg_component_info *compptr -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 12 -%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF - ; JCOEF workspace[DCTSIZE2] - - align 16 - global EXTN(jsimd_idct_islow_mmx) - -EXTN(jsimd_idct_islow_mmx): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [workspace] - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input, store into work array. 
- -; mov eax, [original_ebp] - mov edx, POINTER [dct_table(eax)] ; quantptr - mov esi, JCOEFPTR [coef_block(eax)] ; inptr - lea edi, [workspace] ; JCOEF *wsptr - mov ecx, DCTSIZE/4 ; ctr - alignx 16,7 -.columnloop: -%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - jnz short .columnDCT - - movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - por mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - por mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - por mm1,mm0 - packsswb mm1,mm1 - movd eax,mm1 - test eax,eax - jnz short .columnDCT - - ; -- AC terms all zero - - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - psllw mm0,PASS1_BITS - - movq mm2,mm0 ; mm0=in0=(00 01 02 03) - punpcklwd mm0,mm0 ; mm0=(00 00 01 01) - punpckhwd mm2,mm2 ; mm2=(02 02 03 03) - - movq mm1,mm0 - punpckldq mm0,mm0 ; mm0=(00 00 00 00) - punpckhdq mm1,mm1 ; mm1=(01 01 01 01) - movq mm3,mm2 - punpckldq mm2,mm2 ; mm2=(02 02 02 02) - punpckhdq mm3,mm3 ; mm3=(03 03 03 03) - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 - movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1 - movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 - movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2 - movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 - movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3 - jmp near .nextcolumn - alignx 16,7 -%endif -.columnDCT: - - ; -- Even part - - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm1, MMWORD 
[MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - pmullw mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - ; (Original) - ; z1 = (z2 + z3) * 0.541196100; - ; tmp2 = z1 + z3 * -1.847759065; - ; tmp3 = z1 + z2 * 0.765366865; - ; - ; (This implementation) - ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); - ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; - - movq mm4,mm1 ; mm1=in2=z2 - movq mm5,mm1 - punpcklwd mm4,mm3 ; mm3=in6=z3 - punpckhwd mm5,mm3 - movq mm1,mm4 - movq mm3,mm5 - pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=tmp3L - pmaddwd mm5,[GOTOFF(ebx,PW_F130_F054)] ; mm5=tmp3H - pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=tmp2L - pmaddwd mm3,[GOTOFF(ebx,PW_F054_MF130)] ; mm3=tmp2H - - movq mm6,mm0 - paddw mm0,mm2 ; mm0=in0+in4 - psubw mm6,mm2 ; mm6=in0-in4 - - pxor mm7,mm7 - pxor mm2,mm2 - punpcklwd mm7,mm0 ; mm7=tmp0L - punpckhwd mm2,mm0 ; mm2=tmp0H - psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS - psrad mm2,(16-CONST_BITS) ; psrad mm2,16 & pslld mm2,CONST_BITS - - movq mm0,mm7 - paddd mm7,mm4 ; mm7=tmp10L - psubd mm0,mm4 ; mm0=tmp13L - movq mm4,mm2 - paddd mm2,mm5 ; mm2=tmp10H - psubd mm4,mm5 ; mm4=tmp13H - - movq MMWORD [wk(0)], mm7 ; wk(0)=tmp10L - movq MMWORD [wk(1)], mm2 ; wk(1)=tmp10H - movq MMWORD [wk(2)], mm0 ; wk(2)=tmp13L - movq MMWORD [wk(3)], mm4 ; wk(3)=tmp13H - - pxor mm5,mm5 - pxor mm7,mm7 - punpcklwd mm5,mm6 ; mm5=tmp1L - punpckhwd mm7,mm6 ; mm7=tmp1H - psrad mm5,(16-CONST_BITS) ; psrad mm5,16 & pslld mm5,CONST_BITS - psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS - - movq mm2,mm5 - paddd mm5,mm1 ; mm5=tmp11L - psubd mm2,mm1 ; mm2=tmp12L - movq mm0,mm7 - paddd mm7,mm3 ; mm7=tmp11H - psubd mm0,mm3 ; mm0=tmp12H - - movq MMWORD [wk(4)], mm5 ; wk(4)=tmp11L - movq MMWORD [wk(5)], mm7 ; wk(5)=tmp11H - movq MMWORD [wk(6)], 
mm2 ; wk(6)=tmp12L - movq MMWORD [wk(7)], mm0 ; wk(7)=tmp12H - - ; -- Odd part - - movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - pmullw mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - pmullw mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - movq mm5,mm6 - movq mm7,mm4 - paddw mm5,mm3 ; mm5=z3 - paddw mm7,mm1 ; mm7=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movq mm2,mm5 - movq mm0,mm5 - punpcklwd mm2,mm7 - punpckhwd mm0,mm7 - movq mm5,mm2 - movq mm7,mm0 - pmaddwd mm2,[GOTOFF(ebx,PW_MF078_F117)] ; mm2=z3L - pmaddwd mm0,[GOTOFF(ebx,PW_MF078_F117)] ; mm0=z3H - pmaddwd mm5,[GOTOFF(ebx,PW_F117_F078)] ; mm5=z4L - pmaddwd mm7,[GOTOFF(ebx,PW_F117_F078)] ; mm7=z4H - - movq MMWORD [wk(10)], mm2 ; wk(10)=z3L - movq MMWORD [wk(11)], mm0 ; wk(11)=z3H - - ; (Original) - ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; - ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; - ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; tmp0 += z1 + z3; tmp1 += z2 + z4; - ; tmp2 += z2 + z3; tmp3 += z1 + z4; - ; - ; (This implementation) - ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; - ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; - ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); - ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); - ; tmp0 += z3; tmp1 += z4; - ; tmp2 += z3; tmp3 += z4; - - movq mm2,mm3 - movq mm0,mm3 - punpcklwd 
mm2,mm4 - punpckhwd mm0,mm4 - movq mm3,mm2 - movq mm4,mm0 - pmaddwd mm2,[GOTOFF(ebx,PW_MF060_MF089)] ; mm2=tmp0L - pmaddwd mm0,[GOTOFF(ebx,PW_MF060_MF089)] ; mm0=tmp0H - pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L - pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H - - paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L - paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H - paddd mm3,mm5 ; mm3=tmp3L - paddd mm4,mm7 ; mm4=tmp3H - - movq MMWORD [wk(8)], mm2 ; wk(8)=tmp0L - movq MMWORD [wk(9)], mm0 ; wk(9)=tmp0H - - movq mm2,mm1 - movq mm0,mm1 - punpcklwd mm2,mm6 - punpckhwd mm0,mm6 - movq mm1,mm2 - movq mm6,mm0 - pmaddwd mm2,[GOTOFF(ebx,PW_MF050_MF256)] ; mm2=tmp1L - pmaddwd mm0,[GOTOFF(ebx,PW_MF050_MF256)] ; mm0=tmp1H - pmaddwd mm1,[GOTOFF(ebx,PW_MF256_F050)] ; mm1=tmp2L - pmaddwd mm6,[GOTOFF(ebx,PW_MF256_F050)] ; mm6=tmp2H - - paddd mm2,mm5 ; mm2=tmp1L - paddd mm0,mm7 ; mm0=tmp1H - paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L - paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H - - movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L - movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H - - ; -- Final output stage - - movq mm5, MMWORD [wk(0)] ; mm5=tmp10L - movq mm7, MMWORD [wk(1)] ; mm7=tmp10H - - movq mm2,mm5 - movq mm0,mm7 - paddd mm5,mm3 ; mm5=data0L - paddd mm7,mm4 ; mm7=data0H - psubd mm2,mm3 ; mm2=data7L - psubd mm0,mm4 ; mm0=data7H - - movq mm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm3=[PD_DESCALE_P1] - - paddd mm5,mm3 - paddd mm7,mm3 - psrad mm5,DESCALE_P1 - psrad mm7,DESCALE_P1 - paddd mm2,mm3 - paddd mm0,mm3 - psrad mm2,DESCALE_P1 - psrad mm0,DESCALE_P1 - - packssdw mm5,mm7 ; mm5=data0=(00 01 02 03) - packssdw mm2,mm0 ; mm2=data7=(70 71 72 73) - - movq mm4, MMWORD [wk(4)] ; mm4=tmp11L - movq mm3, MMWORD [wk(5)] ; mm3=tmp11H - - movq mm7,mm4 - movq mm0,mm3 - paddd mm4,mm1 ; mm4=data1L - paddd mm3,mm6 ; mm3=data1H - psubd mm7,mm1 ; mm7=data6L - psubd mm0,mm6 ; mm0=data6H - - movq mm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm1=[PD_DESCALE_P1] - - paddd mm4,mm1 - paddd mm3,mm1 - psrad mm4,DESCALE_P1 - psrad mm3,DESCALE_P1 - paddd 
mm7,mm1 - paddd mm0,mm1 - psrad mm7,DESCALE_P1 - psrad mm0,DESCALE_P1 - - packssdw mm4,mm3 ; mm4=data1=(10 11 12 13) - packssdw mm7,mm0 ; mm7=data6=(60 61 62 63) - - movq mm6,mm5 ; transpose coefficients(phase 1) - punpcklwd mm5,mm4 ; mm5=(00 10 01 11) - punpckhwd mm6,mm4 ; mm6=(02 12 03 13) - movq mm1,mm7 ; transpose coefficients(phase 1) - punpcklwd mm7,mm2 ; mm7=(60 70 61 71) - punpckhwd mm1,mm2 ; mm1=(62 72 63 73) - - movq mm3, MMWORD [wk(6)] ; mm3=tmp12L - movq mm0, MMWORD [wk(7)] ; mm0=tmp12H - movq mm4, MMWORD [wk(10)] ; mm4=tmp1L - movq mm2, MMWORD [wk(11)] ; mm2=tmp1H - - movq MMWORD [wk(0)], mm5 ; wk(0)=(00 10 01 11) - movq MMWORD [wk(1)], mm6 ; wk(1)=(02 12 03 13) - movq MMWORD [wk(4)], mm7 ; wk(4)=(60 70 61 71) - movq MMWORD [wk(5)], mm1 ; wk(5)=(62 72 63 73) - - movq mm5,mm3 - movq mm6,mm0 - paddd mm3,mm4 ; mm3=data2L - paddd mm0,mm2 ; mm0=data2H - psubd mm5,mm4 ; mm5=data5L - psubd mm6,mm2 ; mm6=data5H - - movq mm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm7=[PD_DESCALE_P1] - - paddd mm3,mm7 - paddd mm0,mm7 - psrad mm3,DESCALE_P1 - psrad mm0,DESCALE_P1 - paddd mm5,mm7 - paddd mm6,mm7 - psrad mm5,DESCALE_P1 - psrad mm6,DESCALE_P1 - - packssdw mm3,mm0 ; mm3=data2=(20 21 22 23) - packssdw mm5,mm6 ; mm5=data5=(50 51 52 53) - - movq mm1, MMWORD [wk(2)] ; mm1=tmp13L - movq mm4, MMWORD [wk(3)] ; mm4=tmp13H - movq mm2, MMWORD [wk(8)] ; mm2=tmp0L - movq mm7, MMWORD [wk(9)] ; mm7=tmp0H - - movq mm0,mm1 - movq mm6,mm4 - paddd mm1,mm2 ; mm1=data3L - paddd mm4,mm7 ; mm4=data3H - psubd mm0,mm2 ; mm0=data4L - psubd mm6,mm7 ; mm6=data4H - - movq mm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm2=[PD_DESCALE_P1] - - paddd mm1,mm2 - paddd mm4,mm2 - psrad mm1,DESCALE_P1 - psrad mm4,DESCALE_P1 - paddd mm0,mm2 - paddd mm6,mm2 - psrad mm0,DESCALE_P1 - psrad mm6,DESCALE_P1 - - packssdw mm1,mm4 ; mm1=data3=(30 31 32 33) - packssdw mm0,mm6 ; mm0=data4=(40 41 42 43) - - movq mm7, MMWORD [wk(0)] ; mm7=(00 10 01 11) - movq mm2, MMWORD [wk(1)] ; mm2=(02 12 03 13) - - movq mm4,mm3 ; transpose 
coefficients(phase 1) - punpcklwd mm3,mm1 ; mm3=(20 30 21 31) - punpckhwd mm4,mm1 ; mm4=(22 32 23 33) - movq mm6,mm0 ; transpose coefficients(phase 1) - punpcklwd mm0,mm5 ; mm0=(40 50 41 51) - punpckhwd mm6,mm5 ; mm6=(42 52 43 53) - - movq mm1,mm7 ; transpose coefficients(phase 2) - punpckldq mm7,mm3 ; mm7=(00 10 20 30) - punpckhdq mm1,mm3 ; mm1=(01 11 21 31) - movq mm5,mm2 ; transpose coefficients(phase 2) - punpckldq mm2,mm4 ; mm2=(02 12 22 32) - punpckhdq mm5,mm4 ; mm5=(03 13 23 33) - - movq mm3, MMWORD [wk(4)] ; mm3=(60 70 61 71) - movq mm4, MMWORD [wk(5)] ; mm4=(62 72 63 73) - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm7 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 - movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 - movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5 - - movq mm7,mm0 ; transpose coefficients(phase 2) - punpckldq mm0,mm3 ; mm0=(40 50 60 70) - punpckhdq mm7,mm3 ; mm7=(41 51 61 71) - movq mm1,mm6 ; transpose coefficients(phase 2) - punpckldq mm6,mm4 ; mm6=(42 52 62 72) - punpckhdq mm1,mm4 ; mm1=(43 53 63 73) - - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0 - movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm7 - movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm6 - movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm1 - -.nextcolumn: - add esi, byte 4*SIZEOF_JCOEF ; coef_block - add edx, byte 4*SIZEOF_ISLOW_MULT_TYPE ; quantptr - add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr - dec ecx ; ctr - jnz near .columnloop - - ; ---- Pass 2: process rows from work array, store into output array. 
- - mov eax, [original_ebp] - lea esi, [workspace] ; JCOEF *wsptr - mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(eax)] - mov ecx, DCTSIZE/4 ; ctr - alignx 16,7 -.rowloop: - - ; -- Even part - - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - - ; (Original) - ; z1 = (z2 + z3) * 0.541196100; - ; tmp2 = z1 + z3 * -1.847759065; - ; tmp3 = z1 + z2 * 0.765366865; - ; - ; (This implementation) - ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); - ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; - - movq mm4,mm1 ; mm1=in2=z2 - movq mm5,mm1 - punpcklwd mm4,mm3 ; mm3=in6=z3 - punpckhwd mm5,mm3 - movq mm1,mm4 - movq mm3,mm5 - pmaddwd mm4,[GOTOFF(ebx,PW_F130_F054)] ; mm4=tmp3L - pmaddwd mm5,[GOTOFF(ebx,PW_F130_F054)] ; mm5=tmp3H - pmaddwd mm1,[GOTOFF(ebx,PW_F054_MF130)] ; mm1=tmp2L - pmaddwd mm3,[GOTOFF(ebx,PW_F054_MF130)] ; mm3=tmp2H - - movq mm6,mm0 - paddw mm0,mm2 ; mm0=in0+in4 - psubw mm6,mm2 ; mm6=in0-in4 - - pxor mm7,mm7 - pxor mm2,mm2 - punpcklwd mm7,mm0 ; mm7=tmp0L - punpckhwd mm2,mm0 ; mm2=tmp0H - psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS - psrad mm2,(16-CONST_BITS) ; psrad mm2,16 & pslld mm2,CONST_BITS - - movq mm0,mm7 - paddd mm7,mm4 ; mm7=tmp10L - psubd mm0,mm4 ; mm0=tmp13L - movq mm4,mm2 - paddd mm2,mm5 ; mm2=tmp10H - psubd mm4,mm5 ; mm4=tmp13H - - movq MMWORD [wk(0)], mm7 ; wk(0)=tmp10L - movq MMWORD [wk(1)], mm2 ; wk(1)=tmp10H - movq MMWORD [wk(2)], mm0 ; wk(2)=tmp13L - movq MMWORD [wk(3)], mm4 ; wk(3)=tmp13H - - pxor mm5,mm5 - pxor mm7,mm7 - punpcklwd mm5,mm6 ; mm5=tmp1L - punpckhwd mm7,mm6 ; mm7=tmp1H - psrad mm5,(16-CONST_BITS) ; psrad mm5,16 & pslld mm5,CONST_BITS - psrad mm7,(16-CONST_BITS) ; psrad mm7,16 & pslld mm7,CONST_BITS - - movq mm2,mm5 - paddd mm5,mm1 ; mm5=tmp11L - psubd mm2,mm1 ; mm2=tmp12L - movq mm0,mm7 
- paddd mm7,mm3 ; mm7=tmp11H - psubd mm0,mm3 ; mm0=tmp12H - - movq MMWORD [wk(4)], mm5 ; wk(4)=tmp11L - movq MMWORD [wk(5)], mm7 ; wk(5)=tmp11H - movq MMWORD [wk(6)], mm2 ; wk(6)=tmp12L - movq MMWORD [wk(7)], mm0 ; wk(7)=tmp12H - - ; -- Odd part - - movq mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - - movq mm5,mm6 - movq mm7,mm4 - paddw mm5,mm3 ; mm5=z3 - paddw mm7,mm1 ; mm7=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movq mm2,mm5 - movq mm0,mm5 - punpcklwd mm2,mm7 - punpckhwd mm0,mm7 - movq mm5,mm2 - movq mm7,mm0 - pmaddwd mm2,[GOTOFF(ebx,PW_MF078_F117)] ; mm2=z3L - pmaddwd mm0,[GOTOFF(ebx,PW_MF078_F117)] ; mm0=z3H - pmaddwd mm5,[GOTOFF(ebx,PW_F117_F078)] ; mm5=z4L - pmaddwd mm7,[GOTOFF(ebx,PW_F117_F078)] ; mm7=z4H - - movq MMWORD [wk(10)], mm2 ; wk(10)=z3L - movq MMWORD [wk(11)], mm0 ; wk(11)=z3H - - ; (Original) - ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; - ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; - ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; tmp0 += z1 + z3; tmp1 += z2 + z4; - ; tmp2 += z2 + z3; tmp3 += z1 + z4; - ; - ; (This implementation) - ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; - ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; - ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); - ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); - ; tmp0 += z3; tmp1 += z4; - ; tmp2 += z3; tmp3 += z4; - - movq mm2,mm3 - movq mm0,mm3 - punpcklwd mm2,mm4 - punpckhwd mm0,mm4 - movq mm3,mm2 - movq mm4,mm0 - pmaddwd 
mm2,[GOTOFF(ebx,PW_MF060_MF089)] ; mm2=tmp0L - pmaddwd mm0,[GOTOFF(ebx,PW_MF060_MF089)] ; mm0=tmp0H - pmaddwd mm3,[GOTOFF(ebx,PW_MF089_F060)] ; mm3=tmp3L - pmaddwd mm4,[GOTOFF(ebx,PW_MF089_F060)] ; mm4=tmp3H - - paddd mm2, MMWORD [wk(10)] ; mm2=tmp0L - paddd mm0, MMWORD [wk(11)] ; mm0=tmp0H - paddd mm3,mm5 ; mm3=tmp3L - paddd mm4,mm7 ; mm4=tmp3H - - movq MMWORD [wk(8)], mm2 ; wk(8)=tmp0L - movq MMWORD [wk(9)], mm0 ; wk(9)=tmp0H - - movq mm2,mm1 - movq mm0,mm1 - punpcklwd mm2,mm6 - punpckhwd mm0,mm6 - movq mm1,mm2 - movq mm6,mm0 - pmaddwd mm2,[GOTOFF(ebx,PW_MF050_MF256)] ; mm2=tmp1L - pmaddwd mm0,[GOTOFF(ebx,PW_MF050_MF256)] ; mm0=tmp1H - pmaddwd mm1,[GOTOFF(ebx,PW_MF256_F050)] ; mm1=tmp2L - pmaddwd mm6,[GOTOFF(ebx,PW_MF256_F050)] ; mm6=tmp2H - - paddd mm2,mm5 ; mm2=tmp1L - paddd mm0,mm7 ; mm0=tmp1H - paddd mm1, MMWORD [wk(10)] ; mm1=tmp2L - paddd mm6, MMWORD [wk(11)] ; mm6=tmp2H - - movq MMWORD [wk(10)], mm2 ; wk(10)=tmp1L - movq MMWORD [wk(11)], mm0 ; wk(11)=tmp1H - - ; -- Final output stage - - movq mm5, MMWORD [wk(0)] ; mm5=tmp10L - movq mm7, MMWORD [wk(1)] ; mm7=tmp10H - - movq mm2,mm5 - movq mm0,mm7 - paddd mm5,mm3 ; mm5=data0L - paddd mm7,mm4 ; mm7=data0H - psubd mm2,mm3 ; mm2=data7L - psubd mm0,mm4 ; mm0=data7H - - movq mm3,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm3=[PD_DESCALE_P2] - - paddd mm5,mm3 - paddd mm7,mm3 - psrad mm5,DESCALE_P2 - psrad mm7,DESCALE_P2 - paddd mm2,mm3 - paddd mm0,mm3 - psrad mm2,DESCALE_P2 - psrad mm0,DESCALE_P2 - - packssdw mm5,mm7 ; mm5=data0=(00 10 20 30) - packssdw mm2,mm0 ; mm2=data7=(07 17 27 37) - - movq mm4, MMWORD [wk(4)] ; mm4=tmp11L - movq mm3, MMWORD [wk(5)] ; mm3=tmp11H - - movq mm7,mm4 - movq mm0,mm3 - paddd mm4,mm1 ; mm4=data1L - paddd mm3,mm6 ; mm3=data1H - psubd mm7,mm1 ; mm7=data6L - psubd mm0,mm6 ; mm0=data6H - - movq mm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm1=[PD_DESCALE_P2] - - paddd mm4,mm1 - paddd mm3,mm1 - psrad mm4,DESCALE_P2 - psrad mm3,DESCALE_P2 - paddd mm7,mm1 - paddd mm0,mm1 - psrad mm7,DESCALE_P2 - psrad 
mm0,DESCALE_P2 - - packssdw mm4,mm3 ; mm4=data1=(01 11 21 31) - packssdw mm7,mm0 ; mm7=data6=(06 16 26 36) - - packsswb mm5,mm7 ; mm5=(00 10 20 30 06 16 26 36) - packsswb mm4,mm2 ; mm4=(01 11 21 31 07 17 27 37) - - movq mm6, MMWORD [wk(6)] ; mm6=tmp12L - movq mm1, MMWORD [wk(7)] ; mm1=tmp12H - movq mm3, MMWORD [wk(10)] ; mm3=tmp1L - movq mm0, MMWORD [wk(11)] ; mm0=tmp1H - - movq MMWORD [wk(0)], mm5 ; wk(0)=(00 10 20 30 06 16 26 36) - movq MMWORD [wk(1)], mm4 ; wk(1)=(01 11 21 31 07 17 27 37) - - movq mm7,mm6 - movq mm2,mm1 - paddd mm6,mm3 ; mm6=data2L - paddd mm1,mm0 ; mm1=data2H - psubd mm7,mm3 ; mm7=data5L - psubd mm2,mm0 ; mm2=data5H - - movq mm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm5=[PD_DESCALE_P2] - - paddd mm6,mm5 - paddd mm1,mm5 - psrad mm6,DESCALE_P2 - psrad mm1,DESCALE_P2 - paddd mm7,mm5 - paddd mm2,mm5 - psrad mm7,DESCALE_P2 - psrad mm2,DESCALE_P2 - - packssdw mm6,mm1 ; mm6=data2=(02 12 22 32) - packssdw mm7,mm2 ; mm7=data5=(05 15 25 35) - - movq mm4, MMWORD [wk(2)] ; mm4=tmp13L - movq mm3, MMWORD [wk(3)] ; mm3=tmp13H - movq mm0, MMWORD [wk(8)] ; mm0=tmp0L - movq mm5, MMWORD [wk(9)] ; mm5=tmp0H - - movq mm1,mm4 - movq mm2,mm3 - paddd mm4,mm0 ; mm4=data3L - paddd mm3,mm5 ; mm3=data3H - psubd mm1,mm0 ; mm1=data4L - psubd mm2,mm5 ; mm2=data4H - - movq mm0,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm0=[PD_DESCALE_P2] - - paddd mm4,mm0 - paddd mm3,mm0 - psrad mm4,DESCALE_P2 - psrad mm3,DESCALE_P2 - paddd mm1,mm0 - paddd mm2,mm0 - psrad mm1,DESCALE_P2 - psrad mm2,DESCALE_P2 - - movq mm5,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm5=[PB_CENTERJSAMP] - - packssdw mm4,mm3 ; mm4=data3=(03 13 23 33) - packssdw mm1,mm2 ; mm1=data4=(04 14 24 34) - - movq mm0, MMWORD [wk(0)] ; mm0=(00 10 20 30 06 16 26 36) - movq mm3, MMWORD [wk(1)] ; mm3=(01 11 21 31 07 17 27 37) - - packsswb mm6,mm1 ; mm6=(02 12 22 32 04 14 24 34) - packsswb mm4,mm7 ; mm4=(03 13 23 33 05 15 25 35) - - paddb mm0,mm5 - paddb mm3,mm5 - paddb mm6,mm5 - paddb mm4,mm5 - - movq mm2,mm0 ; transpose coefficients(phase 1) - punpcklbw 
mm0,mm3 ; mm0=(00 01 10 11 20 21 30 31) - punpckhbw mm2,mm3 ; mm2=(06 07 16 17 26 27 36 37) - movq mm1,mm6 ; transpose coefficients(phase 1) - punpcklbw mm6,mm4 ; mm6=(02 03 12 13 22 23 32 33) - punpckhbw mm1,mm4 ; mm1=(04 05 14 15 24 25 34 35) - - movq mm7,mm0 ; transpose coefficients(phase 2) - punpcklwd mm0,mm6 ; mm0=(00 01 02 03 10 11 12 13) - punpckhwd mm7,mm6 ; mm7=(20 21 22 23 30 31 32 33) - movq mm5,mm1 ; transpose coefficients(phase 2) - punpcklwd mm1,mm2 ; mm1=(04 05 06 07 14 15 16 17) - punpckhwd mm5,mm2 ; mm5=(24 25 26 27 34 35 36 37) - - movq mm3,mm0 ; transpose coefficients(phase 3) - punpckldq mm0,mm1 ; mm0=(00 01 02 03 04 05 06 07) - punpckhdq mm3,mm1 ; mm3=(10 11 12 13 14 15 16 17) - movq mm4,mm7 ; transpose coefficients(phase 3) - punpckldq mm7,mm5 ; mm7=(20 21 22 23 24 25 26 27) - punpckhdq mm4,mm5 ; mm4=(30 31 32 33 34 35 36 37) - - pushpic ebx ; save GOT address - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0 - movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm3 - mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] - mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW] - movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7 - movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4 - - poppic ebx ; restore GOT address - - add esi, byte 4*SIZEOF_JCOEF ; wsptr - add edi, byte 4*SIZEOF_JSAMPROW - dec ecx ; ctr - jnz near .rowloop - - emms ; empty MMX state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jidctint-sse2-64.asm b/simd/jidctint-sse2-64.asm deleted file mode 100644 index afe1d6a..0000000 --- a/simd/jidctint-sse2-64.asm +++ /dev/null @@ -1,847 +0,0 @@ -; -; jidctint.asm - accurate integer IDCT (64-bit SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a slow-but-accurate integer implementation of the -; inverse DCT (Discrete Cosine Transform). The following code is based -; directly on the IJG's original jidctint.c; see the jidctint.c for -; more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 13 -%define PASS1_BITS 2 - -%define DESCALE_P1 (CONST_BITS-PASS1_BITS) -%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) - -%if CONST_BITS == 13 -F_0_298 equ 2446 ; FIX(0.298631336) -F_0_390 equ 3196 ; FIX(0.390180644) -F_0_541 equ 4433 ; FIX(0.541196100) -F_0_765 equ 6270 ; FIX(0.765366865) -F_0_899 equ 7373 ; FIX(0.899976223) -F_1_175 equ 9633 ; FIX(1.175875602) -F_1_501 equ 12299 ; FIX(1.501321110) -F_1_847 equ 15137 ; FIX(1.847759065) -F_1_961 equ 16069 ; FIX(1.961570560) -F_2_053 equ 16819 ; FIX(2.053119869) -F_2_562 equ 20995 ; FIX(2.562915447) -F_3_072 equ 25172 ; FIX(3.072711026) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. 
-%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) -F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) -F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) -F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) -F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) -F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) -F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) -F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) -F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) -F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_idct_islow_sse2) - -EXTN(jconst_idct_islow_sse2): - -PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 -PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) -PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 -PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) -PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 -PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) -PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 -PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) -PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) -PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) -PB_CENTERJSAMP times 16 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Perform dequantization and inverse DCT on one block of coefficients. 
-; -; GLOBAL(void) -; jsimd_idct_islow_sse2 (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -; r10 = jpeg_component_info *compptr -; r11 = JCOEFPTR coef_block -; r12 = JSAMPARRAY output_buf -; r13 = JDIMENSION output_col - -%define original_rbp rbp+0 -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 12 - - align 16 - global EXTN(jsimd_idct_islow_sse2) - -EXTN(jsimd_idct_islow_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - - ; ---- Pass 1: process columns from input. - - mov rdx, r10 ; quantptr - mov rsi, r11 ; inptr - -%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 - mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] - jnz near .columnDCT - - movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] - por xmm1,xmm0 - packsswb xmm1,xmm1 - packsswb xmm1,xmm1 - movd eax,xmm1 - test rax,rax - jnz short .columnDCT - - ; -- AC terms all zero - - movdqa xmm5, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] - pmullw xmm5, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - - psllw xmm5,PASS1_BITS - - movdqa xmm4,xmm5 ; xmm5=in0=(00 01 02 03 04 05 06 07) - punpcklwd xmm5,xmm5 ; xmm5=(00 00 01 01 02 02 03 03) - punpckhwd xmm4,xmm4 ; xmm4=(04 04 05 05 06 06 07 07) - - pshufd xmm7,xmm5,0x00 ; xmm7=col0=(00 00 00 00 00 00 00 00) - pshufd xmm6,xmm5,0x55 ; xmm6=col1=(01 01 01 01 01 01 01 01) - pshufd xmm1,xmm5,0xAA ; xmm1=col2=(02 02 02 02 02 02 02 02) - pshufd xmm5,xmm5,0xFF ; xmm5=col3=(03 03 03 03 
03 03 03 03) - pshufd xmm0,xmm4,0x00 ; xmm0=col4=(04 04 04 04 04 04 04 04) - pshufd xmm3,xmm4,0x55 ; xmm3=col5=(05 05 05 05 05 05 05 05) - pshufd xmm2,xmm4,0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06) - pshufd xmm4,xmm4,0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07) - - movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1 - movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3 - movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 - movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 - jmp near .column_end -%endif -.columnDCT: - - ; -- Even part - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - movdqa xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] - pmullw xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - - ; (Original) - ; z1 = (z2 + z3) * 0.541196100; - ; tmp2 = z1 + z3 * -1.847759065; - ; tmp3 = z1 + z2 * 0.765366865; - ; - ; (This implementation) - ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); - ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; - - movdqa xmm4,xmm1 ; xmm1=in2=z2 - movdqa xmm5,xmm1 - punpcklwd xmm4,xmm3 ; xmm3=in6=z3 - punpckhwd xmm5,xmm3 - movdqa xmm1,xmm4 - movdqa xmm3,xmm5 - pmaddwd xmm4,[rel PW_F130_F054] ; xmm4=tmp3L - pmaddwd xmm5,[rel PW_F130_F054] ; xmm5=tmp3H - pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=tmp2L - pmaddwd xmm3,[rel PW_F054_MF130] ; xmm3=tmp2H - - movdqa xmm6,xmm0 - paddw xmm0,xmm2 ; xmm0=in0+in4 - psubw xmm6,xmm2 ; xmm6=in0-in4 - - pxor xmm7,xmm7 - pxor xmm2,xmm2 - punpcklwd xmm7,xmm0 ; xmm7=tmp0L - punpckhwd xmm2,xmm0 ; xmm2=tmp0H - psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS - psrad xmm2,(16-CONST_BITS) ; psrad xmm2,16 & pslld xmm2,CONST_BITS - - movdqa xmm0,xmm7 - paddd xmm7,xmm4 ; xmm7=tmp10L - 
psubd xmm0,xmm4 ; xmm0=tmp13L - movdqa xmm4,xmm2 - paddd xmm2,xmm5 ; xmm2=tmp10H - psubd xmm4,xmm5 ; xmm4=tmp13H - - movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=tmp10L - movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=tmp10H - movdqa XMMWORD [wk(2)], xmm0 ; wk(2)=tmp13L - movdqa XMMWORD [wk(3)], xmm4 ; wk(3)=tmp13H - - pxor xmm5,xmm5 - pxor xmm7,xmm7 - punpcklwd xmm5,xmm6 ; xmm5=tmp1L - punpckhwd xmm7,xmm6 ; xmm7=tmp1H - psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS - psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS - - movdqa xmm2,xmm5 - paddd xmm5,xmm1 ; xmm5=tmp11L - psubd xmm2,xmm1 ; xmm2=tmp12L - movdqa xmm0,xmm7 - paddd xmm7,xmm3 ; xmm7=tmp11H - psubd xmm0,xmm3 ; xmm0=tmp12H - - movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L - movdqa XMMWORD [wk(5)], xmm7 ; wk(5)=tmp11H - movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=tmp12L - movdqa XMMWORD [wk(7)], xmm0 ; wk(7)=tmp12H - - ; -- Odd part - - movdqa xmm4, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] - movdqa xmm6, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] - pmullw xmm4, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm6, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - movdqa xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] - pmullw xmm1, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - - movdqa xmm5,xmm6 - movdqa xmm7,xmm4 - paddw xmm5,xmm3 ; xmm5=z3 - paddw xmm7,xmm1 ; xmm7=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movdqa xmm2,xmm5 - movdqa xmm0,xmm5 - punpcklwd xmm2,xmm7 - punpckhwd xmm0,xmm7 - movdqa xmm5,xmm2 - movdqa xmm7,xmm0 - pmaddwd xmm2,[rel PW_MF078_F117] ; xmm2=z3L - pmaddwd xmm0,[rel PW_MF078_F117] ; xmm0=z3H - 
pmaddwd xmm5,[rel PW_F117_F078] ; xmm5=z4L - pmaddwd xmm7,[rel PW_F117_F078] ; xmm7=z4H - - movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L - movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H - - ; (Original) - ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; - ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; - ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; tmp0 += z1 + z3; tmp1 += z2 + z4; - ; tmp2 += z2 + z3; tmp3 += z1 + z4; - ; - ; (This implementation) - ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; - ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; - ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); - ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); - ; tmp0 += z3; tmp1 += z4; - ; tmp2 += z3; tmp3 += z4; - - movdqa xmm2,xmm3 - movdqa xmm0,xmm3 - punpcklwd xmm2,xmm4 - punpckhwd xmm0,xmm4 - movdqa xmm3,xmm2 - movdqa xmm4,xmm0 - pmaddwd xmm2,[rel PW_MF060_MF089] ; xmm2=tmp0L - pmaddwd xmm0,[rel PW_MF060_MF089] ; xmm0=tmp0H - pmaddwd xmm3,[rel PW_MF089_F060] ; xmm3=tmp3L - pmaddwd xmm4,[rel PW_MF089_F060] ; xmm4=tmp3H - - paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L - paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H - paddd xmm3,xmm5 ; xmm3=tmp3L - paddd xmm4,xmm7 ; xmm4=tmp3H - - movdqa XMMWORD [wk(8)], xmm2 ; wk(8)=tmp0L - movdqa XMMWORD [wk(9)], xmm0 ; wk(9)=tmp0H - - movdqa xmm2,xmm1 - movdqa xmm0,xmm1 - punpcklwd xmm2,xmm6 - punpckhwd xmm0,xmm6 - movdqa xmm1,xmm2 - movdqa xmm6,xmm0 - pmaddwd xmm2,[rel PW_MF050_MF256] ; xmm2=tmp1L - pmaddwd xmm0,[rel PW_MF050_MF256] ; xmm0=tmp1H - pmaddwd xmm1,[rel PW_MF256_F050] ; xmm1=tmp2L - pmaddwd xmm6,[rel PW_MF256_F050] ; xmm6=tmp2H - - paddd xmm2,xmm5 ; xmm2=tmp1L - paddd xmm0,xmm7 ; xmm0=tmp1H - paddd xmm1, XMMWORD [wk(10)] ; xmm1=tmp2L - paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H - - movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=tmp1L - movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=tmp1H - - ; -- Final output stage - - 
movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L - movdqa xmm7, XMMWORD [wk(1)] ; xmm7=tmp10H - - movdqa xmm2,xmm5 - movdqa xmm0,xmm7 - paddd xmm5,xmm3 ; xmm5=data0L - paddd xmm7,xmm4 ; xmm7=data0H - psubd xmm2,xmm3 ; xmm2=data7L - psubd xmm0,xmm4 ; xmm0=data7H - - movdqa xmm3,[rel PD_DESCALE_P1] ; xmm3=[rel PD_DESCALE_P1] - - paddd xmm5,xmm3 - paddd xmm7,xmm3 - psrad xmm5,DESCALE_P1 - psrad xmm7,DESCALE_P1 - paddd xmm2,xmm3 - paddd xmm0,xmm3 - psrad xmm2,DESCALE_P1 - psrad xmm0,DESCALE_P1 - - packssdw xmm5,xmm7 ; xmm5=data0=(00 01 02 03 04 05 06 07) - packssdw xmm2,xmm0 ; xmm2=data7=(70 71 72 73 74 75 76 77) - - movdqa xmm4, XMMWORD [wk(4)] ; xmm4=tmp11L - movdqa xmm3, XMMWORD [wk(5)] ; xmm3=tmp11H - - movdqa xmm7,xmm4 - movdqa xmm0,xmm3 - paddd xmm4,xmm1 ; xmm4=data1L - paddd xmm3,xmm6 ; xmm3=data1H - psubd xmm7,xmm1 ; xmm7=data6L - psubd xmm0,xmm6 ; xmm0=data6H - - movdqa xmm1,[rel PD_DESCALE_P1] ; xmm1=[rel PD_DESCALE_P1] - - paddd xmm4,xmm1 - paddd xmm3,xmm1 - psrad xmm4,DESCALE_P1 - psrad xmm3,DESCALE_P1 - paddd xmm7,xmm1 - paddd xmm0,xmm1 - psrad xmm7,DESCALE_P1 - psrad xmm0,DESCALE_P1 - - packssdw xmm4,xmm3 ; xmm4=data1=(10 11 12 13 14 15 16 17) - packssdw xmm7,xmm0 ; xmm7=data6=(60 61 62 63 64 65 66 67) - - movdqa xmm6,xmm5 ; transpose coefficients(phase 1) - punpcklwd xmm5,xmm4 ; xmm5=(00 10 01 11 02 12 03 13) - punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) - movdqa xmm1,xmm7 ; transpose coefficients(phase 1) - punpcklwd xmm7,xmm2 ; xmm7=(60 70 61 71 62 72 63 73) - punpckhwd xmm1,xmm2 ; xmm1=(64 74 65 75 66 76 67 77) - - movdqa xmm3, XMMWORD [wk(6)] ; xmm3=tmp12L - movdqa xmm0, XMMWORD [wk(7)] ; xmm0=tmp12H - movdqa xmm4, XMMWORD [wk(10)] ; xmm4=tmp1L - movdqa xmm2, XMMWORD [wk(11)] ; xmm2=tmp1H - - movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 01 11 02 12 03 13) - movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=(04 14 05 15 06 16 07 17) - movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=(60 70 61 71 62 72 63 73) - movdqa XMMWORD [wk(5)], xmm1 ; wk(5)=(64 74 65 75 66 76 67 77) - 
- movdqa xmm5,xmm3 - movdqa xmm6,xmm0 - paddd xmm3,xmm4 ; xmm3=data2L - paddd xmm0,xmm2 ; xmm0=data2H - psubd xmm5,xmm4 ; xmm5=data5L - psubd xmm6,xmm2 ; xmm6=data5H - - movdqa xmm7,[rel PD_DESCALE_P1] ; xmm7=[rel PD_DESCALE_P1] - - paddd xmm3,xmm7 - paddd xmm0,xmm7 - psrad xmm3,DESCALE_P1 - psrad xmm0,DESCALE_P1 - paddd xmm5,xmm7 - paddd xmm6,xmm7 - psrad xmm5,DESCALE_P1 - psrad xmm6,DESCALE_P1 - - packssdw xmm3,xmm0 ; xmm3=data2=(20 21 22 23 24 25 26 27) - packssdw xmm5,xmm6 ; xmm5=data5=(50 51 52 53 54 55 56 57) - - movdqa xmm1, XMMWORD [wk(2)] ; xmm1=tmp13L - movdqa xmm4, XMMWORD [wk(3)] ; xmm4=tmp13H - movdqa xmm2, XMMWORD [wk(8)] ; xmm2=tmp0L - movdqa xmm7, XMMWORD [wk(9)] ; xmm7=tmp0H - - movdqa xmm0,xmm1 - movdqa xmm6,xmm4 - paddd xmm1,xmm2 ; xmm1=data3L - paddd xmm4,xmm7 ; xmm4=data3H - psubd xmm0,xmm2 ; xmm0=data4L - psubd xmm6,xmm7 ; xmm6=data4H - - movdqa xmm2,[rel PD_DESCALE_P1] ; xmm2=[rel PD_DESCALE_P1] - - paddd xmm1,xmm2 - paddd xmm4,xmm2 - psrad xmm1,DESCALE_P1 - psrad xmm4,DESCALE_P1 - paddd xmm0,xmm2 - paddd xmm6,xmm2 - psrad xmm0,DESCALE_P1 - psrad xmm6,DESCALE_P1 - - packssdw xmm1,xmm4 ; xmm1=data3=(30 31 32 33 34 35 36 37) - packssdw xmm0,xmm6 ; xmm0=data4=(40 41 42 43 44 45 46 47) - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 01 11 02 12 03 13) - movdqa xmm2, XMMWORD [wk(1)] ; xmm2=(04 14 05 15 06 16 07 17) - - movdqa xmm4,xmm3 ; transpose coefficients(phase 1) - punpcklwd xmm3,xmm1 ; xmm3=(20 30 21 31 22 32 23 33) - punpckhwd xmm4,xmm1 ; xmm4=(24 34 25 35 26 36 27 37) - movdqa xmm6,xmm0 ; transpose coefficients(phase 1) - punpcklwd xmm0,xmm5 ; xmm0=(40 50 41 51 42 52 43 53) - punpckhwd xmm6,xmm5 ; xmm6=(44 54 45 55 46 56 47 57) - - movdqa xmm1,xmm7 ; transpose coefficients(phase 2) - punpckldq xmm7,xmm3 ; xmm7=(00 10 20 30 01 11 21 31) - punpckhdq xmm1,xmm3 ; xmm1=(02 12 22 32 03 13 23 33) - movdqa xmm5,xmm2 ; transpose coefficients(phase 2) - punpckldq xmm2,xmm4 ; xmm2=(04 14 24 34 05 15 25 35) - punpckhdq xmm5,xmm4 ; xmm5=(06 16 26 36 07 
17 27 37) - - movdqa xmm3, XMMWORD [wk(4)] ; xmm3=(60 70 61 71 62 72 63 73) - movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(64 74 65 75 66 76 67 77) - - movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=(04 14 24 34 05 15 25 35) - movdqa XMMWORD [wk(7)], xmm5 ; wk(7)=(06 16 26 36 07 17 27 37) - - movdqa xmm2,xmm0 ; transpose coefficients(phase 2) - punpckldq xmm0,xmm3 ; xmm0=(40 50 60 70 41 51 61 71) - punpckhdq xmm2,xmm3 ; xmm2=(42 52 62 72 43 53 63 73) - movdqa xmm5,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm4 ; xmm6=(44 54 64 74 45 55 65 75) - punpckhdq xmm5,xmm4 ; xmm5=(46 56 66 76 47 57 67 77) - - movdqa xmm3,xmm7 ; transpose coefficients(phase 3) - punpcklqdq xmm7,xmm0 ; xmm7=col0=(00 10 20 30 40 50 60 70) - punpckhqdq xmm3,xmm0 ; xmm3=col1=(01 11 21 31 41 51 61 71) - movdqa xmm4,xmm1 ; transpose coefficients(phase 3) - punpcklqdq xmm1,xmm2 ; xmm1=col2=(02 12 22 32 42 52 62 72) - punpckhqdq xmm4,xmm2 ; xmm4=col3=(03 13 23 33 43 53 63 73) - - movdqa xmm0, XMMWORD [wk(6)] ; xmm0=(04 14 24 34 05 15 25 35) - movdqa xmm2, XMMWORD [wk(7)] ; xmm2=(06 16 26 36 07 17 27 37) - - movdqa XMMWORD [wk(8)], xmm3 ; wk(8)=col1 - movdqa XMMWORD [wk(9)], xmm4 ; wk(9)=col3 - - movdqa xmm3,xmm0 ; transpose coefficients(phase 3) - punpcklqdq xmm0,xmm6 ; xmm0=col4=(04 14 24 34 44 54 64 74) - punpckhqdq xmm3,xmm6 ; xmm3=col5=(05 15 25 35 45 55 65 75) - movdqa xmm4,xmm2 ; transpose coefficients(phase 3) - punpcklqdq xmm2,xmm5 ; xmm2=col6=(06 16 26 36 46 56 66 76) - punpckhqdq xmm4,xmm5 ; xmm4=col7=(07 17 27 37 47 57 67 77) - - movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 - movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 -.column_end: - - ; -- Prefetch the next coefficient block - - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows from work array, store into output array. 
- - mov rax, [original_rbp] - mov rdi, r12 ; (JSAMPROW *) - mov eax, r13d - - ; -- Even part - - ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 - - ; (Original) - ; z1 = (z2 + z3) * 0.541196100; - ; tmp2 = z1 + z3 * -1.847759065; - ; tmp3 = z1 + z2 * 0.765366865; - ; - ; (This implementation) - ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); - ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; - - movdqa xmm6,xmm1 ; xmm1=in2=z2 - movdqa xmm5,xmm1 - punpcklwd xmm6,xmm2 ; xmm2=in6=z3 - punpckhwd xmm5,xmm2 - movdqa xmm1,xmm6 - movdqa xmm2,xmm5 - pmaddwd xmm6,[rel PW_F130_F054] ; xmm6=tmp3L - pmaddwd xmm5,[rel PW_F130_F054] ; xmm5=tmp3H - pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=tmp2L - pmaddwd xmm2,[rel PW_F054_MF130] ; xmm2=tmp2H - - movdqa xmm3,xmm7 - paddw xmm7,xmm0 ; xmm7=in0+in4 - psubw xmm3,xmm0 ; xmm3=in0-in4 - - pxor xmm4,xmm4 - pxor xmm0,xmm0 - punpcklwd xmm4,xmm7 ; xmm4=tmp0L - punpckhwd xmm0,xmm7 ; xmm0=tmp0H - psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS - psrad xmm0,(16-CONST_BITS) ; psrad xmm0,16 & pslld xmm0,CONST_BITS - - movdqa xmm7,xmm4 - paddd xmm4,xmm6 ; xmm4=tmp10L - psubd xmm7,xmm6 ; xmm7=tmp13L - movdqa xmm6,xmm0 - paddd xmm0,xmm5 ; xmm0=tmp10H - psubd xmm6,xmm5 ; xmm6=tmp13H - - movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=tmp10L - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp10H - movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=tmp13L - movdqa XMMWORD [wk(3)], xmm6 ; wk(3)=tmp13H - - pxor xmm5,xmm5 - pxor xmm4,xmm4 - punpcklwd xmm5,xmm3 ; xmm5=tmp1L - punpckhwd xmm4,xmm3 ; xmm4=tmp1H - psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS - psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS - - movdqa xmm0,xmm5 - paddd xmm5,xmm1 ; xmm5=tmp11L - psubd xmm0,xmm1 ; xmm0=tmp12L - movdqa xmm7,xmm4 - paddd xmm4,xmm2 ; xmm4=tmp11H - psubd xmm7,xmm2 ; xmm7=tmp12H - - movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L - movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=tmp11H - movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=tmp12L - 
movdqa XMMWORD [wk(7)], xmm7 ; wk(7)=tmp12H - - ; -- Odd part - - movdqa xmm6, XMMWORD [wk(9)] ; xmm6=col3 - movdqa xmm3, XMMWORD [wk(8)] ; xmm3=col1 - movdqa xmm1, XMMWORD [wk(11)] ; xmm1=col7 - movdqa xmm2, XMMWORD [wk(10)] ; xmm2=col5 - - movdqa xmm5,xmm6 - movdqa xmm4,xmm3 - paddw xmm5,xmm1 ; xmm5=z3 - paddw xmm4,xmm2 ; xmm4=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movdqa xmm0,xmm5 - movdqa xmm7,xmm5 - punpcklwd xmm0,xmm4 - punpckhwd xmm7,xmm4 - movdqa xmm5,xmm0 - movdqa xmm4,xmm7 - pmaddwd xmm0,[rel PW_MF078_F117] ; xmm0=z3L - pmaddwd xmm7,[rel PW_MF078_F117] ; xmm7=z3H - pmaddwd xmm5,[rel PW_F117_F078] ; xmm5=z4L - pmaddwd xmm4,[rel PW_F117_F078] ; xmm4=z4H - - movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L - movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H - - ; (Original) - ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; - ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; - ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; tmp0 += z1 + z3; tmp1 += z2 + z4; - ; tmp2 += z2 + z3; tmp3 += z1 + z4; - ; - ; (This implementation) - ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; - ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; - ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); - ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); - ; tmp0 += z3; tmp1 += z4; - ; tmp2 += z3; tmp3 += z4; - - movdqa xmm0,xmm1 - movdqa xmm7,xmm1 - punpcklwd xmm0,xmm3 - punpckhwd xmm7,xmm3 - movdqa xmm1,xmm0 - movdqa xmm3,xmm7 - pmaddwd xmm0,[rel PW_MF060_MF089] ; xmm0=tmp0L - pmaddwd xmm7,[rel PW_MF060_MF089] ; xmm7=tmp0H - pmaddwd xmm1,[rel PW_MF089_F060] ; xmm1=tmp3L - pmaddwd xmm3,[rel PW_MF089_F060] ; xmm3=tmp3H - - paddd 
xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L - paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H - paddd xmm1,xmm5 ; xmm1=tmp3L - paddd xmm3,xmm4 ; xmm3=tmp3H - - movdqa XMMWORD [wk(8)], xmm0 ; wk(8)=tmp0L - movdqa XMMWORD [wk(9)], xmm7 ; wk(9)=tmp0H - - movdqa xmm0,xmm2 - movdqa xmm7,xmm2 - punpcklwd xmm0,xmm6 - punpckhwd xmm7,xmm6 - movdqa xmm2,xmm0 - movdqa xmm6,xmm7 - pmaddwd xmm0,[rel PW_MF050_MF256] ; xmm0=tmp1L - pmaddwd xmm7,[rel PW_MF050_MF256] ; xmm7=tmp1H - pmaddwd xmm2,[rel PW_MF256_F050] ; xmm2=tmp2L - pmaddwd xmm6,[rel PW_MF256_F050] ; xmm6=tmp2H - - paddd xmm0,xmm5 ; xmm0=tmp1L - paddd xmm7,xmm4 ; xmm7=tmp1H - paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp2L - paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H - - movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=tmp1L - movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=tmp1H - - ; -- Final output stage - - movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L - movdqa xmm4, XMMWORD [wk(1)] ; xmm4=tmp10H - - movdqa xmm0,xmm5 - movdqa xmm7,xmm4 - paddd xmm5,xmm1 ; xmm5=data0L - paddd xmm4,xmm3 ; xmm4=data0H - psubd xmm0,xmm1 ; xmm0=data7L - psubd xmm7,xmm3 ; xmm7=data7H - - movdqa xmm1,[rel PD_DESCALE_P2] ; xmm1=[rel PD_DESCALE_P2] - - paddd xmm5,xmm1 - paddd xmm4,xmm1 - psrad xmm5,DESCALE_P2 - psrad xmm4,DESCALE_P2 - paddd xmm0,xmm1 - paddd xmm7,xmm1 - psrad xmm0,DESCALE_P2 - psrad xmm7,DESCALE_P2 - - packssdw xmm5,xmm4 ; xmm5=data0=(00 10 20 30 40 50 60 70) - packssdw xmm0,xmm7 ; xmm0=data7=(07 17 27 37 47 57 67 77) - - movdqa xmm3, XMMWORD [wk(4)] ; xmm3=tmp11L - movdqa xmm1, XMMWORD [wk(5)] ; xmm1=tmp11H - - movdqa xmm4,xmm3 - movdqa xmm7,xmm1 - paddd xmm3,xmm2 ; xmm3=data1L - paddd xmm1,xmm6 ; xmm1=data1H - psubd xmm4,xmm2 ; xmm4=data6L - psubd xmm7,xmm6 ; xmm7=data6H - - movdqa xmm2,[rel PD_DESCALE_P2] ; xmm2=[rel PD_DESCALE_P2] - - paddd xmm3,xmm2 - paddd xmm1,xmm2 - psrad xmm3,DESCALE_P2 - psrad xmm1,DESCALE_P2 - paddd xmm4,xmm2 - paddd xmm7,xmm2 - psrad xmm4,DESCALE_P2 - psrad xmm7,DESCALE_P2 - - packssdw xmm3,xmm1 ; xmm3=data1=(01 11 21 31 41 51 61 71) - 
packssdw xmm4,xmm7 ; xmm4=data6=(06 16 26 36 46 56 66 76) - - packsswb xmm5,xmm4 ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) - packsswb xmm3,xmm0 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) - - movdqa xmm6, XMMWORD [wk(6)] ; xmm6=tmp12L - movdqa xmm2, XMMWORD [wk(7)] ; xmm2=tmp12H - movdqa xmm1, XMMWORD [wk(10)] ; xmm1=tmp1L - movdqa xmm7, XMMWORD [wk(11)] ; xmm7=tmp1H - - movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) - movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) - - movdqa xmm4,xmm6 - movdqa xmm0,xmm2 - paddd xmm6,xmm1 ; xmm6=data2L - paddd xmm2,xmm7 ; xmm2=data2H - psubd xmm4,xmm1 ; xmm4=data5L - psubd xmm0,xmm7 ; xmm0=data5H - - movdqa xmm5,[rel PD_DESCALE_P2] ; xmm5=[rel PD_DESCALE_P2] - - paddd xmm6,xmm5 - paddd xmm2,xmm5 - psrad xmm6,DESCALE_P2 - psrad xmm2,DESCALE_P2 - paddd xmm4,xmm5 - paddd xmm0,xmm5 - psrad xmm4,DESCALE_P2 - psrad xmm0,DESCALE_P2 - - packssdw xmm6,xmm2 ; xmm6=data2=(02 12 22 32 42 52 62 72) - packssdw xmm4,xmm0 ; xmm4=data5=(05 15 25 35 45 55 65 75) - - movdqa xmm3, XMMWORD [wk(2)] ; xmm3=tmp13L - movdqa xmm1, XMMWORD [wk(3)] ; xmm1=tmp13H - movdqa xmm7, XMMWORD [wk(8)] ; xmm7=tmp0L - movdqa xmm5, XMMWORD [wk(9)] ; xmm5=tmp0H - - movdqa xmm2,xmm3 - movdqa xmm0,xmm1 - paddd xmm3,xmm7 ; xmm3=data3L - paddd xmm1,xmm5 ; xmm1=data3H - psubd xmm2,xmm7 ; xmm2=data4L - psubd xmm0,xmm5 ; xmm0=data4H - - movdqa xmm7,[rel PD_DESCALE_P2] ; xmm7=[rel PD_DESCALE_P2] - - paddd xmm3,xmm7 - paddd xmm1,xmm7 - psrad xmm3,DESCALE_P2 - psrad xmm1,DESCALE_P2 - paddd xmm2,xmm7 - paddd xmm0,xmm7 - psrad xmm2,DESCALE_P2 - psrad xmm0,DESCALE_P2 - - movdqa xmm5,[rel PB_CENTERJSAMP] ; xmm5=[rel PB_CENTERJSAMP] - - packssdw xmm3,xmm1 ; xmm3=data3=(03 13 23 33 43 53 63 73) - packssdw xmm2,xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74) - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) - movdqa xmm1, XMMWORD [wk(1)] ; 
xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) - - packsswb xmm6,xmm2 ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) - packsswb xmm3,xmm4 ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) - - paddb xmm7,xmm5 - paddb xmm1,xmm5 - paddb xmm6,xmm5 - paddb xmm3,xmm5 - - movdqa xmm0,xmm7 ; transpose coefficients(phase 1) - punpcklbw xmm7,xmm1 ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) - punpckhbw xmm0,xmm1 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) - movdqa xmm2,xmm6 ; transpose coefficients(phase 1) - punpcklbw xmm6,xmm3 ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) - punpckhbw xmm2,xmm3 ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) - - movdqa xmm4,xmm7 ; transpose coefficients(phase 2) - punpcklwd xmm7,xmm6 ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) - punpckhwd xmm4,xmm6 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) - movdqa xmm5,xmm2 ; transpose coefficients(phase 2) - punpcklwd xmm2,xmm0 ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) - punpckhwd xmm5,xmm0 ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) - - movdqa xmm1,xmm7 ; transpose coefficients(phase 3) - punpckldq xmm7,xmm2 ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) - punpckhdq xmm1,xmm2 ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) - movdqa xmm3,xmm4 ; transpose coefficients(phase 3) - punpckldq xmm4,xmm5 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) - punpckhdq xmm3,xmm5 ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) - - pshufd xmm6,xmm7,0x4E ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) - pshufd xmm0,xmm1,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) - pshufd xmm2,xmm4,0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) - pshufd xmm5,xmm3,0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) - - mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] - movq XMM_MMWORD 
[rdx+rax*SIZEOF_JSAMPLE], xmm7 - movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1 - mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW] - movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 - movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 - - mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] - movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 - movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0 - mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW] - movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2 - movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5 - - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jidctint-sse2.asm b/simd/jidctint-sse2.asm deleted file mode 100644 index 6c7e7d9..0000000 --- a/simd/jidctint-sse2.asm +++ /dev/null @@ -1,858 +0,0 @@ -; -; jidctint.asm - accurate integer IDCT (SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains a slow-but-accurate integer implementation of the -; inverse DCT (Discrete Cosine Transform). The following code is based -; directly on the IJG's original jidctint.c; see the jidctint.c for -; more details. 
-; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 13 -%define PASS1_BITS 2 - -%define DESCALE_P1 (CONST_BITS-PASS1_BITS) -%define DESCALE_P2 (CONST_BITS+PASS1_BITS+3) - -%if CONST_BITS == 13 -F_0_298 equ 2446 ; FIX(0.298631336) -F_0_390 equ 3196 ; FIX(0.390180644) -F_0_541 equ 4433 ; FIX(0.541196100) -F_0_765 equ 6270 ; FIX(0.765366865) -F_0_899 equ 7373 ; FIX(0.899976223) -F_1_175 equ 9633 ; FIX(1.175875602) -F_1_501 equ 12299 ; FIX(1.501321110) -F_1_847 equ 15137 ; FIX(1.847759065) -F_1_961 equ 16069 ; FIX(1.961570560) -F_2_053 equ 16819 ; FIX(2.053119869) -F_2_562 equ 20995 ; FIX(2.562915447) -F_3_072 equ 25172 ; FIX(3.072711026) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. -%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_298 equ DESCALE( 320652955,30-CONST_BITS) ; FIX(0.298631336) -F_0_390 equ DESCALE( 418953276,30-CONST_BITS) ; FIX(0.390180644) -F_0_541 equ DESCALE( 581104887,30-CONST_BITS) ; FIX(0.541196100) -F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) -F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) -F_1_175 equ DESCALE(1262586813,30-CONST_BITS) ; FIX(1.175875602) -F_1_501 equ DESCALE(1612031267,30-CONST_BITS) ; FIX(1.501321110) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) -F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) -F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) -F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_idct_islow_sse2) - -EXTN(jconst_idct_islow_sse2): - -PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 -PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) -PW_MF078_F117 times 4 dw 
(F_1_175-F_1_961), F_1_175 -PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) -PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 -PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) -PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 -PW_MF256_F050 times 4 dw -F_2_562, (F_3_072-F_2_562) -PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1-1) -PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2-1) -PB_CENTERJSAMP times 16 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform dequantization and inverse DCT on one block of coefficients. -; -; GLOBAL(void) -; jsimd_idct_islow_sse2 (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; jpeg_component_info *compptr -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 12 - - align 16 - global EXTN(jsimd_idct_islow_sse2) - -EXTN(jsimd_idct_islow_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic ebx -; push ecx ; unused -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input. 
- -; mov eax, [original_ebp] - mov edx, POINTER [dct_table(eax)] ; quantptr - mov esi, JCOEFPTR [coef_block(eax)] ; inptr - -%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - jnz near .columnDCT - - movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] - por xmm1,xmm0 - packsswb xmm1,xmm1 - packsswb xmm1,xmm1 - movd eax,xmm1 - test eax,eax - jnz short .columnDCT - - ; -- AC terms all zero - - movdqa xmm5, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] - pmullw xmm5, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - psllw xmm5,PASS1_BITS - - movdqa xmm4,xmm5 ; xmm5=in0=(00 01 02 03 04 05 06 07) - punpcklwd xmm5,xmm5 ; xmm5=(00 00 01 01 02 02 03 03) - punpckhwd xmm4,xmm4 ; xmm4=(04 04 05 05 06 06 07 07) - - pshufd xmm7,xmm5,0x00 ; xmm7=col0=(00 00 00 00 00 00 00 00) - pshufd xmm6,xmm5,0x55 ; xmm6=col1=(01 01 01 01 01 01 01 01) - pshufd xmm1,xmm5,0xAA ; xmm1=col2=(02 02 02 02 02 02 02 02) - pshufd xmm5,xmm5,0xFF ; xmm5=col3=(03 03 03 03 03 03 03 03) - pshufd xmm0,xmm4,0x00 ; xmm0=col4=(04 04 04 04 04 04 04 04) - pshufd xmm3,xmm4,0x55 ; xmm3=col5=(05 05 05 05 05 05 05 05) - pshufd xmm2,xmm4,0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06) - pshufd xmm4,xmm4,0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07) - - movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1 - movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3 - movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 - movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 - jmp near .column_end - alignx 16,7 -%endif -.columnDCT: - - ; -- Even part - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] - pmullw 
xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - movdqa xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] - pmullw xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - ; (Original) - ; z1 = (z2 + z3) * 0.541196100; - ; tmp2 = z1 + z3 * -1.847759065; - ; tmp3 = z1 + z2 * 0.765366865; - ; - ; (This implementation) - ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); - ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; - - movdqa xmm4,xmm1 ; xmm1=in2=z2 - movdqa xmm5,xmm1 - punpcklwd xmm4,xmm3 ; xmm3=in6=z3 - punpckhwd xmm5,xmm3 - movdqa xmm1,xmm4 - movdqa xmm3,xmm5 - pmaddwd xmm4,[GOTOFF(ebx,PW_F130_F054)] ; xmm4=tmp3L - pmaddwd xmm5,[GOTOFF(ebx,PW_F130_F054)] ; xmm5=tmp3H - pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=tmp2L - pmaddwd xmm3,[GOTOFF(ebx,PW_F054_MF130)] ; xmm3=tmp2H - - movdqa xmm6,xmm0 - paddw xmm0,xmm2 ; xmm0=in0+in4 - psubw xmm6,xmm2 ; xmm6=in0-in4 - - pxor xmm7,xmm7 - pxor xmm2,xmm2 - punpcklwd xmm7,xmm0 ; xmm7=tmp0L - punpckhwd xmm2,xmm0 ; xmm2=tmp0H - psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS - psrad xmm2,(16-CONST_BITS) ; psrad xmm2,16 & pslld xmm2,CONST_BITS - - movdqa xmm0,xmm7 - paddd xmm7,xmm4 ; xmm7=tmp10L - psubd xmm0,xmm4 ; xmm0=tmp13L - movdqa xmm4,xmm2 - paddd xmm2,xmm5 ; xmm2=tmp10H - psubd xmm4,xmm5 ; xmm4=tmp13H - - movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=tmp10L - movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=tmp10H - movdqa XMMWORD [wk(2)], xmm0 ; wk(2)=tmp13L - movdqa XMMWORD [wk(3)], xmm4 ; wk(3)=tmp13H - - pxor xmm5,xmm5 - pxor xmm7,xmm7 - punpcklwd xmm5,xmm6 ; xmm5=tmp1L - punpckhwd xmm7,xmm6 ; xmm7=tmp1H - psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS - psrad xmm7,(16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS - - movdqa xmm2,xmm5 - paddd xmm5,xmm1 ; xmm5=tmp11L 
- psubd xmm2,xmm1 ; xmm2=tmp12L - movdqa xmm0,xmm7 - paddd xmm7,xmm3 ; xmm7=tmp11H - psubd xmm0,xmm3 ; xmm0=tmp12H - - movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L - movdqa XMMWORD [wk(5)], xmm7 ; wk(5)=tmp11H - movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=tmp12L - movdqa XMMWORD [wk(7)], xmm0 ; wk(7)=tmp12H - - ; -- Odd part - - movdqa xmm4, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movdqa xmm6, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] - pmullw xmm4, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm6, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - movdqa xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] - pmullw xmm1, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - movdqa xmm5,xmm6 - movdqa xmm7,xmm4 - paddw xmm5,xmm3 ; xmm5=z3 - paddw xmm7,xmm1 ; xmm7=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movdqa xmm2,xmm5 - movdqa xmm0,xmm5 - punpcklwd xmm2,xmm7 - punpckhwd xmm0,xmm7 - movdqa xmm5,xmm2 - movdqa xmm7,xmm0 - pmaddwd xmm2,[GOTOFF(ebx,PW_MF078_F117)] ; xmm2=z3L - pmaddwd xmm0,[GOTOFF(ebx,PW_MF078_F117)] ; xmm0=z3H - pmaddwd xmm5,[GOTOFF(ebx,PW_F117_F078)] ; xmm5=z4L - pmaddwd xmm7,[GOTOFF(ebx,PW_F117_F078)] ; xmm7=z4H - - movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L - movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H - - ; (Original) - ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; - ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; - ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; tmp0 += z1 + z3; tmp1 += z2 + z4; - ; tmp2 += z2 + z3; tmp3 += z1 + z4; - ; - ; (This implementation) - ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * 
-0.899976223; - ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; - ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); - ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); - ; tmp0 += z3; tmp1 += z4; - ; tmp2 += z3; tmp3 += z4; - - movdqa xmm2,xmm3 - movdqa xmm0,xmm3 - punpcklwd xmm2,xmm4 - punpckhwd xmm0,xmm4 - movdqa xmm3,xmm2 - movdqa xmm4,xmm0 - pmaddwd xmm2,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm2=tmp0L - pmaddwd xmm0,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm0=tmp0H - pmaddwd xmm3,[GOTOFF(ebx,PW_MF089_F060)] ; xmm3=tmp3L - pmaddwd xmm4,[GOTOFF(ebx,PW_MF089_F060)] ; xmm4=tmp3H - - paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L - paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H - paddd xmm3,xmm5 ; xmm3=tmp3L - paddd xmm4,xmm7 ; xmm4=tmp3H - - movdqa XMMWORD [wk(8)], xmm2 ; wk(8)=tmp0L - movdqa XMMWORD [wk(9)], xmm0 ; wk(9)=tmp0H - - movdqa xmm2,xmm1 - movdqa xmm0,xmm1 - punpcklwd xmm2,xmm6 - punpckhwd xmm0,xmm6 - movdqa xmm1,xmm2 - movdqa xmm6,xmm0 - pmaddwd xmm2,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm2=tmp1L - pmaddwd xmm0,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm0=tmp1H - pmaddwd xmm1,[GOTOFF(ebx,PW_MF256_F050)] ; xmm1=tmp2L - pmaddwd xmm6,[GOTOFF(ebx,PW_MF256_F050)] ; xmm6=tmp2H - - paddd xmm2,xmm5 ; xmm2=tmp1L - paddd xmm0,xmm7 ; xmm0=tmp1H - paddd xmm1, XMMWORD [wk(10)] ; xmm1=tmp2L - paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H - - movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=tmp1L - movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=tmp1H - - ; -- Final output stage - - movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L - movdqa xmm7, XMMWORD [wk(1)] ; xmm7=tmp10H - - movdqa xmm2,xmm5 - movdqa xmm0,xmm7 - paddd xmm5,xmm3 ; xmm5=data0L - paddd xmm7,xmm4 ; xmm7=data0H - psubd xmm2,xmm3 ; xmm2=data7L - psubd xmm0,xmm4 ; xmm0=data7H - - movdqa xmm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm3=[PD_DESCALE_P1] - - paddd xmm5,xmm3 - paddd xmm7,xmm3 - psrad xmm5,DESCALE_P1 - psrad xmm7,DESCALE_P1 - paddd xmm2,xmm3 - paddd xmm0,xmm3 - psrad xmm2,DESCALE_P1 - psrad xmm0,DESCALE_P1 - - 
packssdw xmm5,xmm7 ; xmm5=data0=(00 01 02 03 04 05 06 07) - packssdw xmm2,xmm0 ; xmm2=data7=(70 71 72 73 74 75 76 77) - - movdqa xmm4, XMMWORD [wk(4)] ; xmm4=tmp11L - movdqa xmm3, XMMWORD [wk(5)] ; xmm3=tmp11H - - movdqa xmm7,xmm4 - movdqa xmm0,xmm3 - paddd xmm4,xmm1 ; xmm4=data1L - paddd xmm3,xmm6 ; xmm3=data1H - psubd xmm7,xmm1 ; xmm7=data6L - psubd xmm0,xmm6 ; xmm0=data6H - - movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm1=[PD_DESCALE_P1] - - paddd xmm4,xmm1 - paddd xmm3,xmm1 - psrad xmm4,DESCALE_P1 - psrad xmm3,DESCALE_P1 - paddd xmm7,xmm1 - paddd xmm0,xmm1 - psrad xmm7,DESCALE_P1 - psrad xmm0,DESCALE_P1 - - packssdw xmm4,xmm3 ; xmm4=data1=(10 11 12 13 14 15 16 17) - packssdw xmm7,xmm0 ; xmm7=data6=(60 61 62 63 64 65 66 67) - - movdqa xmm6,xmm5 ; transpose coefficients(phase 1) - punpcklwd xmm5,xmm4 ; xmm5=(00 10 01 11 02 12 03 13) - punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) - movdqa xmm1,xmm7 ; transpose coefficients(phase 1) - punpcklwd xmm7,xmm2 ; xmm7=(60 70 61 71 62 72 63 73) - punpckhwd xmm1,xmm2 ; xmm1=(64 74 65 75 66 76 67 77) - - movdqa xmm3, XMMWORD [wk(6)] ; xmm3=tmp12L - movdqa xmm0, XMMWORD [wk(7)] ; xmm0=tmp12H - movdqa xmm4, XMMWORD [wk(10)] ; xmm4=tmp1L - movdqa xmm2, XMMWORD [wk(11)] ; xmm2=tmp1H - - movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 01 11 02 12 03 13) - movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=(04 14 05 15 06 16 07 17) - movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=(60 70 61 71 62 72 63 73) - movdqa XMMWORD [wk(5)], xmm1 ; wk(5)=(64 74 65 75 66 76 67 77) - - movdqa xmm5,xmm3 - movdqa xmm6,xmm0 - paddd xmm3,xmm4 ; xmm3=data2L - paddd xmm0,xmm2 ; xmm0=data2H - psubd xmm5,xmm4 ; xmm5=data5L - psubd xmm6,xmm2 ; xmm6=data5H - - movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm7=[PD_DESCALE_P1] - - paddd xmm3,xmm7 - paddd xmm0,xmm7 - psrad xmm3,DESCALE_P1 - psrad xmm0,DESCALE_P1 - paddd xmm5,xmm7 - paddd xmm6,xmm7 - psrad xmm5,DESCALE_P1 - psrad xmm6,DESCALE_P1 - - packssdw xmm3,xmm0 ; xmm3=data2=(20 21 22 23 24 25 26 27) - packssdw xmm5,xmm6 ; 
xmm5=data5=(50 51 52 53 54 55 56 57) - - movdqa xmm1, XMMWORD [wk(2)] ; xmm1=tmp13L - movdqa xmm4, XMMWORD [wk(3)] ; xmm4=tmp13H - movdqa xmm2, XMMWORD [wk(8)] ; xmm2=tmp0L - movdqa xmm7, XMMWORD [wk(9)] ; xmm7=tmp0H - - movdqa xmm0,xmm1 - movdqa xmm6,xmm4 - paddd xmm1,xmm2 ; xmm1=data3L - paddd xmm4,xmm7 ; xmm4=data3H - psubd xmm0,xmm2 ; xmm0=data4L - psubd xmm6,xmm7 ; xmm6=data4H - - movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; xmm2=[PD_DESCALE_P1] - - paddd xmm1,xmm2 - paddd xmm4,xmm2 - psrad xmm1,DESCALE_P1 - psrad xmm4,DESCALE_P1 - paddd xmm0,xmm2 - paddd xmm6,xmm2 - psrad xmm0,DESCALE_P1 - psrad xmm6,DESCALE_P1 - - packssdw xmm1,xmm4 ; xmm1=data3=(30 31 32 33 34 35 36 37) - packssdw xmm0,xmm6 ; xmm0=data4=(40 41 42 43 44 45 46 47) - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 01 11 02 12 03 13) - movdqa xmm2, XMMWORD [wk(1)] ; xmm2=(04 14 05 15 06 16 07 17) - - movdqa xmm4,xmm3 ; transpose coefficients(phase 1) - punpcklwd xmm3,xmm1 ; xmm3=(20 30 21 31 22 32 23 33) - punpckhwd xmm4,xmm1 ; xmm4=(24 34 25 35 26 36 27 37) - movdqa xmm6,xmm0 ; transpose coefficients(phase 1) - punpcklwd xmm0,xmm5 ; xmm0=(40 50 41 51 42 52 43 53) - punpckhwd xmm6,xmm5 ; xmm6=(44 54 45 55 46 56 47 57) - - movdqa xmm1,xmm7 ; transpose coefficients(phase 2) - punpckldq xmm7,xmm3 ; xmm7=(00 10 20 30 01 11 21 31) - punpckhdq xmm1,xmm3 ; xmm1=(02 12 22 32 03 13 23 33) - movdqa xmm5,xmm2 ; transpose coefficients(phase 2) - punpckldq xmm2,xmm4 ; xmm2=(04 14 24 34 05 15 25 35) - punpckhdq xmm5,xmm4 ; xmm5=(06 16 26 36 07 17 27 37) - - movdqa xmm3, XMMWORD [wk(4)] ; xmm3=(60 70 61 71 62 72 63 73) - movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(64 74 65 75 66 76 67 77) - - movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=(04 14 24 34 05 15 25 35) - movdqa XMMWORD [wk(7)], xmm5 ; wk(7)=(06 16 26 36 07 17 27 37) - - movdqa xmm2,xmm0 ; transpose coefficients(phase 2) - punpckldq xmm0,xmm3 ; xmm0=(40 50 60 70 41 51 61 71) - punpckhdq xmm2,xmm3 ; xmm2=(42 52 62 72 43 53 63 73) - movdqa xmm5,xmm6 ; transpose 
coefficients(phase 2) - punpckldq xmm6,xmm4 ; xmm6=(44 54 64 74 45 55 65 75) - punpckhdq xmm5,xmm4 ; xmm5=(46 56 66 76 47 57 67 77) - - movdqa xmm3,xmm7 ; transpose coefficients(phase 3) - punpcklqdq xmm7,xmm0 ; xmm7=col0=(00 10 20 30 40 50 60 70) - punpckhqdq xmm3,xmm0 ; xmm3=col1=(01 11 21 31 41 51 61 71) - movdqa xmm4,xmm1 ; transpose coefficients(phase 3) - punpcklqdq xmm1,xmm2 ; xmm1=col2=(02 12 22 32 42 52 62 72) - punpckhqdq xmm4,xmm2 ; xmm4=col3=(03 13 23 33 43 53 63 73) - - movdqa xmm0, XMMWORD [wk(6)] ; xmm0=(04 14 24 34 05 15 25 35) - movdqa xmm2, XMMWORD [wk(7)] ; xmm2=(06 16 26 36 07 17 27 37) - - movdqa XMMWORD [wk(8)], xmm3 ; wk(8)=col1 - movdqa XMMWORD [wk(9)], xmm4 ; wk(9)=col3 - - movdqa xmm3,xmm0 ; transpose coefficients(phase 3) - punpcklqdq xmm0,xmm6 ; xmm0=col4=(04 14 24 34 44 54 64 74) - punpckhqdq xmm3,xmm6 ; xmm3=col5=(05 15 25 35 45 55 65 75) - movdqa xmm4,xmm2 ; transpose coefficients(phase 3) - punpcklqdq xmm2,xmm5 ; xmm2=col6=(06 16 26 36 46 56 66 76) - punpckhqdq xmm4,xmm5 ; xmm4=col7=(07 17 27 37 47 57 67 77) - - movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 - movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 -.column_end: - - ; -- Prefetch the next coefficient block - - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows from work array, store into output array. 
- - mov eax, [original_ebp] - mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(eax)] - - ; -- Even part - - ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 - - ; (Original) - ; z1 = (z2 + z3) * 0.541196100; - ; tmp2 = z1 + z3 * -1.847759065; - ; tmp3 = z1 + z2 * 0.765366865; - ; - ; (This implementation) - ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); - ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; - - movdqa xmm6,xmm1 ; xmm1=in2=z2 - movdqa xmm5,xmm1 - punpcklwd xmm6,xmm2 ; xmm2=in6=z3 - punpckhwd xmm5,xmm2 - movdqa xmm1,xmm6 - movdqa xmm2,xmm5 - pmaddwd xmm6,[GOTOFF(ebx,PW_F130_F054)] ; xmm6=tmp3L - pmaddwd xmm5,[GOTOFF(ebx,PW_F130_F054)] ; xmm5=tmp3H - pmaddwd xmm1,[GOTOFF(ebx,PW_F054_MF130)] ; xmm1=tmp2L - pmaddwd xmm2,[GOTOFF(ebx,PW_F054_MF130)] ; xmm2=tmp2H - - movdqa xmm3,xmm7 - paddw xmm7,xmm0 ; xmm7=in0+in4 - psubw xmm3,xmm0 ; xmm3=in0-in4 - - pxor xmm4,xmm4 - pxor xmm0,xmm0 - punpcklwd xmm4,xmm7 ; xmm4=tmp0L - punpckhwd xmm0,xmm7 ; xmm0=tmp0H - psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS - psrad xmm0,(16-CONST_BITS) ; psrad xmm0,16 & pslld xmm0,CONST_BITS - - movdqa xmm7,xmm4 - paddd xmm4,xmm6 ; xmm4=tmp10L - psubd xmm7,xmm6 ; xmm7=tmp13L - movdqa xmm6,xmm0 - paddd xmm0,xmm5 ; xmm0=tmp10H - psubd xmm6,xmm5 ; xmm6=tmp13H - - movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=tmp10L - movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp10H - movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=tmp13L - movdqa XMMWORD [wk(3)], xmm6 ; wk(3)=tmp13H - - pxor xmm5,xmm5 - pxor xmm4,xmm4 - punpcklwd xmm5,xmm3 ; xmm5=tmp1L - punpckhwd xmm4,xmm3 ; xmm4=tmp1H - psrad xmm5,(16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS - psrad xmm4,(16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS - - movdqa xmm0,xmm5 - paddd xmm5,xmm1 ; xmm5=tmp11L - psubd xmm0,xmm1 ; xmm0=tmp12L - movdqa xmm7,xmm4 - paddd xmm4,xmm2 ; xmm4=tmp11H - psubd xmm7,xmm2 ; xmm7=tmp12H - - movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L - movdqa 
XMMWORD [wk(5)], xmm4 ; wk(5)=tmp11H - movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=tmp12L - movdqa XMMWORD [wk(7)], xmm7 ; wk(7)=tmp12H - - ; -- Odd part - - movdqa xmm6, XMMWORD [wk(9)] ; xmm6=col3 - movdqa xmm3, XMMWORD [wk(8)] ; xmm3=col1 - movdqa xmm1, XMMWORD [wk(11)] ; xmm1=col7 - movdqa xmm2, XMMWORD [wk(10)] ; xmm2=col5 - - movdqa xmm5,xmm6 - movdqa xmm4,xmm3 - paddw xmm5,xmm1 ; xmm5=z3 - paddw xmm4,xmm2 ; xmm4=z4 - - ; (Original) - ; z5 = (z3 + z4) * 1.175875602; - ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; - ; z3 += z5; z4 += z5; - ; - ; (This implementation) - ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; - ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); - - movdqa xmm0,xmm5 - movdqa xmm7,xmm5 - punpcklwd xmm0,xmm4 - punpckhwd xmm7,xmm4 - movdqa xmm5,xmm0 - movdqa xmm4,xmm7 - pmaddwd xmm0,[GOTOFF(ebx,PW_MF078_F117)] ; xmm0=z3L - pmaddwd xmm7,[GOTOFF(ebx,PW_MF078_F117)] ; xmm7=z3H - pmaddwd xmm5,[GOTOFF(ebx,PW_F117_F078)] ; xmm5=z4L - pmaddwd xmm4,[GOTOFF(ebx,PW_F117_F078)] ; xmm4=z4H - - movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L - movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H - - ; (Original) - ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; - ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; - ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; - ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; - ; tmp0 += z1 + z3; tmp1 += z2 + z4; - ; tmp2 += z2 + z3; tmp3 += z1 + z4; - ; - ; (This implementation) - ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; - ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; - ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); - ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); - ; tmp0 += z3; tmp1 += z4; - ; tmp2 += z3; tmp3 += z4; - - movdqa xmm0,xmm1 - movdqa xmm7,xmm1 - punpcklwd xmm0,xmm3 - punpckhwd xmm7,xmm3 - movdqa xmm1,xmm0 - movdqa xmm3,xmm7 - pmaddwd xmm0,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm0=tmp0L - pmaddwd 
xmm7,[GOTOFF(ebx,PW_MF060_MF089)] ; xmm7=tmp0H - pmaddwd xmm1,[GOTOFF(ebx,PW_MF089_F060)] ; xmm1=tmp3L - pmaddwd xmm3,[GOTOFF(ebx,PW_MF089_F060)] ; xmm3=tmp3H - - paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L - paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H - paddd xmm1,xmm5 ; xmm1=tmp3L - paddd xmm3,xmm4 ; xmm3=tmp3H - - movdqa XMMWORD [wk(8)], xmm0 ; wk(8)=tmp0L - movdqa XMMWORD [wk(9)], xmm7 ; wk(9)=tmp0H - - movdqa xmm0,xmm2 - movdqa xmm7,xmm2 - punpcklwd xmm0,xmm6 - punpckhwd xmm7,xmm6 - movdqa xmm2,xmm0 - movdqa xmm6,xmm7 - pmaddwd xmm0,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm0=tmp1L - pmaddwd xmm7,[GOTOFF(ebx,PW_MF050_MF256)] ; xmm7=tmp1H - pmaddwd xmm2,[GOTOFF(ebx,PW_MF256_F050)] ; xmm2=tmp2L - pmaddwd xmm6,[GOTOFF(ebx,PW_MF256_F050)] ; xmm6=tmp2H - - paddd xmm0,xmm5 ; xmm0=tmp1L - paddd xmm7,xmm4 ; xmm7=tmp1H - paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp2L - paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H - - movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=tmp1L - movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=tmp1H - - ; -- Final output stage - - movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L - movdqa xmm4, XMMWORD [wk(1)] ; xmm4=tmp10H - - movdqa xmm0,xmm5 - movdqa xmm7,xmm4 - paddd xmm5,xmm1 ; xmm5=data0L - paddd xmm4,xmm3 ; xmm4=data0H - psubd xmm0,xmm1 ; xmm0=data7L - psubd xmm7,xmm3 ; xmm7=data7H - - movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm1=[PD_DESCALE_P2] - - paddd xmm5,xmm1 - paddd xmm4,xmm1 - psrad xmm5,DESCALE_P2 - psrad xmm4,DESCALE_P2 - paddd xmm0,xmm1 - paddd xmm7,xmm1 - psrad xmm0,DESCALE_P2 - psrad xmm7,DESCALE_P2 - - packssdw xmm5,xmm4 ; xmm5=data0=(00 10 20 30 40 50 60 70) - packssdw xmm0,xmm7 ; xmm0=data7=(07 17 27 37 47 57 67 77) - - movdqa xmm3, XMMWORD [wk(4)] ; xmm3=tmp11L - movdqa xmm1, XMMWORD [wk(5)] ; xmm1=tmp11H - - movdqa xmm4,xmm3 - movdqa xmm7,xmm1 - paddd xmm3,xmm2 ; xmm3=data1L - paddd xmm1,xmm6 ; xmm1=data1H - psubd xmm4,xmm2 ; xmm4=data6L - psubd xmm7,xmm6 ; xmm7=data6H - - movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm2=[PD_DESCALE_P2] - - paddd xmm3,xmm2 - 
paddd xmm1,xmm2 - psrad xmm3,DESCALE_P2 - psrad xmm1,DESCALE_P2 - paddd xmm4,xmm2 - paddd xmm7,xmm2 - psrad xmm4,DESCALE_P2 - psrad xmm7,DESCALE_P2 - - packssdw xmm3,xmm1 ; xmm3=data1=(01 11 21 31 41 51 61 71) - packssdw xmm4,xmm7 ; xmm4=data6=(06 16 26 36 46 56 66 76) - - packsswb xmm5,xmm4 ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) - packsswb xmm3,xmm0 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) - - movdqa xmm6, XMMWORD [wk(6)] ; xmm6=tmp12L - movdqa xmm2, XMMWORD [wk(7)] ; xmm2=tmp12H - movdqa xmm1, XMMWORD [wk(10)] ; xmm1=tmp1L - movdqa xmm7, XMMWORD [wk(11)] ; xmm7=tmp1H - - movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) - movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) - - movdqa xmm4,xmm6 - movdqa xmm0,xmm2 - paddd xmm6,xmm1 ; xmm6=data2L - paddd xmm2,xmm7 ; xmm2=data2H - psubd xmm4,xmm1 ; xmm4=data5L - psubd xmm0,xmm7 ; xmm0=data5H - - movdqa xmm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm5=[PD_DESCALE_P2] - - paddd xmm6,xmm5 - paddd xmm2,xmm5 - psrad xmm6,DESCALE_P2 - psrad xmm2,DESCALE_P2 - paddd xmm4,xmm5 - paddd xmm0,xmm5 - psrad xmm4,DESCALE_P2 - psrad xmm0,DESCALE_P2 - - packssdw xmm6,xmm2 ; xmm6=data2=(02 12 22 32 42 52 62 72) - packssdw xmm4,xmm0 ; xmm4=data5=(05 15 25 35 45 55 65 75) - - movdqa xmm3, XMMWORD [wk(2)] ; xmm3=tmp13L - movdqa xmm1, XMMWORD [wk(3)] ; xmm1=tmp13H - movdqa xmm7, XMMWORD [wk(8)] ; xmm7=tmp0L - movdqa xmm5, XMMWORD [wk(9)] ; xmm5=tmp0H - - movdqa xmm2,xmm3 - movdqa xmm0,xmm1 - paddd xmm3,xmm7 ; xmm3=data3L - paddd xmm1,xmm5 ; xmm1=data3H - psubd xmm2,xmm7 ; xmm2=data4L - psubd xmm0,xmm5 ; xmm0=data4H - - movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P2)] ; xmm7=[PD_DESCALE_P2] - - paddd xmm3,xmm7 - paddd xmm1,xmm7 - psrad xmm3,DESCALE_P2 - psrad xmm1,DESCALE_P2 - paddd xmm2,xmm7 - paddd xmm0,xmm7 - psrad xmm2,DESCALE_P2 - psrad xmm0,DESCALE_P2 - - movdqa xmm5,[GOTOFF(ebx,PB_CENTERJSAMP)] ; xmm5=[PB_CENTERJSAMP] - - packssdw xmm3,xmm1 ; 
xmm3=data3=(03 13 23 33 43 53 63 73) - packssdw xmm2,xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74) - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) - movdqa xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) - - packsswb xmm6,xmm2 ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) - packsswb xmm3,xmm4 ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) - - paddb xmm7,xmm5 - paddb xmm1,xmm5 - paddb xmm6,xmm5 - paddb xmm3,xmm5 - - movdqa xmm0,xmm7 ; transpose coefficients(phase 1) - punpcklbw xmm7,xmm1 ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) - punpckhbw xmm0,xmm1 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) - movdqa xmm2,xmm6 ; transpose coefficients(phase 1) - punpcklbw xmm6,xmm3 ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) - punpckhbw xmm2,xmm3 ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) - - movdqa xmm4,xmm7 ; transpose coefficients(phase 2) - punpcklwd xmm7,xmm6 ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) - punpckhwd xmm4,xmm6 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) - movdqa xmm5,xmm2 ; transpose coefficients(phase 2) - punpcklwd xmm2,xmm0 ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) - punpckhwd xmm5,xmm0 ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) - - movdqa xmm1,xmm7 ; transpose coefficients(phase 3) - punpckldq xmm7,xmm2 ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) - punpckhdq xmm1,xmm2 ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) - movdqa xmm3,xmm4 ; transpose coefficients(phase 3) - punpckldq xmm4,xmm5 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) - punpckhdq xmm3,xmm5 ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) - - pshufd xmm6,xmm7,0x4E ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) - pshufd xmm0,xmm1,0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) - pshufd xmm2,xmm4,0x4E ; xmm2=(50 51 52 53 54 55 56 57 
40 41 42 43 44 45 46 47) - pshufd xmm5,xmm3,0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] - movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm7 - movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm1 - mov edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW] - movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 - movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 - - mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] - movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6 - movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0 - mov edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] - movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2 - movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5 - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; unused - poppic ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jidctred-mmx.asm b/simd/jidctred-mmx.asm deleted file mode 100644 index ba054e3..0000000 --- a/simd/jidctred-mmx.asm +++ /dev/null @@ -1,705 +0,0 @@ -; -; jidctred.asm - reduced-size IDCT (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains inverse-DCT routines that produce reduced-size -; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. -; The following code is based directly on the IJG's original jidctred.c; -; see the jidctred.c for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 13 -%define PASS1_BITS 2 - -%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) -%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) -%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) -%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) - -%if CONST_BITS == 13 -F_0_211 equ 1730 ; FIX(0.211164243) -F_0_509 equ 4176 ; FIX(0.509795579) -F_0_601 equ 4926 ; FIX(0.601344887) -F_0_720 equ 5906 ; FIX(0.720959822) -F_0_765 equ 6270 ; FIX(0.765366865) -F_0_850 equ 6967 ; FIX(0.850430095) -F_0_899 equ 7373 ; FIX(0.899976223) -F_1_061 equ 8697 ; FIX(1.061594337) -F_1_272 equ 10426 ; FIX(1.272758580) -F_1_451 equ 11893 ; FIX(1.451774981) -F_1_847 equ 15137 ; FIX(1.847759065) -F_2_172 equ 17799 ; FIX(2.172734803) -F_2_562 equ 20995 ; FIX(2.562915447) -F_3_624 equ 29692 ; FIX(3.624509785) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. 
-%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243) -F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) -F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) -F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) -F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) -F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) -F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) -F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) -F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) -F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) -F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) -F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_idct_red_mmx) - -EXTN(jconst_idct_red_mmx): - -PW_F184_MF076 times 2 dw F_1_847,-F_0_765 -PW_F256_F089 times 2 dw F_2_562, F_0_899 -PW_F106_MF217 times 2 dw F_1_061,-F_2_172 -PW_MF060_MF050 times 2 dw -F_0_601,-F_0_509 -PW_F145_MF021 times 2 dw F_1_451,-F_0_211 -PW_F362_MF127 times 2 dw F_3_624,-F_1_272 -PW_F085_MF072 times 2 dw F_0_850,-F_0_720 -PD_DESCALE_P1_4 times 2 dd 1 << (DESCALE_P1_4-1) -PD_DESCALE_P2_4 times 2 dd 1 << (DESCALE_P2_4-1) -PD_DESCALE_P1_2 times 2 dd 1 << (DESCALE_P1_2-1) -PD_DESCALE_P2_2 times 2 dd 1 << (DESCALE_P2_2-1) -PB_CENTERJSAMP times 8 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform dequantization and inverse DCT on one block of coefficients, -; producing a reduced-size 4x4 output block. 
-; -; GLOBAL(void) -; jsimd_idct_4x4_mmx (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; void *dct_table -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] -%define WK_NUM 2 -%define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF - ; JCOEF workspace[DCTSIZE2] - - align 16 - global EXTN(jsimd_idct_4x4_mmx) - -EXTN(jsimd_idct_4x4_mmx): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [workspace] - pushpic ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input, store into work array. 
- -; mov eax, [original_ebp] - mov edx, POINTER [dct_table(eax)] ; quantptr - mov esi, JCOEFPTR [coef_block(eax)] ; inptr - lea edi, [workspace] ; JCOEF *wsptr - mov ecx, DCTSIZE/4 ; ctr - alignx 16,7 -.columnloop: -%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - jnz short .columnDCT - - movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - por mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - por mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - por mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - por mm0,mm1 - packsswb mm0,mm0 - movd eax,mm0 - test eax,eax - jnz short .columnDCT - - ; -- AC terms all zero - - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - psllw mm0,PASS1_BITS - - movq mm2,mm0 ; mm0=in0=(00 01 02 03) - punpcklwd mm0,mm0 ; mm0=(00 00 01 01) - punpckhwd mm2,mm2 ; mm2=(02 02 03 03) - - movq mm1,mm0 - punpckldq mm0,mm0 ; mm0=(00 00 00 00) - punpckhdq mm1,mm1 ; mm1=(01 01 01 01) - movq mm3,mm2 - punpckldq mm2,mm2 ; mm2=(02 02 02 02) - punpckhdq mm3,mm3 ; mm3=(03 03 03 03) - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1 - movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2 - movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 - jmp near .nextcolumn - alignx 16,7 -%endif -.columnDCT: - - ; -- Odd part - - movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - movq mm4,mm0 
- movq mm5,mm0 - punpcklwd mm4,mm1 - punpckhwd mm5,mm1 - movq mm0,mm4 - movq mm1,mm5 - pmaddwd mm4,[GOTOFF(ebx,PW_F256_F089)] ; mm4=(tmp2L) - pmaddwd mm5,[GOTOFF(ebx,PW_F256_F089)] ; mm5=(tmp2H) - pmaddwd mm0,[GOTOFF(ebx,PW_F106_MF217)] ; mm0=(tmp0L) - pmaddwd mm1,[GOTOFF(ebx,PW_F106_MF217)] ; mm1=(tmp0H) - - movq mm6,mm2 - movq mm7,mm2 - punpcklwd mm6,mm3 - punpckhwd mm7,mm3 - movq mm2,mm6 - movq mm3,mm7 - pmaddwd mm6,[GOTOFF(ebx,PW_MF060_MF050)] ; mm6=(tmp2L) - pmaddwd mm7,[GOTOFF(ebx,PW_MF060_MF050)] ; mm7=(tmp2H) - pmaddwd mm2,[GOTOFF(ebx,PW_F145_MF021)] ; mm2=(tmp0L) - pmaddwd mm3,[GOTOFF(ebx,PW_F145_MF021)] ; mm3=(tmp0H) - - paddd mm6,mm4 ; mm6=tmp2L - paddd mm7,mm5 ; mm7=tmp2H - paddd mm2,mm0 ; mm2=tmp0L - paddd mm3,mm1 ; mm3=tmp0H - - movq MMWORD [wk(0)], mm2 ; wk(0)=tmp0L - movq MMWORD [wk(1)], mm3 ; wk(1)=tmp0H - - ; -- Even part - - movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - pmullw mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - pxor mm1,mm1 - pxor mm2,mm2 - punpcklwd mm1,mm4 ; mm1=tmp0L - punpckhwd mm2,mm4 ; mm2=tmp0H - psrad mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1 - psrad mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1 - - movq mm3,mm5 ; mm5=in2=z2 - punpcklwd mm5,mm0 ; mm0=in6=z3 - punpckhwd mm3,mm0 - pmaddwd mm5,[GOTOFF(ebx,PW_F184_MF076)] ; mm5=tmp2L - pmaddwd mm3,[GOTOFF(ebx,PW_F184_MF076)] ; mm3=tmp2H - - movq mm4,mm1 - movq mm0,mm2 - paddd mm1,mm5 ; mm1=tmp10L - paddd mm2,mm3 ; mm2=tmp10H - psubd mm4,mm5 ; mm4=tmp12L - psubd mm0,mm3 ; mm0=tmp12H - - ; -- Final output stage - - movq mm5,mm1 - movq mm3,mm2 - paddd mm1,mm6 ; mm1=data0L - paddd mm2,mm7 ; mm2=data0H - psubd mm5,mm6 ; mm5=data3L - psubd mm3,mm7 ; mm3=data3H - - movq mm6,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; 
mm6=[PD_DESCALE_P1_4] - - paddd mm1,mm6 - paddd mm2,mm6 - psrad mm1,DESCALE_P1_4 - psrad mm2,DESCALE_P1_4 - paddd mm5,mm6 - paddd mm3,mm6 - psrad mm5,DESCALE_P1_4 - psrad mm3,DESCALE_P1_4 - - packssdw mm1,mm2 ; mm1=data0=(00 01 02 03) - packssdw mm5,mm3 ; mm5=data3=(30 31 32 33) - - movq mm7, MMWORD [wk(0)] ; mm7=tmp0L - movq mm6, MMWORD [wk(1)] ; mm6=tmp0H - - movq mm2,mm4 - movq mm3,mm0 - paddd mm4,mm7 ; mm4=data1L - paddd mm0,mm6 ; mm0=data1H - psubd mm2,mm7 ; mm2=data2L - psubd mm3,mm6 ; mm3=data2H - - movq mm7,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; mm7=[PD_DESCALE_P1_4] - - paddd mm4,mm7 - paddd mm0,mm7 - psrad mm4,DESCALE_P1_4 - psrad mm0,DESCALE_P1_4 - paddd mm2,mm7 - paddd mm3,mm7 - psrad mm2,DESCALE_P1_4 - psrad mm3,DESCALE_P1_4 - - packssdw mm4,mm0 ; mm4=data1=(10 11 12 13) - packssdw mm2,mm3 ; mm2=data2=(20 21 22 23) - - movq mm6,mm1 ; transpose coefficients(phase 1) - punpcklwd mm1,mm4 ; mm1=(00 10 01 11) - punpckhwd mm6,mm4 ; mm6=(02 12 03 13) - movq mm7,mm2 ; transpose coefficients(phase 1) - punpcklwd mm2,mm5 ; mm2=(20 30 21 31) - punpckhwd mm7,mm5 ; mm7=(22 32 23 33) - - movq mm0,mm1 ; transpose coefficients(phase 2) - punpckldq mm1,mm2 ; mm1=(00 10 20 30) - punpckhdq mm0,mm2 ; mm0=(01 11 21 31) - movq mm3,mm6 ; transpose coefficients(phase 2) - punpckldq mm6,mm7 ; mm6=(02 12 22 32) - punpckhdq mm3,mm7 ; mm3=(03 13 23 33) - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm1 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0 - movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm6 - movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3 - -.nextcolumn: - add esi, byte 4*SIZEOF_JCOEF ; coef_block - add edx, byte 4*SIZEOF_ISLOW_MULT_TYPE ; quantptr - add edi, byte 4*DCTSIZE*SIZEOF_JCOEF ; wsptr - dec ecx ; ctr - jnz near .columnloop - - ; ---- Pass 2: process rows from work array, store into output array. 
- - mov eax, [original_ebp] - lea esi, [workspace] ; JCOEF *wsptr - mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(eax)] - - ; -- Odd part - - movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - - movq mm4,mm0 - movq mm5,mm0 - punpcklwd mm4,mm1 - punpckhwd mm5,mm1 - movq mm0,mm4 - movq mm1,mm5 - pmaddwd mm4,[GOTOFF(ebx,PW_F256_F089)] ; mm4=(tmp2L) - pmaddwd mm5,[GOTOFF(ebx,PW_F256_F089)] ; mm5=(tmp2H) - pmaddwd mm0,[GOTOFF(ebx,PW_F106_MF217)] ; mm0=(tmp0L) - pmaddwd mm1,[GOTOFF(ebx,PW_F106_MF217)] ; mm1=(tmp0H) - - movq mm6,mm2 - movq mm7,mm2 - punpcklwd mm6,mm3 - punpckhwd mm7,mm3 - movq mm2,mm6 - movq mm3,mm7 - pmaddwd mm6,[GOTOFF(ebx,PW_MF060_MF050)] ; mm6=(tmp2L) - pmaddwd mm7,[GOTOFF(ebx,PW_MF060_MF050)] ; mm7=(tmp2H) - pmaddwd mm2,[GOTOFF(ebx,PW_F145_MF021)] ; mm2=(tmp0L) - pmaddwd mm3,[GOTOFF(ebx,PW_F145_MF021)] ; mm3=(tmp0H) - - paddd mm6,mm4 ; mm6=tmp2L - paddd mm7,mm5 ; mm7=tmp2H - paddd mm2,mm0 ; mm2=tmp0L - paddd mm3,mm1 ; mm3=tmp0H - - movq MMWORD [wk(0)], mm2 ; wk(0)=tmp0L - movq MMWORD [wk(1)], mm3 ; wk(1)=tmp0H - - ; -- Even part - - movq mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movq mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)] - movq mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)] - - pxor mm1,mm1 - pxor mm2,mm2 - punpcklwd mm1,mm4 ; mm1=tmp0L - punpckhwd mm2,mm4 ; mm2=tmp0H - psrad mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1 - psrad mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1 - - movq mm3,mm5 ; mm5=in2=z2 - punpcklwd mm5,mm0 ; mm0=in6=z3 - punpckhwd mm3,mm0 - pmaddwd mm5,[GOTOFF(ebx,PW_F184_MF076)] ; mm5=tmp2L - pmaddwd mm3,[GOTOFF(ebx,PW_F184_MF076)] ; mm3=tmp2H - - movq mm4,mm1 - movq mm0,mm2 - paddd mm1,mm5 ; mm1=tmp10L - paddd mm2,mm3 ; mm2=tmp10H - psubd mm4,mm5 ; mm4=tmp12L - psubd mm0,mm3 ; mm0=tmp12H - - ; -- Final 
output stage - - movq mm5,mm1 - movq mm3,mm2 - paddd mm1,mm6 ; mm1=data0L - paddd mm2,mm7 ; mm2=data0H - psubd mm5,mm6 ; mm5=data3L - psubd mm3,mm7 ; mm3=data3H - - movq mm6,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; mm6=[PD_DESCALE_P2_4] - - paddd mm1,mm6 - paddd mm2,mm6 - psrad mm1,DESCALE_P2_4 - psrad mm2,DESCALE_P2_4 - paddd mm5,mm6 - paddd mm3,mm6 - psrad mm5,DESCALE_P2_4 - psrad mm3,DESCALE_P2_4 - - packssdw mm1,mm2 ; mm1=data0=(00 10 20 30) - packssdw mm5,mm3 ; mm5=data3=(03 13 23 33) - - movq mm7, MMWORD [wk(0)] ; mm7=tmp0L - movq mm6, MMWORD [wk(1)] ; mm6=tmp0H - - movq mm2,mm4 - movq mm3,mm0 - paddd mm4,mm7 ; mm4=data1L - paddd mm0,mm6 ; mm0=data1H - psubd mm2,mm7 ; mm2=data2L - psubd mm3,mm6 ; mm3=data2H - - movq mm7,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; mm7=[PD_DESCALE_P2_4] - - paddd mm4,mm7 - paddd mm0,mm7 - psrad mm4,DESCALE_P2_4 - psrad mm0,DESCALE_P2_4 - paddd mm2,mm7 - paddd mm3,mm7 - psrad mm2,DESCALE_P2_4 - psrad mm3,DESCALE_P2_4 - - packssdw mm4,mm0 ; mm4=data1=(01 11 21 31) - packssdw mm2,mm3 ; mm2=data2=(02 12 22 32) - - movq mm6,[GOTOFF(ebx,PB_CENTERJSAMP)] ; mm6=[PB_CENTERJSAMP] - - packsswb mm1,mm2 ; mm1=(00 10 20 30 02 12 22 32) - packsswb mm4,mm5 ; mm4=(01 11 21 31 03 13 23 33) - paddb mm1,mm6 - paddb mm4,mm6 - - movq mm7,mm1 ; transpose coefficients(phase 1) - punpcklbw mm1,mm4 ; mm1=(00 01 10 11 20 21 30 31) - punpckhbw mm7,mm4 ; mm7=(02 03 12 13 22 23 32 33) - - movq mm0,mm1 ; transpose coefficients(phase 2) - punpcklwd mm1,mm7 ; mm1=(00 01 02 03 10 11 12 13) - punpckhwd mm0,mm7 ; mm0=(20 21 22 23 30 31 32 33) - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW] - movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 - movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 - - psrlq mm1,4*BYTE_BIT - psrlq mm0,4*BYTE_BIT - - mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] - movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1 - movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0 - - emms ; empty MMX state - - pop edi - pop esi 
-; pop edx ; need not be preserved -; pop ecx ; need not be preserved - poppic ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - - -; -------------------------------------------------------------------------- -; -; Perform dequantization and inverse DCT on one block of coefficients, -; producing a reduced-size 2x2 output block. -; -; GLOBAL(void) -; jsimd_idct_2x2_mmx (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; void *dct_table -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - - align 16 - global EXTN(jsimd_idct_2x2_mmx) - -EXTN(jsimd_idct_2x2_mmx): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input. - - mov edx, POINTER [dct_table(ebp)] ; quantptr - mov esi, JCOEFPTR [coef_block(ebp)] ; inptr - - ; | input: | result: | - ; | 00 01 ** 03 ** 05 ** 07 | | - ; | 10 11 ** 13 ** 15 ** 17 | | - ; | ** ** ** ** ** ** ** ** | | - ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | - ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | - ; | 50 51 ** 53 ** 55 ** 57 | | - ; | ** ** ** ** ** ** ** ** | | - ; | 70 71 ** 73 ** 75 ** 77 | | - - ; -- Odd part - - movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)] - pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - movq mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movq mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)] - pmullw mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - ; mm0=(10 11 ** 13), mm1=(30 31 ** 33) - ; mm2=(50 51 ** 53), mm3=(70 71 ** 73) - - pcmpeqd mm7,mm7 
- pslld mm7,WORD_BIT ; mm7={0x0000 0xFFFF 0x0000 0xFFFF} - - movq mm4,mm0 ; mm4=(10 11 ** 13) - movq mm5,mm2 ; mm5=(50 51 ** 53) - punpcklwd mm4,mm1 ; mm4=(10 30 11 31) - punpcklwd mm5,mm3 ; mm5=(50 70 51 71) - pmaddwd mm4,[GOTOFF(ebx,PW_F362_MF127)] - pmaddwd mm5,[GOTOFF(ebx,PW_F085_MF072)] - - psrld mm0,WORD_BIT ; mm0=(11 -- 13 --) - pand mm1,mm7 ; mm1=(-- 31 -- 33) - psrld mm2,WORD_BIT ; mm2=(51 -- 53 --) - pand mm3,mm7 ; mm3=(-- 71 -- 73) - por mm0,mm1 ; mm0=(11 31 13 33) - por mm2,mm3 ; mm2=(51 71 53 73) - pmaddwd mm0,[GOTOFF(ebx,PW_F362_MF127)] - pmaddwd mm2,[GOTOFF(ebx,PW_F085_MF072)] - - paddd mm4,mm5 ; mm4=tmp0[col0 col1] - - movq mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)] - movq mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)] - pmullw mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)] - movq mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)] - movq mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)] - pmullw mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)] - - ; mm6=(** 15 ** 17), mm1=(** 35 ** 37) - ; mm3=(** 55 ** 57), mm5=(** 75 ** 77) - - psrld mm6,WORD_BIT ; mm6=(15 -- 17 --) - pand mm1,mm7 ; mm1=(-- 35 -- 37) - psrld mm3,WORD_BIT ; mm3=(55 -- 57 --) - pand mm5,mm7 ; mm5=(-- 75 -- 77) - por mm6,mm1 ; mm6=(15 35 17 37) - por mm3,mm5 ; mm3=(55 75 57 77) - pmaddwd mm6,[GOTOFF(ebx,PW_F362_MF127)] - pmaddwd mm3,[GOTOFF(ebx,PW_F085_MF072)] - - paddd mm0,mm2 ; mm0=tmp0[col1 col3] - paddd mm6,mm3 ; mm6=tmp0[col5 col7] - - ; -- Even part - - movq mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movq mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)] - pmullw mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)] - - ; mm1=(00 01 ** 03), mm5=(** 05 ** 07) - - movq mm2,mm1 ; mm2=(00 01 ** 03) - pslld mm1,WORD_BIT ; mm1=(-- 00 -- **) - psrad mm1,(WORD_BIT-CONST_BITS-2) ; mm1=tmp10[col0 ****] - - 
pand mm2,mm7 ; mm2=(-- 01 -- 03) - pand mm5,mm7 ; mm5=(-- 05 -- 07) - psrad mm2,(WORD_BIT-CONST_BITS-2) ; mm2=tmp10[col1 col3] - psrad mm5,(WORD_BIT-CONST_BITS-2) ; mm5=tmp10[col5 col7] - - ; -- Final output stage - - movq mm3,mm1 - paddd mm1,mm4 ; mm1=data0[col0 ****]=(A0 **) - psubd mm3,mm4 ; mm3=data1[col0 ****]=(B0 **) - punpckldq mm1,mm3 ; mm1=(A0 B0) - - movq mm7,[GOTOFF(ebx,PD_DESCALE_P1_2)] ; mm7=[PD_DESCALE_P1_2] - - movq mm4,mm2 - movq mm3,mm5 - paddd mm2,mm0 ; mm2=data0[col1 col3]=(A1 A3) - paddd mm5,mm6 ; mm5=data0[col5 col7]=(A5 A7) - psubd mm4,mm0 ; mm4=data1[col1 col3]=(B1 B3) - psubd mm3,mm6 ; mm3=data1[col5 col7]=(B5 B7) - - paddd mm1,mm7 - psrad mm1,DESCALE_P1_2 - - paddd mm2,mm7 - paddd mm5,mm7 - psrad mm2,DESCALE_P1_2 - psrad mm5,DESCALE_P1_2 - paddd mm4,mm7 - paddd mm3,mm7 - psrad mm4,DESCALE_P1_2 - psrad mm3,DESCALE_P1_2 - - ; ---- Pass 2: process rows, store into output array. - - mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(ebp)] - - ; | input:| result:| - ; | A0 B0 | | - ; | A1 B1 | C0 C1 | - ; | A3 B3 | D0 D1 | - ; | A5 B5 | | - ; | A7 B7 | | - - ; -- Odd part - - packssdw mm2,mm4 ; mm2=(A1 A3 B1 B3) - packssdw mm5,mm3 ; mm5=(A5 A7 B5 B7) - pmaddwd mm2,[GOTOFF(ebx,PW_F362_MF127)] - pmaddwd mm5,[GOTOFF(ebx,PW_F085_MF072)] - - paddd mm2,mm5 ; mm2=tmp0[row0 row1] - - ; -- Even part - - pslld mm1,(CONST_BITS+2) ; mm1=tmp10[row0 row1] - - ; -- Final output stage - - movq mm0,[GOTOFF(ebx,PD_DESCALE_P2_2)] ; mm0=[PD_DESCALE_P2_2] - - movq mm6,mm1 - paddd mm1,mm2 ; mm1=data0[row0 row1]=(C0 C1) - psubd mm6,mm2 ; mm6=data1[row0 row1]=(D0 D1) - - paddd mm1,mm0 - paddd mm6,mm0 - psrad mm1,DESCALE_P2_2 - psrad mm6,DESCALE_P2_2 - - movq mm7,mm1 ; transpose coefficients - punpckldq mm1,mm6 ; mm1=(C0 D0) - punpckhdq mm7,mm6 ; mm7=(C1 D1) - - packssdw mm1,mm7 ; mm1=(C0 D0 C1 D1) - packsswb mm1,mm1 ; mm1=(C0 D0 C1 D1 C0 D0 C1 D1) - paddb mm1,[GOTOFF(ebx,PB_CENTERJSAMP)] - - movd ecx,mm1 - movd ebx,mm1 ; ebx=(C0 D0 
C1 D1) - shr ecx,2*BYTE_BIT ; ecx=(C1 D1 -- --) - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - mov WORD [edx+eax*SIZEOF_JSAMPLE], bx - mov WORD [esi+eax*SIZEOF_JSAMPLE], cx - - emms ; empty MMX state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jidctred-sse2-64.asm b/simd/jidctred-sse2-64.asm deleted file mode 100644 index a54bbe2..0000000 --- a/simd/jidctred-sse2-64.asm +++ /dev/null @@ -1,575 +0,0 @@ -; -; jidctred.asm - reduced-size IDCT (64-bit SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains inverse-DCT routines that produce reduced-size -; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. -; The following code is based directly on the IJG's original jidctred.c; -; see the jidctred.c for more details. 
-; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 13 -%define PASS1_BITS 2 - -%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) -%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) -%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) -%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) - -%if CONST_BITS == 13 -F_0_211 equ 1730 ; FIX(0.211164243) -F_0_509 equ 4176 ; FIX(0.509795579) -F_0_601 equ 4926 ; FIX(0.601344887) -F_0_720 equ 5906 ; FIX(0.720959822) -F_0_765 equ 6270 ; FIX(0.765366865) -F_0_850 equ 6967 ; FIX(0.850430095) -F_0_899 equ 7373 ; FIX(0.899976223) -F_1_061 equ 8697 ; FIX(1.061594337) -F_1_272 equ 10426 ; FIX(1.272758580) -F_1_451 equ 11893 ; FIX(1.451774981) -F_1_847 equ 15137 ; FIX(1.847759065) -F_2_172 equ 17799 ; FIX(2.172734803) -F_2_562 equ 20995 ; FIX(2.562915447) -F_3_624 equ 29692 ; FIX(3.624509785) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. 
-%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243) -F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) -F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) -F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) -F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) -F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) -F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) -F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) -F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) -F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) -F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) -F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_idct_red_sse2) - -EXTN(jconst_idct_red_sse2): - -PW_F184_MF076 times 4 dw F_1_847,-F_0_765 -PW_F256_F089 times 4 dw F_2_562, F_0_899 -PW_F106_MF217 times 4 dw F_1_061,-F_2_172 -PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 -PW_F145_MF021 times 4 dw F_1_451,-F_0_211 -PW_F362_MF127 times 4 dw F_3_624,-F_1_272 -PW_F085_MF072 times 4 dw F_0_850,-F_0_720 -PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4-1) -PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4-1) -PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2-1) -PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2-1) -PB_CENTERJSAMP times 16 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Perform dequantization and inverse DCT on one block of coefficients, -; producing a reduced-size 4x4 output block. 
-; -; GLOBAL(void) -; jsimd_idct_4x4_sse2 (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -; r10 = void *dct_table -; r11 = JCOEFPTR coef_block -; r12 = JSAMPARRAY output_buf -; r13 = JDIMENSION output_col - -%define original_rbp rbp+0 -%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_idct_4x4_sse2) - -EXTN(jsimd_idct_4x4_sse2): - push rbp - mov rax,rsp ; rax = original rbp - sub rsp, byte 4 - and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [rsp],rax - mov rbp,rsp ; rbp = aligned rbp - lea rsp, [wk(0)] - collect_args - - ; ---- Pass 1: process columns from input. - - mov rdx, r10 ; quantptr - mov rsi, r11 ; inptr - -%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2 - mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] - jnz short .columnDCT - - movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] - por xmm0,xmm1 - packsswb xmm0,xmm0 - packsswb xmm0,xmm0 - movd eax,xmm0 - test rax,rax - jnz short .columnDCT - - ; -- AC terms all zero - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - - psllw xmm0,PASS1_BITS - - movdqa xmm3,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) - punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) - punpckhwd xmm3,xmm3 ; xmm3=(04 04 05 05 06 06 07 07) - - pshufd xmm1,xmm0,0x50 ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01) - pshufd xmm0,xmm0,0xFA ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03) - pshufd xmm6,xmm3,0x50 ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05) - pshufd xmm3,xmm3,0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07) - - jmp near .column_end -%endif 
-.columnDCT: - - ; -- Odd part - - movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - movdqa xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] - pmullw xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - - movdqa xmm4,xmm0 - movdqa xmm5,xmm0 - punpcklwd xmm4,xmm1 - punpckhwd xmm5,xmm1 - movdqa xmm0,xmm4 - movdqa xmm1,xmm5 - pmaddwd xmm4,[rel PW_F256_F089] ; xmm4=(tmp2L) - pmaddwd xmm5,[rel PW_F256_F089] ; xmm5=(tmp2H) - pmaddwd xmm0,[rel PW_F106_MF217] ; xmm0=(tmp0L) - pmaddwd xmm1,[rel PW_F106_MF217] ; xmm1=(tmp0H) - - movdqa xmm6,xmm2 - movdqa xmm7,xmm2 - punpcklwd xmm6,xmm3 - punpckhwd xmm7,xmm3 - movdqa xmm2,xmm6 - movdqa xmm3,xmm7 - pmaddwd xmm6,[rel PW_MF060_MF050] ; xmm6=(tmp2L) - pmaddwd xmm7,[rel PW_MF060_MF050] ; xmm7=(tmp2H) - pmaddwd xmm2,[rel PW_F145_MF021] ; xmm2=(tmp0L) - pmaddwd xmm3,[rel PW_F145_MF021] ; xmm3=(tmp0H) - - paddd xmm6,xmm4 ; xmm6=tmp2L - paddd xmm7,xmm5 ; xmm7=tmp2H - paddd xmm2,xmm0 ; xmm2=tmp0L - paddd xmm3,xmm1 ; xmm3=tmp0H - - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp0L - movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=tmp0H - - ; -- Even part - - movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] - movdqa xmm5, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] - movdqa xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] - pmullw xmm4, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm5, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm0, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - - pxor xmm1,xmm1 - pxor xmm2,xmm2 - punpcklwd xmm1,xmm4 ; xmm1=tmp0L - punpckhwd xmm2,xmm4 ; xmm2=tmp0H - psrad xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1 - psrad xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & 
pslld xmm2,CONST_BITS+1 - - movdqa xmm3,xmm5 ; xmm5=in2=z2 - punpcklwd xmm5,xmm0 ; xmm0=in6=z3 - punpckhwd xmm3,xmm0 - pmaddwd xmm5,[rel PW_F184_MF076] ; xmm5=tmp2L - pmaddwd xmm3,[rel PW_F184_MF076] ; xmm3=tmp2H - - movdqa xmm4,xmm1 - movdqa xmm0,xmm2 - paddd xmm1,xmm5 ; xmm1=tmp10L - paddd xmm2,xmm3 ; xmm2=tmp10H - psubd xmm4,xmm5 ; xmm4=tmp12L - psubd xmm0,xmm3 ; xmm0=tmp12H - - ; -- Final output stage - - movdqa xmm5,xmm1 - movdqa xmm3,xmm2 - paddd xmm1,xmm6 ; xmm1=data0L - paddd xmm2,xmm7 ; xmm2=data0H - psubd xmm5,xmm6 ; xmm5=data3L - psubd xmm3,xmm7 ; xmm3=data3H - - movdqa xmm6,[rel PD_DESCALE_P1_4] ; xmm6=[rel PD_DESCALE_P1_4] - - paddd xmm1,xmm6 - paddd xmm2,xmm6 - psrad xmm1,DESCALE_P1_4 - psrad xmm2,DESCALE_P1_4 - paddd xmm5,xmm6 - paddd xmm3,xmm6 - psrad xmm5,DESCALE_P1_4 - psrad xmm3,DESCALE_P1_4 - - packssdw xmm1,xmm2 ; xmm1=data0=(00 01 02 03 04 05 06 07) - packssdw xmm5,xmm3 ; xmm5=data3=(30 31 32 33 34 35 36 37) - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp0L - movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp0H - - movdqa xmm2,xmm4 - movdqa xmm3,xmm0 - paddd xmm4,xmm7 ; xmm4=data1L - paddd xmm0,xmm6 ; xmm0=data1H - psubd xmm2,xmm7 ; xmm2=data2L - psubd xmm3,xmm6 ; xmm3=data2H - - movdqa xmm7,[rel PD_DESCALE_P1_4] ; xmm7=[rel PD_DESCALE_P1_4] - - paddd xmm4,xmm7 - paddd xmm0,xmm7 - psrad xmm4,DESCALE_P1_4 - psrad xmm0,DESCALE_P1_4 - paddd xmm2,xmm7 - paddd xmm3,xmm7 - psrad xmm2,DESCALE_P1_4 - psrad xmm3,DESCALE_P1_4 - - packssdw xmm4,xmm0 ; xmm4=data1=(10 11 12 13 14 15 16 17) - packssdw xmm2,xmm3 ; xmm2=data2=(20 21 22 23 24 25 26 27) - - movdqa xmm6,xmm1 ; transpose coefficients(phase 1) - punpcklwd xmm1,xmm4 ; xmm1=(00 10 01 11 02 12 03 13) - punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) - movdqa xmm7,xmm2 ; transpose coefficients(phase 1) - punpcklwd xmm2,xmm5 ; xmm2=(20 30 21 31 22 32 23 33) - punpckhwd xmm7,xmm5 ; xmm7=(24 34 25 35 26 36 27 37) - - movdqa xmm0,xmm1 ; transpose coefficients(phase 2) - punpckldq xmm1,xmm2 ; xmm1=[col0 col1]=(00 10 
20 30 01 11 21 31) - punpckhdq xmm0,xmm2 ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33) - movdqa xmm3,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm7 ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35) - punpckhdq xmm3,xmm7 ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37) -.column_end: - - ; -- Prefetch the next coefficient block - - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows, store into output array. - - mov rax, [original_rbp] - mov rdi, r12 ; (JSAMPROW *) - mov eax, r13d - - ; -- Even part - - pxor xmm4,xmm4 - punpcklwd xmm4,xmm1 ; xmm4=tmp0 - psrad xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1 - - ; -- Odd part - - punpckhwd xmm1,xmm0 - punpckhwd xmm6,xmm3 - movdqa xmm5,xmm1 - movdqa xmm2,xmm6 - pmaddwd xmm1,[rel PW_F256_F089] ; xmm1=(tmp2) - pmaddwd xmm6,[rel PW_MF060_MF050] ; xmm6=(tmp2) - pmaddwd xmm5,[rel PW_F106_MF217] ; xmm5=(tmp0) - pmaddwd xmm2,[rel PW_F145_MF021] ; xmm2=(tmp0) - - paddd xmm6,xmm1 ; xmm6=tmp2 - paddd xmm2,xmm5 ; xmm2=tmp0 - - ; -- Even part - - punpcklwd xmm0,xmm3 - pmaddwd xmm0,[rel PW_F184_MF076] ; xmm0=tmp2 - - movdqa xmm7,xmm4 - paddd xmm4,xmm0 ; xmm4=tmp10 - psubd xmm7,xmm0 ; xmm7=tmp12 - - ; -- Final output stage - - movdqa xmm1,[rel PD_DESCALE_P2_4] ; xmm1=[rel PD_DESCALE_P2_4] - - movdqa xmm5,xmm4 - movdqa xmm3,xmm7 - paddd xmm4,xmm6 ; xmm4=data0=(00 10 20 30) - paddd xmm7,xmm2 ; xmm7=data1=(01 11 21 31) - psubd xmm5,xmm6 ; xmm5=data3=(03 13 23 33) - psubd xmm3,xmm2 ; xmm3=data2=(02 12 22 32) - - paddd xmm4,xmm1 - paddd xmm7,xmm1 - psrad xmm4,DESCALE_P2_4 - psrad xmm7,DESCALE_P2_4 - paddd xmm5,xmm1 - paddd xmm3,xmm1 - psrad xmm5,DESCALE_P2_4 - psrad xmm3,DESCALE_P2_4 - - packssdw xmm4,xmm3 ; xmm4=(00 10 20 30 02 12 22 32) - packssdw xmm7,xmm5 ; xmm7=(01 11 21 31 03 13 23 33) - - movdqa xmm0,xmm4 ; transpose 
coefficients(phase 1) - punpcklwd xmm4,xmm7 ; xmm4=(00 01 10 11 20 21 30 31) - punpckhwd xmm0,xmm7 ; xmm0=(02 03 12 13 22 23 32 33) - - movdqa xmm6,xmm4 ; transpose coefficients(phase 2) - punpckldq xmm4,xmm0 ; xmm4=(00 01 02 03 10 11 12 13) - punpckhdq xmm6,xmm0 ; xmm6=(20 21 22 23 30 31 32 33) - - packsswb xmm4,xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..) - paddb xmm4,[rel PB_CENTERJSAMP] - - pshufd xmm2,xmm4,0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..) - pshufd xmm1,xmm4,0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..) - pshufd xmm3,xmm4,0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..) - - mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] - movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 - movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2 - mov rdx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] - movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1 - movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 - - uncollect_args - mov rsp,rbp ; rsp <- aligned rbp - pop rsp ; rsp <- original rbp - pop rbp - ret - - -; -------------------------------------------------------------------------- -; -; Perform dequantization and inverse DCT on one block of coefficients, -; producing a reduced-size 2x2 output block. -; -; GLOBAL(void) -; jsimd_idct_2x2_sse2 (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -; r10 = void *dct_table -; r11 = JCOEFPTR coef_block -; r12 = JSAMPARRAY output_buf -; r13 = JDIMENSION output_col - - align 16 - global EXTN(jsimd_idct_2x2_sse2) - -EXTN(jsimd_idct_2x2_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - push rbx - - ; ---- Pass 1: process columns from input. 
- - mov rdx, r10 ; quantptr - mov rsi, r11 ; inptr - - ; | input: | result: | - ; | 00 01 ** 03 ** 05 ** 07 | | - ; | 10 11 ** 13 ** 15 ** 17 | | - ; | ** ** ** ** ** ** ** ** | | - ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | - ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | - ; | 50 51 ** 53 ** 55 ** 57 | | - ; | ** ** ** ** ** ** ** ** | | - ; | 70 71 ** 73 ** 75 ** 77 | | - - ; -- Odd part - - movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - movdqa xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] - pmullw xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - - ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37) - ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77) - - pcmpeqd xmm7,xmm7 - pslld xmm7,WORD_BIT ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..} - - movdqa xmm4,xmm0 ; xmm4=(10 11 ** 13 ** 15 ** 17) - movdqa xmm5,xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57) - punpcklwd xmm4,xmm1 ; xmm4=(10 30 11 31 ** ** 13 33) - punpcklwd xmm5,xmm3 ; xmm5=(50 70 51 71 ** ** 53 73) - pmaddwd xmm4,[rel PW_F362_MF127] - pmaddwd xmm5,[rel PW_F085_MF072] - - psrld xmm0,WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --) - pand xmm1,xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37) - psrld xmm2,WORD_BIT ; xmm2=(51 -- 53 -- 55 -- 57 --) - pand xmm3,xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77) - por xmm0,xmm1 ; xmm0=(11 31 13 33 15 35 17 37) - por xmm2,xmm3 ; xmm2=(51 71 53 73 55 75 57 77) - pmaddwd xmm0,[rel PW_F362_MF127] - pmaddwd xmm2,[rel PW_F085_MF072] - - paddd xmm4,xmm5 ; xmm4=tmp0[col0 col1 **** col3] - paddd xmm0,xmm2 ; xmm0=tmp0[col1 col3 col5 col7] - - ; -- Even part - - movdqa xmm6, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] - pmullw xmm6, XMMWORD 
[XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] - - ; xmm6=(00 01 ** 03 ** 05 ** 07) - - movdqa xmm1,xmm6 ; xmm1=(00 01 ** 03 ** 05 ** 07) - pslld xmm6,WORD_BIT ; xmm6=(-- 00 -- ** -- ** -- **) - pand xmm1,xmm7 ; xmm1=(-- 01 -- 03 -- 05 -- 07) - psrad xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****] - psrad xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7] - - ; -- Final output stage - - movdqa xmm3,xmm6 - movdqa xmm5,xmm1 - paddd xmm6,xmm4 ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **) - paddd xmm1,xmm0 ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7) - psubd xmm3,xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **) - psubd xmm5,xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7) - - movdqa xmm2,[rel PD_DESCALE_P1_2] ; xmm2=[rel PD_DESCALE_P1_2] - - punpckldq xmm6,xmm3 ; xmm6=(A0 B0 ** **) - - movdqa xmm7,xmm1 - punpcklqdq xmm1,xmm5 ; xmm1=(A1 A3 B1 B3) - punpckhqdq xmm7,xmm5 ; xmm7=(A5 A7 B5 B7) - - paddd xmm6,xmm2 - psrad xmm6,DESCALE_P1_2 - - paddd xmm1,xmm2 - paddd xmm7,xmm2 - psrad xmm1,DESCALE_P1_2 - psrad xmm7,DESCALE_P1_2 - - ; -- Prefetch the next coefficient block - - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] - prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows, store into output array. 
- - mov rdi, r12 ; (JSAMPROW *) - mov eax, r13d - - ; | input:| result:| - ; | A0 B0 | | - ; | A1 B1 | C0 C1 | - ; | A3 B3 | D0 D1 | - ; | A5 B5 | | - ; | A7 B7 | | - - ; -- Odd part - - packssdw xmm1,xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3) - packssdw xmm7,xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7) - pmaddwd xmm1,[rel PW_F362_MF127] - pmaddwd xmm7,[rel PW_F085_MF072] - - paddd xmm1,xmm7 ; xmm1=tmp0[row0 row1 row0 row1] - - ; -- Even part - - pslld xmm6,(CONST_BITS+2) ; xmm6=tmp10[row0 row1 **** ****] - - ; -- Final output stage - - movdqa xmm4,xmm6 - paddd xmm6,xmm1 ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **) - psubd xmm4,xmm1 ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **) - - punpckldq xmm6,xmm4 ; xmm6=(C0 D0 C1 D1) - - paddd xmm6,[rel PD_DESCALE_P2_2] - psrad xmm6,DESCALE_P2_2 - - packssdw xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1) - packsswb xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..) - paddb xmm6,[rel PB_CENTERJSAMP] - - pextrw ebx,xmm6,0x00 ; ebx=(C0 D0 -- --) - pextrw ecx,xmm6,0x01 ; ecx=(C1 D1 -- --) - - mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] - mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] - mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx - mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx - - pop rbx - uncollect_args - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jidctred-sse2.asm b/simd/jidctred-sse2.asm deleted file mode 100644 index 232d983..0000000 --- a/simd/jidctred-sse2.asm +++ /dev/null @@ -1,593 +0,0 @@ -; -; jidctred.asm - reduced-size IDCT (SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; This file contains inverse-DCT routines that produce reduced-size -; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. -; The following code is based directly on the IJG's original jidctred.c; -; see the jidctred.c for more details. -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - -%define CONST_BITS 13 -%define PASS1_BITS 2 - -%define DESCALE_P1_4 (CONST_BITS-PASS1_BITS+1) -%define DESCALE_P2_4 (CONST_BITS+PASS1_BITS+3+1) -%define DESCALE_P1_2 (CONST_BITS-PASS1_BITS+2) -%define DESCALE_P2_2 (CONST_BITS+PASS1_BITS+3+2) - -%if CONST_BITS == 13 -F_0_211 equ 1730 ; FIX(0.211164243) -F_0_509 equ 4176 ; FIX(0.509795579) -F_0_601 equ 4926 ; FIX(0.601344887) -F_0_720 equ 5906 ; FIX(0.720959822) -F_0_765 equ 6270 ; FIX(0.765366865) -F_0_850 equ 6967 ; FIX(0.850430095) -F_0_899 equ 7373 ; FIX(0.899976223) -F_1_061 equ 8697 ; FIX(1.061594337) -F_1_272 equ 10426 ; FIX(1.272758580) -F_1_451 equ 11893 ; FIX(1.451774981) -F_1_847 equ 15137 ; FIX(1.847759065) -F_2_172 equ 17799 ; FIX(2.172734803) -F_2_562 equ 20995 ; FIX(2.562915447) -F_3_624 equ 29692 ; FIX(3.624509785) -%else -; NASM cannot do compile-time arithmetic on floating-point constants. 
-%define DESCALE(x,n) (((x)+(1<<((n)-1)))>>(n)) -F_0_211 equ DESCALE( 226735879,30-CONST_BITS) ; FIX(0.211164243) -F_0_509 equ DESCALE( 547388834,30-CONST_BITS) ; FIX(0.509795579) -F_0_601 equ DESCALE( 645689155,30-CONST_BITS) ; FIX(0.601344887) -F_0_720 equ DESCALE( 774124714,30-CONST_BITS) ; FIX(0.720959822) -F_0_765 equ DESCALE( 821806413,30-CONST_BITS) ; FIX(0.765366865) -F_0_850 equ DESCALE( 913142361,30-CONST_BITS) ; FIX(0.850430095) -F_0_899 equ DESCALE( 966342111,30-CONST_BITS) ; FIX(0.899976223) -F_1_061 equ DESCALE(1139878239,30-CONST_BITS) ; FIX(1.061594337) -F_1_272 equ DESCALE(1366614119,30-CONST_BITS) ; FIX(1.272758580) -F_1_451 equ DESCALE(1558831516,30-CONST_BITS) ; FIX(1.451774981) -F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) -F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) -F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) -F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) -%endif - -; -------------------------------------------------------------------------- - SECTION SEG_CONST - - alignz 16 - global EXTN(jconst_idct_red_sse2) - -EXTN(jconst_idct_red_sse2): - -PW_F184_MF076 times 4 dw F_1_847,-F_0_765 -PW_F256_F089 times 4 dw F_2_562, F_0_899 -PW_F106_MF217 times 4 dw F_1_061,-F_2_172 -PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 -PW_F145_MF021 times 4 dw F_1_451,-F_0_211 -PW_F362_MF127 times 4 dw F_3_624,-F_1_272 -PW_F085_MF072 times 4 dw F_0_850,-F_0_720 -PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4-1) -PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4-1) -PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2-1) -PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2-1) -PB_CENTERJSAMP times 16 db CENTERJSAMPLE - - alignz 16 - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Perform dequantization and inverse DCT on one block of coefficients, -; producing a reduced-size 4x4 output block. 
-; -; GLOBAL(void) -; jsimd_idct_4x4_sse2 (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; void *dct_table -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - -%define original_ebp ebp+0 -%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] -%define WK_NUM 2 - - align 16 - global EXTN(jsimd_idct_4x4_sse2) - -EXTN(jsimd_idct_4x4_sse2): - push ebp - mov eax,esp ; eax = original ebp - sub esp, byte 4 - and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits - mov [esp],eax - mov ebp,esp ; ebp = aligned ebp - lea esp, [wk(0)] - pushpic ebx -; push ecx ; unused -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input. - -; mov eax, [original_ebp] - mov edx, POINTER [dct_table(eax)] ; quantptr - mov esi, JCOEFPTR [coef_block(eax)] ; inptr - -%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2 - mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)] - or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)] - jnz short .columnDCT - - movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] - por xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] - por xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] - por xmm0,xmm1 - packsswb xmm0,xmm0 - packsswb xmm0,xmm0 - movd eax,xmm0 - test eax,eax - jnz short .columnDCT - - ; -- AC terms all zero - - movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - psllw xmm0,PASS1_BITS - - movdqa xmm3,xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) - punpcklwd xmm0,xmm0 ; xmm0=(00 00 01 01 02 02 03 03) - punpckhwd xmm3,xmm3 ; xmm3=(04 04 05 05 06 06 07 07) - - pshufd xmm1,xmm0,0x50 ; xmm1=[col0 
col1]=(00 00 00 00 01 01 01 01) - pshufd xmm0,xmm0,0xFA ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03) - pshufd xmm6,xmm3,0x50 ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05) - pshufd xmm3,xmm3,0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07) - - jmp near .column_end - alignx 16,7 -%endif -.columnDCT: - - ; -- Odd part - - movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - movdqa xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] - pmullw xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - movdqa xmm4,xmm0 - movdqa xmm5,xmm0 - punpcklwd xmm4,xmm1 - punpckhwd xmm5,xmm1 - movdqa xmm0,xmm4 - movdqa xmm1,xmm5 - pmaddwd xmm4,[GOTOFF(ebx,PW_F256_F089)] ; xmm4=(tmp2L) - pmaddwd xmm5,[GOTOFF(ebx,PW_F256_F089)] ; xmm5=(tmp2H) - pmaddwd xmm0,[GOTOFF(ebx,PW_F106_MF217)] ; xmm0=(tmp0L) - pmaddwd xmm1,[GOTOFF(ebx,PW_F106_MF217)] ; xmm1=(tmp0H) - - movdqa xmm6,xmm2 - movdqa xmm7,xmm2 - punpcklwd xmm6,xmm3 - punpckhwd xmm7,xmm3 - movdqa xmm2,xmm6 - movdqa xmm3,xmm7 - pmaddwd xmm6,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm6=(tmp2L) - pmaddwd xmm7,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm7=(tmp2H) - pmaddwd xmm2,[GOTOFF(ebx,PW_F145_MF021)] ; xmm2=(tmp0L) - pmaddwd xmm3,[GOTOFF(ebx,PW_F145_MF021)] ; xmm3=(tmp0H) - - paddd xmm6,xmm4 ; xmm6=tmp2L - paddd xmm7,xmm5 ; xmm7=tmp2H - paddd xmm2,xmm0 ; xmm2=tmp0L - paddd xmm3,xmm1 ; xmm3=tmp0H - - movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp0L - movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=tmp0H - - ; -- Even part - - movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] - movdqa xmm5, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)] - movdqa xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)] - pmullw xmm4, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw 
xmm5, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm0, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - pxor xmm1,xmm1 - pxor xmm2,xmm2 - punpcklwd xmm1,xmm4 ; xmm1=tmp0L - punpckhwd xmm2,xmm4 ; xmm2=tmp0H - psrad xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1 - psrad xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1 - - movdqa xmm3,xmm5 ; xmm5=in2=z2 - punpcklwd xmm5,xmm0 ; xmm0=in6=z3 - punpckhwd xmm3,xmm0 - pmaddwd xmm5,[GOTOFF(ebx,PW_F184_MF076)] ; xmm5=tmp2L - pmaddwd xmm3,[GOTOFF(ebx,PW_F184_MF076)] ; xmm3=tmp2H - - movdqa xmm4,xmm1 - movdqa xmm0,xmm2 - paddd xmm1,xmm5 ; xmm1=tmp10L - paddd xmm2,xmm3 ; xmm2=tmp10H - psubd xmm4,xmm5 ; xmm4=tmp12L - psubd xmm0,xmm3 ; xmm0=tmp12H - - ; -- Final output stage - - movdqa xmm5,xmm1 - movdqa xmm3,xmm2 - paddd xmm1,xmm6 ; xmm1=data0L - paddd xmm2,xmm7 ; xmm2=data0H - psubd xmm5,xmm6 ; xmm5=data3L - psubd xmm3,xmm7 ; xmm3=data3H - - movdqa xmm6,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; xmm6=[PD_DESCALE_P1_4] - - paddd xmm1,xmm6 - paddd xmm2,xmm6 - psrad xmm1,DESCALE_P1_4 - psrad xmm2,DESCALE_P1_4 - paddd xmm5,xmm6 - paddd xmm3,xmm6 - psrad xmm5,DESCALE_P1_4 - psrad xmm3,DESCALE_P1_4 - - packssdw xmm1,xmm2 ; xmm1=data0=(00 01 02 03 04 05 06 07) - packssdw xmm5,xmm3 ; xmm5=data3=(30 31 32 33 34 35 36 37) - - movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp0L - movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp0H - - movdqa xmm2,xmm4 - movdqa xmm3,xmm0 - paddd xmm4,xmm7 ; xmm4=data1L - paddd xmm0,xmm6 ; xmm0=data1H - psubd xmm2,xmm7 ; xmm2=data2L - psubd xmm3,xmm6 ; xmm3=data2H - - movdqa xmm7,[GOTOFF(ebx,PD_DESCALE_P1_4)] ; xmm7=[PD_DESCALE_P1_4] - - paddd xmm4,xmm7 - paddd xmm0,xmm7 - psrad xmm4,DESCALE_P1_4 - psrad xmm0,DESCALE_P1_4 - paddd xmm2,xmm7 - paddd xmm3,xmm7 - psrad xmm2,DESCALE_P1_4 - psrad xmm3,DESCALE_P1_4 - - packssdw xmm4,xmm0 ; xmm4=data1=(10 11 12 13 14 15 16 17) - packssdw xmm2,xmm3 ; xmm2=data2=(20 21 22 23 24 25 26 27) - - movdqa xmm6,xmm1 ; transpose coefficients(phase 1) 
- punpcklwd xmm1,xmm4 ; xmm1=(00 10 01 11 02 12 03 13) - punpckhwd xmm6,xmm4 ; xmm6=(04 14 05 15 06 16 07 17) - movdqa xmm7,xmm2 ; transpose coefficients(phase 1) - punpcklwd xmm2,xmm5 ; xmm2=(20 30 21 31 22 32 23 33) - punpckhwd xmm7,xmm5 ; xmm7=(24 34 25 35 26 36 27 37) - - movdqa xmm0,xmm1 ; transpose coefficients(phase 2) - punpckldq xmm1,xmm2 ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31) - punpckhdq xmm0,xmm2 ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33) - movdqa xmm3,xmm6 ; transpose coefficients(phase 2) - punpckldq xmm6,xmm7 ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35) - punpckhdq xmm3,xmm7 ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37) -.column_end: - - ; -- Prefetch the next coefficient block - - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows, store into output array. - - mov eax, [original_ebp] - mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(eax)] - - ; -- Even part - - pxor xmm4,xmm4 - punpcklwd xmm4,xmm1 ; xmm4=tmp0 - psrad xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1 - - ; -- Odd part - - punpckhwd xmm1,xmm0 - punpckhwd xmm6,xmm3 - movdqa xmm5,xmm1 - movdqa xmm2,xmm6 - pmaddwd xmm1,[GOTOFF(ebx,PW_F256_F089)] ; xmm1=(tmp2) - pmaddwd xmm6,[GOTOFF(ebx,PW_MF060_MF050)] ; xmm6=(tmp2) - pmaddwd xmm5,[GOTOFF(ebx,PW_F106_MF217)] ; xmm5=(tmp0) - pmaddwd xmm2,[GOTOFF(ebx,PW_F145_MF021)] ; xmm2=(tmp0) - - paddd xmm6,xmm1 ; xmm6=tmp2 - paddd xmm2,xmm5 ; xmm2=tmp0 - - ; -- Even part - - punpcklwd xmm0,xmm3 - pmaddwd xmm0,[GOTOFF(ebx,PW_F184_MF076)] ; xmm0=tmp2 - - movdqa xmm7,xmm4 - paddd xmm4,xmm0 ; xmm4=tmp10 - psubd xmm7,xmm0 ; xmm7=tmp12 - - ; -- Final output stage - - movdqa xmm1,[GOTOFF(ebx,PD_DESCALE_P2_4)] ; xmm1=[PD_DESCALE_P2_4] - - movdqa xmm5,xmm4 - movdqa xmm3,xmm7 - paddd xmm4,xmm6 ; xmm4=data0=(00 10 20 30) - 
paddd xmm7,xmm2 ; xmm7=data1=(01 11 21 31) - psubd xmm5,xmm6 ; xmm5=data3=(03 13 23 33) - psubd xmm3,xmm2 ; xmm3=data2=(02 12 22 32) - - paddd xmm4,xmm1 - paddd xmm7,xmm1 - psrad xmm4,DESCALE_P2_4 - psrad xmm7,DESCALE_P2_4 - paddd xmm5,xmm1 - paddd xmm3,xmm1 - psrad xmm5,DESCALE_P2_4 - psrad xmm3,DESCALE_P2_4 - - packssdw xmm4,xmm3 ; xmm4=(00 10 20 30 02 12 22 32) - packssdw xmm7,xmm5 ; xmm7=(01 11 21 31 03 13 23 33) - - movdqa xmm0,xmm4 ; transpose coefficients(phase 1) - punpcklwd xmm4,xmm7 ; xmm4=(00 01 10 11 20 21 30 31) - punpckhwd xmm0,xmm7 ; xmm0=(02 03 12 13 22 23 32 33) - - movdqa xmm6,xmm4 ; transpose coefficients(phase 2) - punpckldq xmm4,xmm0 ; xmm4=(00 01 02 03 10 11 12 13) - punpckhdq xmm6,xmm0 ; xmm6=(20 21 22 23 30 31 32 33) - - packsswb xmm4,xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..) - paddb xmm4,[GOTOFF(ebx,PB_CENTERJSAMP)] - - pshufd xmm2,xmm4,0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..) - pshufd xmm1,xmm4,0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..) - pshufd xmm3,xmm4,0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..) - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - movd XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm4 - movd XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm2 - mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW] - movd XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm1 - movd XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm3 - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; unused - poppic ebx - mov esp,ebp ; esp <- aligned ebp - pop esp ; esp <- original ebp - pop ebp - ret - - -; -------------------------------------------------------------------------- -; -; Perform dequantization and inverse DCT on one block of coefficients, -; producing a reduced-size 2x2 output block. 
-; -; GLOBAL(void) -; jsimd_idct_2x2_sse2 (void *dct_table, JCOEFPTR coef_block, -; JSAMPARRAY output_buf, JDIMENSION output_col) -; - -%define dct_table(b) (b)+8 ; void *dct_table -%define coef_block(b) (b)+12 ; JCOEFPTR coef_block -%define output_buf(b) (b)+16 ; JSAMPARRAY output_buf -%define output_col(b) (b)+20 ; JDIMENSION output_col - - align 16 - global EXTN(jsimd_idct_2x2_sse2) - -EXTN(jsimd_idct_2x2_sse2): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - get_GOT ebx ; get GOT address - - ; ---- Pass 1: process columns from input. - - mov edx, POINTER [dct_table(ebp)] ; quantptr - mov esi, JCOEFPTR [coef_block(ebp)] ; inptr - - ; | input: | result: | - ; | 00 01 ** 03 ** 05 ** 07 | | - ; | 10 11 ** 13 ** 15 ** 17 | | - ; | ** ** ** ** ** ** ** ** | | - ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | - ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | - ; | 50 51 ** 53 ** 55 ** 57 | | - ; | ** ** ** ** ** ** ** ** | | - ; | 70 71 ** 73 ** 75 ** 77 | | - - ; -- Odd part - - movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)] - movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)] - pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - movdqa xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)] - movdqa xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)] - pmullw xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - pmullw xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37) - ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77) - - pcmpeqd xmm7,xmm7 - pslld xmm7,WORD_BIT ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..} - - movdqa xmm4,xmm0 ; xmm4=(10 11 ** 13 ** 15 ** 17) - movdqa xmm5,xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57) - punpcklwd xmm4,xmm1 ; xmm4=(10 30 11 31 ** ** 13 33) - punpcklwd xmm5,xmm3 ; xmm5=(50 70 51 71 ** ** 53 73) - 
pmaddwd xmm4,[GOTOFF(ebx,PW_F362_MF127)] - pmaddwd xmm5,[GOTOFF(ebx,PW_F085_MF072)] - - psrld xmm0,WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --) - pand xmm1,xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37) - psrld xmm2,WORD_BIT ; xmm2=(51 -- 53 -- 55 -- 57 --) - pand xmm3,xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77) - por xmm0,xmm1 ; xmm0=(11 31 13 33 15 35 17 37) - por xmm2,xmm3 ; xmm2=(51 71 53 73 55 75 57 77) - pmaddwd xmm0,[GOTOFF(ebx,PW_F362_MF127)] - pmaddwd xmm2,[GOTOFF(ebx,PW_F085_MF072)] - - paddd xmm4,xmm5 ; xmm4=tmp0[col0 col1 **** col3] - paddd xmm0,xmm2 ; xmm0=tmp0[col1 col3 col5 col7] - - ; -- Even part - - movdqa xmm6, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)] - pmullw xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)] - - ; xmm6=(00 01 ** 03 ** 05 ** 07) - - movdqa xmm1,xmm6 ; xmm1=(00 01 ** 03 ** 05 ** 07) - pslld xmm6,WORD_BIT ; xmm6=(-- 00 -- ** -- ** -- **) - pand xmm1,xmm7 ; xmm1=(-- 01 -- 03 -- 05 -- 07) - psrad xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****] - psrad xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7] - - ; -- Final output stage - - movdqa xmm3,xmm6 - movdqa xmm5,xmm1 - paddd xmm6,xmm4 ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **) - paddd xmm1,xmm0 ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7) - psubd xmm3,xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **) - psubd xmm5,xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7) - - movdqa xmm2,[GOTOFF(ebx,PD_DESCALE_P1_2)] ; xmm2=[PD_DESCALE_P1_2] - - punpckldq xmm6,xmm3 ; xmm6=(A0 B0 ** **) - - movdqa xmm7,xmm1 - punpcklqdq xmm1,xmm5 ; xmm1=(A1 A3 B1 B3) - punpckhqdq xmm7,xmm5 ; xmm7=(A5 A7 B5 B7) - - paddd xmm6,xmm2 - psrad xmm6,DESCALE_P1_2 - - paddd xmm1,xmm2 - paddd xmm7,xmm2 - psrad xmm1,DESCALE_P1_2 - psrad xmm7,DESCALE_P1_2 - - ; -- Prefetch the next coefficient block - - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32] - prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32] - prefetchnta [esi + 
DCTSIZE2*SIZEOF_JCOEF + 3*32] - - ; ---- Pass 2: process rows, store into output array. - - mov edi, JSAMPARRAY [output_buf(ebp)] ; (JSAMPROW *) - mov eax, JDIMENSION [output_col(ebp)] - - ; | input:| result:| - ; | A0 B0 | | - ; | A1 B1 | C0 C1 | - ; | A3 B3 | D0 D1 | - ; | A5 B5 | | - ; | A7 B7 | | - - ; -- Odd part - - packssdw xmm1,xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3) - packssdw xmm7,xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7) - pmaddwd xmm1,[GOTOFF(ebx,PW_F362_MF127)] - pmaddwd xmm7,[GOTOFF(ebx,PW_F085_MF072)] - - paddd xmm1,xmm7 ; xmm1=tmp0[row0 row1 row0 row1] - - ; -- Even part - - pslld xmm6,(CONST_BITS+2) ; xmm6=tmp10[row0 row1 **** ****] - - ; -- Final output stage - - movdqa xmm4,xmm6 - paddd xmm6,xmm1 ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **) - psubd xmm4,xmm1 ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **) - - punpckldq xmm6,xmm4 ; xmm6=(C0 D0 C1 D1) - - paddd xmm6,[GOTOFF(ebx,PD_DESCALE_P2_2)] - psrad xmm6,DESCALE_P2_2 - - packssdw xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1) - packsswb xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..) - paddb xmm6,[GOTOFF(ebx,PB_CENTERJSAMP)] - - pextrw ebx,xmm6,0x00 ; ebx=(C0 D0 -- --) - pextrw ecx,xmm6,0x01 ; ecx=(C1 D1 -- --) - - mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] - mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] - mov WORD [edx+eax*SIZEOF_JSAMPLE], bx - mov WORD [esi+eax*SIZEOF_JSAMPLE], cx - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jquant-3dn.asm b/simd/jquant-3dn.asm deleted file mode 100644 index 0b4164b..0000000 --- a/simd/jquant-3dn.asm +++ /dev/null @@ -1,232 +0,0 @@ -; -; jquant.asm - sample data conversion and quantization (3DNow! & MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Load data into workspace, applying unsigned->signed conversion -; -; GLOBAL(void) -; jsimd_convsamp_float_3dnow (JSAMPARRAY sample_data, JDIMENSION start_col, -; FAST_FLOAT *workspace); -; - -%define sample_data ebp+8 ; JSAMPARRAY sample_data -%define start_col ebp+12 ; JDIMENSION start_col -%define workspace ebp+16 ; FAST_FLOAT *workspace - - align 16 - global EXTN(jsimd_convsamp_float_3dnow) - -EXTN(jsimd_convsamp_float_3dnow): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - pcmpeqw mm7,mm7 - psllw mm7,7 - packsswb mm7,mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..) 
- - mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) - mov eax, JDIMENSION [start_col] - mov edi, POINTER [workspace] ; (DCTELEM *) - mov ecx, DCTSIZE/2 - alignx 16,7 -.convloop: - mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) - mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) - - movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] - movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] - - psubb mm0,mm7 ; mm0=(01234567) - psubb mm1,mm7 ; mm1=(89ABCDEF) - - punpcklbw mm2,mm0 ; mm2=(*0*1*2*3) - punpckhbw mm0,mm0 ; mm0=(*4*5*6*7) - punpcklbw mm3,mm1 ; mm3=(*8*9*A*B) - punpckhbw mm1,mm1 ; mm1=(*C*D*E*F) - - punpcklwd mm4,mm2 ; mm4=(***0***1) - punpckhwd mm2,mm2 ; mm2=(***2***3) - punpcklwd mm5,mm0 ; mm5=(***4***5) - punpckhwd mm0,mm0 ; mm0=(***6***7) - - psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(01) - psrad mm2,(DWORD_BIT-BYTE_BIT) ; mm2=(23) - pi2fd mm4,mm4 - pi2fd mm2,mm2 - psrad mm5,(DWORD_BIT-BYTE_BIT) ; mm5=(45) - psrad mm0,(DWORD_BIT-BYTE_BIT) ; mm0=(67) - pi2fd mm5,mm5 - pi2fd mm0,mm0 - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm4 - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm2 - movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5 - movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0 - - punpcklwd mm6,mm3 ; mm6=(***8***9) - punpckhwd mm3,mm3 ; mm3=(***A***B) - punpcklwd mm4,mm1 ; mm4=(***C***D) - punpckhwd mm1,mm1 ; mm1=(***E***F) - - psrad mm6,(DWORD_BIT-BYTE_BIT) ; mm6=(89) - psrad mm3,(DWORD_BIT-BYTE_BIT) ; mm3=(AB) - pi2fd mm6,mm6 - pi2fd mm3,mm3 - psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(CD) - psrad mm1,(DWORD_BIT-BYTE_BIT) ; mm1=(EF) - pi2fd mm4,mm4 - pi2fd mm1,mm1 - - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm6 - movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm3 - movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm4 - movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1 - - add esi, byte 2*SIZEOF_JSAMPROW - add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT - dec ecx - jnz near .convloop - - femms ; empty MMX/3DNow! 
state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - - -; -------------------------------------------------------------------------- -; -; Quantize/descale the coefficients, and store into coef_block -; -; GLOBAL(void) -; jsimd_quantize_float_3dnow (JCOEFPTR coef_block, FAST_FLOAT *divisors, -; FAST_FLOAT *workspace); -; - -%define coef_block ebp+8 ; JCOEFPTR coef_block -%define divisors ebp+12 ; FAST_FLOAT *divisors -%define workspace ebp+16 ; FAST_FLOAT *workspace - - align 16 - global EXTN(jsimd_quantize_float_3dnow) - -EXTN(jsimd_quantize_float_3dnow): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; unused -; push edx ; need not be preserved - push esi - push edi - - mov eax, 0x4B400000 ; (float)0x00C00000 (rndint_magic) - movd mm7,eax - punpckldq mm7,mm7 ; mm7={12582912.0F 12582912.0F} - - mov esi, POINTER [workspace] - mov edx, POINTER [divisors] - mov edi, JCOEFPTR [coef_block] - mov eax, DCTSIZE2/16 - alignx 16,7 -.quantloop: - movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] - movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] - pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] - pfmul mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] - movq mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)] - movq mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)] - pfmul mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)] - pfmul mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)] - - pfadd mm0,mm7 ; mm0=(00 ** 01 **) - pfadd mm1,mm7 ; mm1=(02 ** 03 **) - pfadd mm2,mm7 ; mm0=(04 ** 05 **) - pfadd mm3,mm7 ; mm1=(06 ** 07 **) - - movq mm4,mm0 - punpcklwd mm0,mm1 ; mm0=(00 02 ** **) - punpckhwd mm4,mm1 ; mm4=(01 03 ** **) - movq mm5,mm2 - punpcklwd mm2,mm3 ; mm2=(04 06 ** **) - punpckhwd mm5,mm3 ; mm5=(05 07 ** **) - - punpcklwd mm0,mm4 ; mm0=(00 01 02 03) - punpcklwd mm2,mm5 ; mm2=(04 05 06 07) - - movq mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] - movq mm1, MMWORD 
[MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] - pfmul mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] - pfmul mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] - movq mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)] - movq mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)] - pfmul mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)] - pfmul mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)] - - pfadd mm6,mm7 ; mm0=(10 ** 11 **) - pfadd mm1,mm7 ; mm4=(12 ** 13 **) - pfadd mm3,mm7 ; mm0=(14 ** 15 **) - pfadd mm4,mm7 ; mm4=(16 ** 17 **) - - movq mm5,mm6 - punpcklwd mm6,mm1 ; mm6=(10 12 ** **) - punpckhwd mm5,mm1 ; mm5=(11 13 ** **) - movq mm1,mm3 - punpcklwd mm3,mm4 ; mm3=(14 16 ** **) - punpckhwd mm1,mm4 ; mm1=(15 17 ** **) - - punpcklwd mm6,mm5 ; mm6=(10 11 12 13) - punpcklwd mm3,mm1 ; mm3=(14 15 16 17) - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm2 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm6 - movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3 - - add esi, byte 16*SIZEOF_FAST_FLOAT - add edx, byte 16*SIZEOF_FAST_FLOAT - add edi, byte 16*SIZEOF_JCOEF - dec eax - jnz near .quantloop - - femms ; empty MMX/3DNow! state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; unused -; pop ebx ; unused - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jquant-mmx.asm b/simd/jquant-mmx.asm deleted file mode 100644 index aed6071..0000000 --- a/simd/jquant-mmx.asm +++ /dev/null @@ -1,273 +0,0 @@ -; -; jquant.asm - sample data conversion and quantization (MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Load data into workspace, applying unsigned->signed conversion -; -; GLOBAL(void) -; jsimd_convsamp_mmx (JSAMPARRAY sample_data, JDIMENSION start_col, -; DCTELEM *workspace); -; - -%define sample_data ebp+8 ; JSAMPARRAY sample_data -%define start_col ebp+12 ; JDIMENSION start_col -%define workspace ebp+16 ; DCTELEM *workspace - - align 16 - global EXTN(jsimd_convsamp_mmx) - -EXTN(jsimd_convsamp_mmx): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - pxor mm6,mm6 ; mm6=(all 0's) - pcmpeqw mm7,mm7 - psllw mm7,7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80} - - mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) - mov eax, JDIMENSION [start_col] - mov edi, POINTER [workspace] ; (DCTELEM *) - mov ecx, DCTSIZE/4 - alignx 16,7 -.convloop: - mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) - mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) - - movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm0=(01234567) - movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm1=(89ABCDEF) - - mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) - mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) - - movq mm2, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm2=(GHIJKLMN) - movq mm3, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm3=(OPQRSTUV) - - movq mm4,mm0 - punpcklbw mm0,mm6 ; mm0=(0123) - punpckhbw mm4,mm6 ; mm4=(4567) - movq mm5,mm1 - punpcklbw mm1,mm6 ; mm1=(89AB) - punpckhbw 
mm5,mm6 ; mm5=(CDEF) - - paddw mm0,mm7 - paddw mm4,mm7 - paddw mm1,mm7 - paddw mm5,mm7 - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0 - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm4 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_DCTELEM)], mm1 - movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_DCTELEM)], mm5 - - movq mm0,mm2 - punpcklbw mm2,mm6 ; mm2=(GHIJ) - punpckhbw mm0,mm6 ; mm0=(KLMN) - movq mm4,mm3 - punpcklbw mm3,mm6 ; mm3=(OPQR) - punpckhbw mm4,mm6 ; mm4=(STUV) - - paddw mm2,mm7 - paddw mm0,mm7 - paddw mm3,mm7 - paddw mm4,mm7 - - movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_DCTELEM)], mm2 - movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_DCTELEM)], mm0 - movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_DCTELEM)], mm3 - movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_DCTELEM)], mm4 - - add esi, byte 4*SIZEOF_JSAMPROW - add edi, byte 4*DCTSIZE*SIZEOF_DCTELEM - dec ecx - jnz short .convloop - - emms ; empty MMX state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Quantize/descale the coefficients, and store into coef_block -; -; This implementation is based on an algorithm described in -; "How to optimize for the Pentium family of microprocessors" -; (http://www.agner.org/assem/). 
-; -; GLOBAL(void) -; jsimd_quantize_mmx (JCOEFPTR coef_block, DCTELEM *divisors, -; DCTELEM *workspace); -; - -%define RECIPROCAL(m,n,b) MMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) -%define CORRECTION(m,n,b) MMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) -%define SCALE(m,n,b) MMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) -%define SHIFT(m,n,b) MMBLOCK(DCTSIZE*3+(m),(n),(b),SIZEOF_DCTELEM) - -%define coef_block ebp+8 ; JCOEFPTR coef_block -%define divisors ebp+12 ; DCTELEM *divisors -%define workspace ebp+16 ; DCTELEM *workspace - - align 16 - global EXTN(jsimd_quantize_mmx) - -EXTN(jsimd_quantize_mmx): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; unused -; push edx ; need not be preserved - push esi - push edi - - mov esi, POINTER [workspace] - mov edx, POINTER [divisors] - mov edi, JCOEFPTR [coef_block] - mov ah, 2 - alignx 16,7 -.quantloop1: - mov al, DCTSIZE2/8/2 - alignx 16,7 -.quantloop2: - movq mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)] - movq mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)] - - movq mm0,mm2 - movq mm1,mm3 - - psraw mm2,(WORD_BIT-1) ; -1 if value < 0, 0 otherwise - psraw mm3,(WORD_BIT-1) - - pxor mm0,mm2 ; val = -val - pxor mm1,mm3 - psubw mm0,mm2 - psubw mm1,mm3 - - ; - ; MMX is an annoyingly crappy instruction set. It has two - ; misfeatures that are causing problems here: - ; - ; - All multiplications are signed. - ; - ; - The second operand for the shifts is not treated as packed. 
- ; - ; - ; We work around the first problem by implementing this algorithm: - ; - ; unsigned long unsigned_multiply(unsigned short x, unsigned short y) - ; { - ; enum { SHORT_BIT = 16 }; - ; signed short sx = (signed short) x; - ; signed short sy = (signed short) y; - ; signed long sz; - ; - ; sz = (long) sx * (long) sy; /* signed multiply */ - ; - ; if (sx < 0) sz += (long) sy << SHORT_BIT; - ; if (sy < 0) sz += (long) sx << SHORT_BIT; - ; - ; return (unsigned long) sz; - ; } - ; - ; (note that a negative sx adds _sy_ and vice versa) - ; - ; For the second problem, we replace the shift by a multiplication. - ; Unfortunately that means we have to deal with the signed issue again. - ; - - paddw mm0, MMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor - paddw mm1, MMWORD [CORRECTION(0,1,edx)] - - movq mm4,mm0 ; store current value for later - movq mm5,mm1 - pmulhw mm0, MMWORD [RECIPROCAL(0,0,edx)] ; reciprocal - pmulhw mm1, MMWORD [RECIPROCAL(0,1,edx)] - paddw mm0,mm4 ; reciprocal is always negative (MSB=1), - paddw mm1,mm5 ; so we always need to add the initial value - ; (input value is never negative as we - ; inverted it at the start of this routine) - - ; here it gets a bit tricky as both scale - ; and mm0/mm1 can be negative - movq mm6, MMWORD [SCALE(0,0,edx)] ; scale - movq mm7, MMWORD [SCALE(0,1,edx)] - movq mm4,mm0 - movq mm5,mm1 - pmulhw mm0,mm6 - pmulhw mm1,mm7 - - psraw mm6,(WORD_BIT-1) ; determine if scale is negative - psraw mm7,(WORD_BIT-1) - - pand mm6,mm4 ; and add input if it is - pand mm7,mm5 - paddw mm0,mm6 - paddw mm1,mm7 - - psraw mm4,(WORD_BIT-1) ; then check if negative input - psraw mm5,(WORD_BIT-1) - - pand mm4, MMWORD [SCALE(0,0,edx)] ; and add scale if it is - pand mm5, MMWORD [SCALE(0,1,edx)] - paddw mm0,mm4 - paddw mm1,mm5 - - pxor mm0,mm2 ; val = -val - pxor mm1,mm3 - psubw mm0,mm2 - psubw mm1,mm3 - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0 - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm1 - - add esi, byte 
8*SIZEOF_DCTELEM - add edx, byte 8*SIZEOF_DCTELEM - add edi, byte 8*SIZEOF_JCOEF - dec al - jnz near .quantloop2 - dec ah - jnz near .quantloop1 ; to avoid branch misprediction - - emms ; empty MMX state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; unused -; pop ebx ; unused - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jquant-sse.asm b/simd/jquant-sse.asm deleted file mode 100644 index 1baf88f..0000000 --- a/simd/jquant-sse.asm +++ /dev/null @@ -1,210 +0,0 @@ -; -; jquant.asm - sample data conversion and quantization (SSE & MMX) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Load data into workspace, applying unsigned->signed conversion -; -; GLOBAL(void) -; jsimd_convsamp_float_sse (JSAMPARRAY sample_data, JDIMENSION start_col, -; FAST_FLOAT *workspace); -; - -%define sample_data ebp+8 ; JSAMPARRAY sample_data -%define start_col ebp+12 ; JDIMENSION start_col -%define workspace ebp+16 ; FAST_FLOAT *workspace - - align 16 - global EXTN(jsimd_convsamp_float_sse) - -EXTN(jsimd_convsamp_float_sse): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - pcmpeqw mm7,mm7 - psllw mm7,7 - packsswb mm7,mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..) - - mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) - mov eax, JDIMENSION [start_col] - mov edi, POINTER [workspace] ; (DCTELEM *) - mov ecx, DCTSIZE/2 - alignx 16,7 -.convloop: - mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) - mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) - - movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] - movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] - - psubb mm0,mm7 ; mm0=(01234567) - psubb mm1,mm7 ; mm1=(89ABCDEF) - - punpcklbw mm2,mm0 ; mm2=(*0*1*2*3) - punpckhbw mm0,mm0 ; mm0=(*4*5*6*7) - punpcklbw mm3,mm1 ; mm3=(*8*9*A*B) - punpckhbw mm1,mm1 ; mm1=(*C*D*E*F) - - punpcklwd mm4,mm2 ; mm4=(***0***1) - punpckhwd mm2,mm2 ; mm2=(***2***3) - punpcklwd mm5,mm0 ; mm5=(***4***5) - punpckhwd mm0,mm0 ; mm0=(***6***7) - - psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(01) - psrad mm2,(DWORD_BIT-BYTE_BIT) ; mm2=(23) - cvtpi2ps xmm0,mm4 ; xmm0=(01**) - cvtpi2ps xmm1,mm2 ; xmm1=(23**) - psrad mm5,(DWORD_BIT-BYTE_BIT) ; mm5=(45) - psrad mm0,(DWORD_BIT-BYTE_BIT) ; mm0=(67) - cvtpi2ps xmm2,mm5 ; xmm2=(45**) - cvtpi2ps xmm3,mm0 ; 
xmm3=(67**) - - punpcklwd mm6,mm3 ; mm6=(***8***9) - punpckhwd mm3,mm3 ; mm3=(***A***B) - punpcklwd mm4,mm1 ; mm4=(***C***D) - punpckhwd mm1,mm1 ; mm1=(***E***F) - - psrad mm6,(DWORD_BIT-BYTE_BIT) ; mm6=(89) - psrad mm3,(DWORD_BIT-BYTE_BIT) ; mm3=(AB) - cvtpi2ps xmm4,mm6 ; xmm4=(89**) - cvtpi2ps xmm5,mm3 ; xmm5=(AB**) - psrad mm4,(DWORD_BIT-BYTE_BIT) ; mm4=(CD) - psrad mm1,(DWORD_BIT-BYTE_BIT) ; mm1=(EF) - cvtpi2ps xmm6,mm4 ; xmm6=(CD**) - cvtpi2ps xmm7,mm1 ; xmm7=(EF**) - - movlhps xmm0,xmm1 ; xmm0=(0123) - movlhps xmm2,xmm3 ; xmm2=(4567) - movlhps xmm4,xmm5 ; xmm4=(89AB) - movlhps xmm6,xmm7 ; xmm6=(CDEF) - - movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm2 - movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm4 - movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6 - - add esi, byte 2*SIZEOF_JSAMPROW - add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT - dec ecx - jnz near .convloop - - emms ; empty MMX state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - - -; -------------------------------------------------------------------------- -; -; Quantize/descale the coefficients, and store into coef_block -; -; GLOBAL(void) -; jsimd_quantize_float_sse (JCOEFPTR coef_block, FAST_FLOAT *divisors, -; FAST_FLOAT *workspace); -; - -%define coef_block ebp+8 ; JCOEFPTR coef_block -%define divisors ebp+12 ; FAST_FLOAT *divisors -%define workspace ebp+16 ; FAST_FLOAT *workspace - - align 16 - global EXTN(jsimd_quantize_float_sse) - -EXTN(jsimd_quantize_float_sse): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; unused -; push edx ; need not be preserved - push esi - push edi - - mov esi, POINTER [workspace] - mov edx, POINTER [divisors] - mov edi, JCOEFPTR [coef_block] - mov eax, DCTSIZE2/16 - alignx 16,7 -.quantloop: - movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD 
[XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] - mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] - mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] - movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] - mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] - mulps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] - - movhlps xmm4,xmm0 - movhlps xmm5,xmm1 - - cvtps2pi mm0,xmm0 - cvtps2pi mm1,xmm1 - cvtps2pi mm4,xmm4 - cvtps2pi mm5,xmm5 - - movhlps xmm6,xmm2 - movhlps xmm7,xmm3 - - cvtps2pi mm2,xmm2 - cvtps2pi mm3,xmm3 - cvtps2pi mm6,xmm6 - cvtps2pi mm7,xmm7 - - packssdw mm0,mm4 - packssdw mm1,mm5 - packssdw mm2,mm6 - packssdw mm3,mm7 - - movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0 - movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1 - movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm2 - movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3 - - add esi, byte 16*SIZEOF_FAST_FLOAT - add edx, byte 16*SIZEOF_FAST_FLOAT - add edi, byte 16*SIZEOF_JCOEF - dec eax - jnz short .quantloop - - emms ; empty MMX state - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; unused -; pop ebx ; unused - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jquantf-sse2-64.asm b/simd/jquantf-sse2-64.asm deleted file mode 100644 index ef5c1f9..0000000 --- a/simd/jquantf-sse2-64.asm +++ /dev/null @@ -1,157 +0,0 @@ -; -; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. 
-; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). -; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Load data into workspace, applying unsigned->signed conversion -; -; GLOBAL(void) -; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, -; FAST_FLOAT *workspace); -; - -; r10 = JSAMPARRAY sample_data -; r11 = JDIMENSION start_col -; r12 = FAST_FLOAT *workspace - - align 16 - global EXTN(jsimd_convsamp_float_sse2) - -EXTN(jsimd_convsamp_float_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - push rbx - - pcmpeqw xmm7,xmm7 - psllw xmm7,7 - packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) 
- - mov rsi, r10 - mov eax, r11d - mov rdi, r12 - mov rcx, DCTSIZE/2 -.convloop: - mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) - mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) - - movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] - movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] - - psubb xmm0,xmm7 ; xmm0=(01234567) - psubb xmm1,xmm7 ; xmm1=(89ABCDEF) - - punpcklbw xmm0,xmm0 ; xmm0=(*0*1*2*3*4*5*6*7) - punpcklbw xmm1,xmm1 ; xmm1=(*8*9*A*B*C*D*E*F) - - punpcklwd xmm2,xmm0 ; xmm2=(***0***1***2***3) - punpckhwd xmm0,xmm0 ; xmm0=(***4***5***6***7) - punpcklwd xmm3,xmm1 ; xmm3=(***8***9***A***B) - punpckhwd xmm1,xmm1 ; xmm1=(***C***D***E***F) - - psrad xmm2,(DWORD_BIT-BYTE_BIT) ; xmm2=(0123) - psrad xmm0,(DWORD_BIT-BYTE_BIT) ; xmm0=(4567) - cvtdq2ps xmm2,xmm2 ; xmm2=(0123) - cvtdq2ps xmm0,xmm0 ; xmm0=(4567) - psrad xmm3,(DWORD_BIT-BYTE_BIT) ; xmm3=(89AB) - psrad xmm1,(DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF) - cvtdq2ps xmm3,xmm3 ; xmm3=(89AB) - cvtdq2ps xmm1,xmm1 ; xmm1=(CDEF) - - movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm2 - movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3 - movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1 - - add rsi, byte 2*SIZEOF_JSAMPROW - add rdi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT - dec rcx - jnz short .convloop - - pop rbx - uncollect_args - pop rbp - ret - - -; -------------------------------------------------------------------------- -; -; Quantize/descale the coefficients, and store into coef_block -; -; GLOBAL(void) -; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT *divisors, -; FAST_FLOAT *workspace); -; - -; r10 = JCOEFPTR coef_block -; r11 = FAST_FLOAT *divisors -; r12 = FAST_FLOAT *workspace - - align 16 - global EXTN(jsimd_quantize_float_sse2) - -EXTN(jsimd_quantize_float_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - - mov rsi, r12 - mov rdx, r11 - mov rdi, r10 - mov rax, DCTSIZE2/16 -.quantloop: - movaps 
xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(0,1,rsi,SIZEOF_FAST_FLOAT)] - mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] - mulps xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)] - movaps xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(1,1,rsi,SIZEOF_FAST_FLOAT)] - mulps xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] - mulps xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)] - - cvtps2dq xmm0,xmm0 - cvtps2dq xmm1,xmm1 - cvtps2dq xmm2,xmm2 - cvtps2dq xmm3,xmm3 - - packssdw xmm0,xmm1 - packssdw xmm2,xmm3 - - movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_JCOEF)], xmm0 - movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_JCOEF)], xmm2 - - add rsi, byte 16*SIZEOF_FAST_FLOAT - add rdx, byte 16*SIZEOF_FAST_FLOAT - add rdi, byte 16*SIZEOF_JCOEF - dec rax - jnz short .quantloop - - uncollect_args - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jquantf-sse2.asm b/simd/jquantf-sse2.asm deleted file mode 100644 index 1cbc267..0000000 --- a/simd/jquantf-sse2.asm +++ /dev/null @@ -1,170 +0,0 @@ -; -; jquantf.asm - sample data conversion and quantization (SSE & SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Load data into workspace, applying unsigned->signed conversion -; -; GLOBAL(void) -; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, -; FAST_FLOAT *workspace); -; - -%define sample_data ebp+8 ; JSAMPARRAY sample_data -%define start_col ebp+12 ; JDIMENSION start_col -%define workspace ebp+16 ; FAST_FLOAT *workspace - - align 16 - global EXTN(jsimd_convsamp_float_sse2) - -EXTN(jsimd_convsamp_float_sse2): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - pcmpeqw xmm7,xmm7 - psllw xmm7,7 - packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) - - mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) - mov eax, JDIMENSION [start_col] - mov edi, POINTER [workspace] ; (DCTELEM *) - mov ecx, DCTSIZE/2 - alignx 16,7 -.convloop: - mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) - mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) - - movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] - movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] - - psubb xmm0,xmm7 ; xmm0=(01234567) - psubb xmm1,xmm7 ; xmm1=(89ABCDEF) - - punpcklbw xmm0,xmm0 ; xmm0=(*0*1*2*3*4*5*6*7) - punpcklbw xmm1,xmm1 ; xmm1=(*8*9*A*B*C*D*E*F) - - punpcklwd xmm2,xmm0 ; xmm2=(***0***1***2***3) - punpckhwd xmm0,xmm0 ; xmm0=(***4***5***6***7) - punpcklwd xmm3,xmm1 ; xmm3=(***8***9***A***B) - punpckhwd xmm1,xmm1 ; xmm1=(***C***D***E***F) - - psrad xmm2,(DWORD_BIT-BYTE_BIT) ; xmm2=(0123) - psrad xmm0,(DWORD_BIT-BYTE_BIT) ; xmm0=(4567) - cvtdq2ps xmm2,xmm2 ; xmm2=(0123) - cvtdq2ps xmm0,xmm0 ; xmm0=(4567) - psrad xmm3,(DWORD_BIT-BYTE_BIT) ; xmm3=(89AB) - psrad xmm1,(DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF) - cvtdq2ps xmm3,xmm3 
; xmm3=(89AB) - cvtdq2ps xmm1,xmm1 ; xmm1=(CDEF) - - movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm2 - movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0 - movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3 - movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1 - - add esi, byte 2*SIZEOF_JSAMPROW - add edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT - dec ecx - jnz short .convloop - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - - -; -------------------------------------------------------------------------- -; -; Quantize/descale the coefficients, and store into coef_block -; -; GLOBAL(void) -; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT *divisors, -; FAST_FLOAT *workspace); -; - -%define coef_block ebp+8 ; JCOEFPTR coef_block -%define divisors ebp+12 ; FAST_FLOAT *divisors -%define workspace ebp+16 ; FAST_FLOAT *workspace - - align 16 - global EXTN(jsimd_quantize_float_sse2) - -EXTN(jsimd_quantize_float_sse2): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; unused -; push edx ; need not be preserved - push esi - push edi - - mov esi, POINTER [workspace] - mov edx, POINTER [divisors] - mov edi, JCOEFPTR [coef_block] - mov eax, DCTSIZE2/16 - alignx 16,7 -.quantloop: - movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)] - mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)] - mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)] - movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)] - movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)] - mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)] - mulps xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)] - - cvtps2dq xmm0,xmm0 - cvtps2dq xmm1,xmm1 - cvtps2dq xmm2,xmm2 - cvtps2dq xmm3,xmm3 - - packssdw xmm0,xmm1 - packssdw xmm2,xmm3 - - movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_JCOEF)], xmm0 - movdqa XMMWORD 
[XMMBLOCK(1,0,edi,SIZEOF_JCOEF)], xmm2 - - add esi, byte 16*SIZEOF_FAST_FLOAT - add edx, byte 16*SIZEOF_FAST_FLOAT - add edi, byte 16*SIZEOF_JCOEF - dec eax - jnz short .quantloop - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; unused -; pop ebx ; unused - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jquanti-sse2-64.asm b/simd/jquanti-sse2-64.asm deleted file mode 100644 index 66c4e51..0000000 --- a/simd/jquanti-sse2-64.asm +++ /dev/null @@ -1,186 +0,0 @@ -; -; jquanti.asm - sample data conversion and quantization (64-bit SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2009, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 64 -; -; Load data into workspace, applying unsigned->signed conversion -; -; GLOBAL(void) -; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, -; DCTELEM *workspace); -; - -; r10 = JSAMPARRAY sample_data -; r11 = JDIMENSION start_col -; r12 = DCTELEM *workspace - - align 16 - global EXTN(jsimd_convsamp_sse2) - -EXTN(jsimd_convsamp_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - push rbx - - pxor xmm6,xmm6 ; xmm6=(all 0's) - pcmpeqw xmm7,xmm7 - psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} - - mov rsi, r10 - mov eax, r11d - mov rdi, r12 - mov rcx, DCTSIZE/4 -.convloop: - mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) - mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) - - movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567) - movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) - - mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) - mov rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) - - movq xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN) - movq xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV) - - punpcklbw xmm0,xmm6 ; xmm0=(01234567) - punpcklbw xmm1,xmm6 ; xmm1=(89ABCDEF) - paddw xmm0,xmm7 - paddw xmm1,xmm7 - punpcklbw xmm2,xmm6 ; xmm2=(GHIJKLMN) - punpcklbw xmm3,xmm6 ; xmm3=(OPQRSTUV) - paddw xmm2,xmm7 - paddw xmm3,xmm7 - - movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0 - movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1 - movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2 - movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3 - - add rsi, byte 4*SIZEOF_JSAMPROW - add rdi, byte 4*DCTSIZE*SIZEOF_DCTELEM - dec rcx - jnz short .convloop - - pop rbx - 
uncollect_args - pop rbp - ret - -; -------------------------------------------------------------------------- -; -; Quantize/descale the coefficients, and store into coef_block -; -; This implementation is based on an algorithm described in -; "How to optimize for the Pentium family of microprocessors" -; (http://www.agner.org/assem/). -; -; GLOBAL(void) -; jsimd_quantize_sse2 (JCOEFPTR coef_block, DCTELEM *divisors, -; DCTELEM *workspace); -; - -%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) -%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) -%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) - -; r10 = JCOEFPTR coef_block -; r11 = DCTELEM *divisors -; r12 = DCTELEM *workspace - - align 16 - global EXTN(jsimd_quantize_sse2) - -EXTN(jsimd_quantize_sse2): - push rbp - mov rax,rsp - mov rbp,rsp - collect_args - - mov rsi, r12 - mov rdx, r11 - mov rdi, r10 - mov rax, DCTSIZE2/32 -.quantloop: - movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_DCTELEM)] - movdqa xmm5, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_DCTELEM)] - movdqa xmm6, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_DCTELEM)] - movdqa xmm7, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_DCTELEM)] - movdqa xmm0,xmm4 - movdqa xmm1,xmm5 - movdqa xmm2,xmm6 - movdqa xmm3,xmm7 - psraw xmm4,(WORD_BIT-1) - psraw xmm5,(WORD_BIT-1) - psraw xmm6,(WORD_BIT-1) - psraw xmm7,(WORD_BIT-1) - pxor xmm0,xmm4 - pxor xmm1,xmm5 - pxor xmm2,xmm6 - pxor xmm3,xmm7 - psubw xmm0,xmm4 ; if (xmm0 < 0) xmm0 = -xmm0; - psubw xmm1,xmm5 ; if (xmm1 < 0) xmm1 = -xmm1; - psubw xmm2,xmm6 ; if (xmm2 < 0) xmm2 = -xmm2; - psubw xmm3,xmm7 ; if (xmm3 < 0) xmm3 = -xmm3; - - paddw xmm0, XMMWORD [CORRECTION(0,0,rdx)] ; correction + roundfactor - paddw xmm1, XMMWORD [CORRECTION(1,0,rdx)] - paddw xmm2, XMMWORD [CORRECTION(2,0,rdx)] - paddw xmm3, XMMWORD [CORRECTION(3,0,rdx)] - pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,rdx)] ; reciprocal - pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,rdx)] - pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,rdx)] - 
pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,rdx)] - pmulhuw xmm0, XMMWORD [SCALE(0,0,rdx)] ; scale - pmulhuw xmm1, XMMWORD [SCALE(1,0,rdx)] - pmulhuw xmm2, XMMWORD [SCALE(2,0,rdx)] - pmulhuw xmm3, XMMWORD [SCALE(3,0,rdx)] - - pxor xmm0,xmm4 - pxor xmm1,xmm5 - pxor xmm2,xmm6 - pxor xmm3,xmm7 - psubw xmm0,xmm4 - psubw xmm1,xmm5 - psubw xmm2,xmm6 - psubw xmm3,xmm7 - movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0 - movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1 - movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2 - movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3 - - add rsi, byte 32*SIZEOF_DCTELEM - add rdx, byte 32*SIZEOF_DCTELEM - add rdi, byte 32*SIZEOF_JCOEF - dec rax - jnz near .quantloop - - uncollect_args - pop rbp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jquanti-sse2.asm b/simd/jquanti-sse2.asm deleted file mode 100644 index aea8604..0000000 --- a/simd/jquanti-sse2.asm +++ /dev/null @@ -1,199 +0,0 @@ -; -; jquanti.asm - sample data conversion and quantization (SSE2) -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" -%include "jdct.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Load data into workspace, applying unsigned->signed conversion -; -; GLOBAL(void) -; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, -; DCTELEM *workspace); -; - -%define sample_data ebp+8 ; JSAMPARRAY sample_data -%define start_col ebp+12 ; JDIMENSION start_col -%define workspace ebp+16 ; DCTELEM *workspace - - align 16 - global EXTN(jsimd_convsamp_sse2) - -EXTN(jsimd_convsamp_sse2): - push ebp - mov ebp,esp - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved - push esi - push edi - - pxor xmm6,xmm6 ; xmm6=(all 0's) - pcmpeqw xmm7,xmm7 - psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} - - mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *) - mov eax, JDIMENSION [start_col] - mov edi, POINTER [workspace] ; (DCTELEM *) - mov ecx, DCTSIZE/4 - alignx 16,7 -.convloop: - mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) - mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) - - movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm0=(01234567) - movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) - - mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) - mov edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) - - movq xmm2, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN) - movq xmm3, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV) - - punpcklbw xmm0,xmm6 ; xmm0=(01234567) - punpcklbw xmm1,xmm6 ; xmm1=(89ABCDEF) - paddw xmm0,xmm7 - paddw xmm1,xmm7 - punpcklbw xmm2,xmm6 ; xmm2=(GHIJKLMN) - punpcklbw xmm3,xmm6 ; xmm3=(OPQRSTUV) - paddw xmm2,xmm7 - paddw xmm3,xmm7 - - movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0 - movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1 - movdqa XMMWORD 
[XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2 - movdqa XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3 - - add esi, byte 4*SIZEOF_JSAMPROW - add edi, byte 4*DCTSIZE*SIZEOF_DCTELEM - dec ecx - jnz short .convloop - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - pop ebp - ret - -; -------------------------------------------------------------------------- -; -; Quantize/descale the coefficients, and store into coef_block -; -; This implementation is based on an algorithm described in -; "How to optimize for the Pentium family of microprocessors" -; (http://www.agner.org/assem/). -; -; GLOBAL(void) -; jsimd_quantize_sse2 (JCOEFPTR coef_block, DCTELEM *divisors, -; DCTELEM *workspace); -; - -%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) -%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) -%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) - -%define coef_block ebp+8 ; JCOEFPTR coef_block -%define divisors ebp+12 ; DCTELEM *divisors -%define workspace ebp+16 ; DCTELEM *workspace - - align 16 - global EXTN(jsimd_quantize_sse2) - -EXTN(jsimd_quantize_sse2): - push ebp - mov ebp,esp -; push ebx ; unused -; push ecx ; unused -; push edx ; need not be preserved - push esi - push edi - - mov esi, POINTER [workspace] - mov edx, POINTER [divisors] - mov edi, JCOEFPTR [coef_block] - mov eax, DCTSIZE2/32 - alignx 16,7 -.quantloop: - movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)] - movdqa xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)] - movdqa xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)] - movdqa xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)] - movdqa xmm0,xmm4 - movdqa xmm1,xmm5 - movdqa xmm2,xmm6 - movdqa xmm3,xmm7 - psraw xmm4,(WORD_BIT-1) - psraw xmm5,(WORD_BIT-1) - psraw xmm6,(WORD_BIT-1) - psraw xmm7,(WORD_BIT-1) - pxor xmm0,xmm4 - pxor xmm1,xmm5 - pxor xmm2,xmm6 - pxor xmm3,xmm7 - psubw xmm0,xmm4 ; if (xmm0 < 0) xmm0 = -xmm0; - psubw xmm1,xmm5 
; if (xmm1 < 0) xmm1 = -xmm1; - psubw xmm2,xmm6 ; if (xmm2 < 0) xmm2 = -xmm2; - psubw xmm3,xmm7 ; if (xmm3 < 0) xmm3 = -xmm3; - - paddw xmm0, XMMWORD [CORRECTION(0,0,edx)] ; correction + roundfactor - paddw xmm1, XMMWORD [CORRECTION(1,0,edx)] - paddw xmm2, XMMWORD [CORRECTION(2,0,edx)] - paddw xmm3, XMMWORD [CORRECTION(3,0,edx)] - pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,edx)] ; reciprocal - pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,edx)] - pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,edx)] - pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,edx)] - pmulhuw xmm0, XMMWORD [SCALE(0,0,edx)] ; scale - pmulhuw xmm1, XMMWORD [SCALE(1,0,edx)] - pmulhuw xmm2, XMMWORD [SCALE(2,0,edx)] - pmulhuw xmm3, XMMWORD [SCALE(3,0,edx)] - - pxor xmm0,xmm4 - pxor xmm1,xmm5 - pxor xmm2,xmm6 - pxor xmm3,xmm7 - psubw xmm0,xmm4 - psubw xmm1,xmm5 - psubw xmm2,xmm6 - psubw xmm3,xmm7 - movdqa XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0 - movdqa XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1 - movdqa XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2 - movdqa XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3 - - add esi, byte 32*SIZEOF_DCTELEM - add edx, byte 32*SIZEOF_DCTELEM - add edi, byte 32*SIZEOF_JCOEF - dec eax - jnz near .quantloop - - pop edi - pop esi -; pop edx ; need not be preserved -; pop ecx ; unused -; pop ebx ; unused - pop ebp - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. - align 16 diff --git a/simd/jsimd.h b/simd/jsimd.h index dc6ec43..a9fc812 100644 --- a/simd/jsimd.h +++ b/simd/jsimd.h @@ -2,10 +2,11 @@ * simd/jsimd.h * * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2011, 2014-2016, D. R. Commander. + * Copyright (C) 2011, 2014-2016, 2018, D. R. Commander. * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. * Copyright (C) 2014, Linaro Limited. - * Copyright (C) 2015-2016, Matthieu Darbois. + * Copyright (C) 2015-2016, 2018, Matthieu Darbois. 
+ * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. @@ -15,857 +16,1068 @@ /* Bitmask for supported acceleration methods */ -#define JSIMD_NONE 0x00 -#define JSIMD_MMX 0x01 -#define JSIMD_3DNOW 0x02 -#define JSIMD_SSE 0x04 -#define JSIMD_SSE2 0x08 -#define JSIMD_ARM_NEON 0x10 -#define JSIMD_MIPS_DSPR2 0x20 -#define JSIMD_ALTIVEC 0x40 +#define JSIMD_NONE 0x00 +#define JSIMD_MMX 0x01 +#define JSIMD_3DNOW 0x02 +#define JSIMD_SSE 0x04 +#define JSIMD_SSE2 0x08 +#define JSIMD_NEON 0x10 +#define JSIMD_DSPR2 0x20 +#define JSIMD_ALTIVEC 0x40 +#define JSIMD_AVX2 0x80 +#define JSIMD_MMI 0x100 /* SIMD Ext: retrieve SIMD/CPU information */ -EXTERN(unsigned int) jpeg_simd_cpu_support (void); +EXTERN(unsigned int) jpeg_simd_cpu_support(void); /* RGB & extended RGB --> YCC Colorspace Conversion */ EXTERN(void) jsimd_rgb_ycc_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_ycc_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_ycc_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_ycc_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) 
jsimd_extbgrx_ycc_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_ycc_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_ycc_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); extern const int jconst_rgb_ycc_convert_sse2[]; EXTERN(void) jsimd_rgb_ycc_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_ycc_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_ycc_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_ycc_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_ycc_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - 
JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_ycc_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_ycc_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +extern const int jconst_rgb_ycc_convert_avx2[]; +EXTERN(void) jsimd_rgb_ycc_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_ycc_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_ycc_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_ycc_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_ycc_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_ycc_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_rgb_ycc_convert_neon - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY 
input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_ycc_convert_neon - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_ycc_convert_neon - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_ycc_convert_neon - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_ycc_convert_neon - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_ycc_convert_neon - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_ycc_convert_neon - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_ycc_convert_neon_slowld3 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_ycc_convert_neon_slowld3 - (JDIMENSION 
img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); - -EXTERN(void) jsimd_rgb_ycc_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extrgb_ycc_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extrgbx_ycc_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extbgr_ycc_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extbgrx_ycc_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extxbgr_ycc_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extxrgb_ycc_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_ycc_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_ycc_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_ycc_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_ycc_convert_dspr2 + 
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_ycc_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_ycc_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_ycc_convert_mmi + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_ycc_convert_mmi + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_ycc_convert_mmi + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_ycc_convert_mmi + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_ycc_convert_mmi + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_ycc_convert_mmi + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_ycc_convert_mmi + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_rgb_ycc_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_ycc_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION 
output_row, int num_rows); EXTERN(void) jsimd_extrgbx_ycc_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_ycc_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_ycc_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_ycc_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_ycc_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); /* RGB & extended RGB --> Grayscale Colorspace Conversion */ EXTERN(void) jsimd_rgb_gray_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_gray_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_gray_convert_mmx - 
(JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_gray_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_gray_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_gray_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_gray_convert_mmx - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); extern const int jconst_rgb_gray_convert_sse2[]; EXTERN(void) jsimd_rgb_gray_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_gray_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_gray_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int 
num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_gray_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_gray_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_gray_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_gray_convert_sse2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); - -EXTERN(void) jsimd_rgb_gray_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extrgb_gray_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extrgbx_gray_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extbgr_gray_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extbgrx_gray_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extxbgr_gray_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - 
JDIMENSION output_row, int num_rows); -EXTERN(void) jsimd_extxrgb_gray_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +extern const int jconst_rgb_gray_convert_avx2[]; +EXTERN(void) jsimd_rgb_gray_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_gray_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_gray_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_gray_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_gray_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_gray_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_gray_convert_avx2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); + +EXTERN(void) jsimd_rgb_gray_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgb_gray_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extrgbx_gray_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgr_gray_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY 
input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extbgrx_gray_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxbgr_gray_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); +EXTERN(void) jsimd_extxrgb_gray_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_rgb_gray_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgb_gray_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extrgbx_gray_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgr_gray_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extbgrx_gray_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxbgr_gray_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - 
JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); EXTERN(void) jsimd_extxrgb_gray_convert_altivec - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows); + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows); /* YCC --> RGB & extended RGB Colorspace Conversion */ EXTERN(void) jsimd_ycc_rgb_convert_mmx - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgb_convert_mmx - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgbx_convert_mmx - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgr_convert_mmx - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgrx_convert_mmx - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxbgr_convert_mmx - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + 
JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxrgb_convert_mmx - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); extern const int jconst_ycc_rgb_convert_sse2[]; EXTERN(void) jsimd_ycc_rgb_convert_sse2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgb_convert_sse2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgbx_convert_sse2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgr_convert_sse2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgrx_convert_sse2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxbgr_convert_sse2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxrgb_convert_sse2 - (JDIMENSION out_width, JSAMPIMAGE 
input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +extern const int jconst_ycc_rgb_convert_avx2[]; +EXTERN(void) jsimd_ycc_rgb_convert_avx2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgb_convert_avx2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgbx_convert_avx2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_avx2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgrx_convert_avx2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxbgr_convert_avx2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxrgb_convert_avx2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_rgb_convert_neon - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgb_convert_neon - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgbx_convert_neon - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION 
out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgr_convert_neon - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgrx_convert_neon - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxbgr_convert_neon - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxrgb_convert_neon - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_rgb565_convert_neon - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgb_convert_neon_slowst3 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgr_convert_neon_slowst3 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); - -EXTERN(void) jsimd_ycc_rgb_convert_mips_dspr2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); 
-EXTERN(void) jsimd_ycc_extrgb_convert_mips_dspr2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); -EXTERN(void) jsimd_ycc_extrgbx_convert_mips_dspr2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); -EXTERN(void) jsimd_ycc_extbgr_convert_mips_dspr2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); -EXTERN(void) jsimd_ycc_extbgrx_convert_mips_dspr2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); -EXTERN(void) jsimd_ycc_extxbgr_convert_mips_dspr2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); -EXTERN(void) jsimd_ycc_extxrgb_convert_mips_dspr2 - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +EXTERN(void) jsimd_ycc_rgb_convert_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgb_convert_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgbx_convert_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgrx_convert_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxbgr_convert_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); 
+EXTERN(void) jsimd_ycc_extxrgb_convert_dspr2 + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); + +EXTERN(void) jsimd_ycc_rgb_convert_mmi + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgb_convert_mmi + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extrgbx_convert_mmi + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgr_convert_mmi + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extbgrx_convert_mmi + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxbgr_convert_mmi + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); +EXTERN(void) jsimd_ycc_extxrgb_convert_mmi + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_rgb_convert_altivec - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgb_convert_altivec - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extrgbx_convert_altivec - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, 
int num_rows); EXTERN(void) jsimd_ycc_extbgr_convert_altivec - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extbgrx_convert_altivec - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxbgr_convert_altivec - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); EXTERN(void) jsimd_ycc_extxrgb_convert_altivec - (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows); + (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row, + JSAMPARRAY output_buf, int num_rows); /* NULL Colorspace Conversion */ -EXTERN(void) jsimd_c_null_convert_mips_dspr2 - (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows, int num_components); +EXTERN(void) jsimd_c_null_convert_dspr2 + (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows, int num_components); /* h2v1 Downsampling */ EXTERN(void) jsimd_h2v1_downsample_mmx - (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data); + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); EXTERN(void) jsimd_h2v1_downsample_sse2 - (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, 
JSAMPARRAY output_data); + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v1_downsample_avx2 + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); EXTERN(void) jsimd_h2v1_downsample_neon - (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data); + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); -EXTERN(void) jsimd_h2v1_downsample_mips_dspr2 - (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data); +EXTERN(void) jsimd_h2v1_downsample_dspr2 + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); EXTERN(void) jsimd_h2v1_downsample_altivec - (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data); + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); /* h2v2 Downsampling */ EXTERN(void) jsimd_h2v2_downsample_mmx - (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data); + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); EXTERN(void) jsimd_h2v2_downsample_sse2 - (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY 
input_data, JSAMPARRAY output_data); + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v2_downsample_avx2 + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); EXTERN(void) jsimd_h2v2_downsample_neon - (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data); + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); -EXTERN(void) jsimd_h2v2_downsample_mips_dspr2 - (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data); +EXTERN(void) jsimd_h2v2_downsample_dspr2 + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); + +EXTERN(void) jsimd_h2v2_downsample_mmi + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); EXTERN(void) jsimd_h2v2_downsample_altivec - (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data); + (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data); /* h2v2 Smooth Downsampling */ -EXTERN(void) jsimd_h2v2_smooth_downsample_mips_dspr2 - (JSAMPARRAY input_data, JSAMPARRAY output_data, - JDIMENSION v_samp_factor, int max_v_samp_factor, - int smoothing_factor, JDIMENSION width_blocks, - JDIMENSION image_width); +EXTERN(void) jsimd_h2v2_smooth_downsample_dspr2 + 
(JSAMPARRAY input_data, JSAMPARRAY output_data, JDIMENSION v_samp_factor, + int max_v_samp_factor, int smoothing_factor, JDIMENSION width_in_blocks, + JDIMENSION image_width); /* Upsampling */ EXTERN(void) jsimd_h2v1_upsample_mmx - (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v2_upsample_mmx - (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v1_upsample_sse2 - (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v2_upsample_sse2 - (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr); - -EXTERN(void) jsimd_h2v1_upsample_mips_dspr2 - (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr); -EXTERN(void) jsimd_h2v2_upsample_mips_dspr2 - (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr); - -EXTERN(void) jsimd_int_upsample_mips_dspr2 - (UINT8 h_expand, UINT8 v_expand, JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr, JDIMENSION output_width, - int max_v_samp_factor); + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_h2v1_upsample_avx2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_avx2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +EXTERN(void) 
jsimd_h2v1_upsample_dspr2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_upsample_dspr2 + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_int_upsample_dspr2 + (UINT8 h_expand, UINT8 v_expand, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr, JDIMENSION output_width, + int max_v_samp_factor); EXTERN(void) jsimd_h2v1_upsample_altivec - (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v2_upsample_altivec - (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); /* Fancy Upsampling */ EXTERN(void) jsimd_h2v1_fancy_upsample_mmx - (int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v2_fancy_upsample_mmx - (int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); extern const int jconst_fancy_upsample_sse2[]; EXTERN(void) jsimd_h2v1_fancy_upsample_sse2 - (int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v2_fancy_upsample_sse2 - (int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + (int 
max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +extern const int jconst_fancy_upsample_avx2[]; +EXTERN(void) jsimd_h2v1_fancy_upsample_avx2 + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_fancy_upsample_avx2 + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v1_fancy_upsample_neon - (int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); -EXTERN(void) jsimd_h2v1_fancy_upsample_mips_dspr2 - (int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); -EXTERN(void) jsimd_h2v2_fancy_upsample_mips_dspr2 - (int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v1_fancy_upsample_dspr2 + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h2v2_fancy_upsample_dspr2 + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); + +EXTERN(void) jsimd_h2v2_fancy_upsample_mmi + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v1_fancy_upsample_altivec - (int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(void) jsimd_h2v2_fancy_upsample_altivec - (int max_v_samp_factor, JDIMENSION downsampled_width, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); + (int max_v_samp_factor, JDIMENSION 
downsampled_width, JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); /* Merged Upsampling */ EXTERN(void) jsimd_h2v1_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_merged_upsample_mmx - (JDIMENSION output_width, 
JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mmx - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); extern const int jconst_merged_upsample_sse2[]; EXTERN(void) jsimd_h2v1_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, 
JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - 
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_sse2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); - -EXTERN(void) jsimd_h2v1_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_mips_dspr2 - (JDIMENSION output_width, 
JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); - -EXTERN(void) jsimd_h2v2_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); -EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2 - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); 
+ +extern const int jconst_merged_upsample_avx2[]; +EXTERN(void) jsimd_h2v1_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); + +EXTERN(void) jsimd_h2v2_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_avx2 + 
(JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); +EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_avx2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); + +EXTERN(void) jsimd_h2v1_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); + +EXTERN(void) jsimd_h2v2_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE 
*range); +EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); +EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_dspr2 + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf, JSAMPLE *range); EXTERN(void) jsimd_h2v1_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION 
in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE 
input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_altivec - (JDIMENSION output_width, JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf); + (JDIMENSION output_width, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf); /* Sample Conversion */ EXTERN(void) jsimd_convsamp_mmx - (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); EXTERN(void) jsimd_convsamp_sse2 - (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + +EXTERN(void) jsimd_convsamp_avx2 + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); EXTERN(void) jsimd_convsamp_neon - (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); -EXTERN(void) jsimd_convsamp_mips_dspr2 - (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); +EXTERN(void) jsimd_convsamp_dspr2 + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); EXTERN(void) jsimd_convsamp_altivec - (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); + (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace); /* Floating Point Sample Conversion */ EXTERN(void) jsimd_convsamp_float_3dnow - (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); EXTERN(void) jsimd_convsamp_float_sse - (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); EXTERN(void) jsimd_convsamp_float_sse2 - (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); + 
(JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); -EXTERN(void) jsimd_convsamp_float_mips_dspr2 - (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); +EXTERN(void) jsimd_convsamp_float_dspr2 + (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT *workspace); /* Slow Integer Forward DCT */ -EXTERN(void) jsimd_fdct_islow_mmx (DCTELEM *data); +EXTERN(void) jsimd_fdct_islow_mmx(DCTELEM *data); extern const int jconst_fdct_islow_sse2[]; -EXTERN(void) jsimd_fdct_islow_sse2 (DCTELEM *data); +EXTERN(void) jsimd_fdct_islow_sse2(DCTELEM *data); -EXTERN(void) jsimd_fdct_islow_neon (DCTELEM *data); +extern const int jconst_fdct_islow_avx2[]; +EXTERN(void) jsimd_fdct_islow_avx2(DCTELEM *data); -EXTERN(void) jsimd_fdct_islow_mips_dspr2 (DCTELEM *data); +EXTERN(void) jsimd_fdct_islow_neon(DCTELEM *data); -EXTERN(void) jsimd_fdct_islow_altivec (DCTELEM *data); +EXTERN(void) jsimd_fdct_islow_dspr2(DCTELEM *data); + +EXTERN(void) jsimd_fdct_islow_mmi(DCTELEM *data); + +EXTERN(void) jsimd_fdct_islow_altivec(DCTELEM *data); /* Fast Integer Forward DCT */ -EXTERN(void) jsimd_fdct_ifast_mmx (DCTELEM *data); +EXTERN(void) jsimd_fdct_ifast_mmx(DCTELEM *data); extern const int jconst_fdct_ifast_sse2[]; -EXTERN(void) jsimd_fdct_ifast_sse2 (DCTELEM *data); +EXTERN(void) jsimd_fdct_ifast_sse2(DCTELEM *data); -EXTERN(void) jsimd_fdct_ifast_neon (DCTELEM *data); +EXTERN(void) jsimd_fdct_ifast_neon(DCTELEM *data); -EXTERN(void) jsimd_fdct_ifast_mips_dspr2 (DCTELEM *data); +EXTERN(void) jsimd_fdct_ifast_dspr2(DCTELEM *data); -EXTERN(void) jsimd_fdct_ifast_altivec (DCTELEM *data); +EXTERN(void) jsimd_fdct_ifast_altivec(DCTELEM *data); /* Floating Point Forward DCT */ -EXTERN(void) jsimd_fdct_float_3dnow (FAST_FLOAT *data); +EXTERN(void) jsimd_fdct_float_3dnow(FAST_FLOAT *data); extern const int jconst_fdct_float_sse[]; -EXTERN(void) jsimd_fdct_float_sse (FAST_FLOAT *data); +EXTERN(void) jsimd_fdct_float_sse(FAST_FLOAT *data); /* Quantization */ 
EXTERN(void) jsimd_quantize_mmx - (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); EXTERN(void) jsimd_quantize_sse2 - (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + +EXTERN(void) jsimd_quantize_avx2 + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); EXTERN(void) jsimd_quantize_neon - (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + +EXTERN(void) jsimd_quantize_dspr2 + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); -EXTERN(void) jsimd_quantize_mips_dspr2 - (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); +EXTERN(void) jsimd_quantize_mmi + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); EXTERN(void) jsimd_quantize_altivec - (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); + (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace); /* Floating Point Quantization */ EXTERN(void) jsimd_quantize_float_3dnow - (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); EXTERN(void) jsimd_quantize_float_sse - (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); EXTERN(void) jsimd_quantize_float_sse2 - (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); -EXTERN(void) jsimd_quantize_float_mips_dspr2 - (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); +EXTERN(void) jsimd_quantize_float_dspr2 + (JCOEFPTR coef_block, FAST_FLOAT *divisors, FAST_FLOAT *workspace); /* Scaled Inverse DCT */ EXTERN(void) jsimd_idct_2x2_mmx - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + 
(void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); EXTERN(void) jsimd_idct_4x4_mmx - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_red_sse2[]; EXTERN(void) jsimd_idct_2x2_sse2 - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); EXTERN(void) jsimd_idct_4x4_sse2 - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); EXTERN(void) jsimd_idct_2x2_neon - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); EXTERN(void) jsimd_idct_4x4_neon - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); - -EXTERN(void) jsimd_idct_2x2_mips_dspr2 - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); -EXTERN(void) jsimd_idct_4x4_mips_dspr2 - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col, int *workspace); -EXTERN(void) jsimd_idct_6x6_mips_dspr2 - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); -EXTERN(void) jsimd_idct_12x12_pass1_mips_dspr2 - (JCOEFPTR coef_block, void *dct_table, int *workspace); -EXTERN(void) jsimd_idct_12x12_pass2_mips_dspr2 - (int *workspace, int *output); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_2x2_dspr2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_4x4_dspr2 + (void *dct_table, JCOEFPTR coef_block, 
JSAMPARRAY output_buf, + JDIMENSION output_col, int *workspace); +EXTERN(void) jsimd_idct_6x6_dspr2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); +EXTERN(void) jsimd_idct_12x12_pass1_dspr2 + (JCOEFPTR coef_block, void *dct_table, int *workspace); +EXTERN(void) jsimd_idct_12x12_pass2_dspr2 + (int *workspace, int *output); /* Slow Integer Inverse DCT */ EXTERN(void) jsimd_idct_islow_mmx - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_islow_sse2[]; EXTERN(void) jsimd_idct_islow_sse2 - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +extern const int jconst_idct_islow_avx2[]; +EXTERN(void) jsimd_idct_islow_avx2 + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); EXTERN(void) jsimd_idct_islow_neon - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); + +EXTERN(void) jsimd_idct_islow_dspr2 + (void *dct_table, JCOEFPTR coef_block, int *output_buf, JSAMPLE *output_col); -EXTERN(void) jsimd_idct_islow_mips_dspr2 - (void *dct_table, JCOEFPTR coef_block, int *output_buf, - JSAMPLE *output_col); +EXTERN(void) jsimd_idct_islow_mmi + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); EXTERN(void) jsimd_idct_islow_altivec - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); /* Fast Integer Inverse DCT */ EXTERN(void) jsimd_idct_ifast_mmx - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + 
(void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_ifast_sse2[]; EXTERN(void) jsimd_idct_ifast_sse2 - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); EXTERN(void) jsimd_idct_ifast_neon - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); -EXTERN(void) jsimd_idct_ifast_cols_mips_dspr2 - (JCOEF *inptr, IFAST_MULT_TYPE *quantptr, DCTELEM *wsptr, - const int *idct_coefs); -EXTERN(void) jsimd_idct_ifast_rows_mips_dspr2 - (DCTELEM *wsptr, JSAMPARRAY output_buf, JDIMENSION output_col, - const int *idct_coefs); +EXTERN(void) jsimd_idct_ifast_cols_dspr2 + (JCOEF *inptr, IFAST_MULT_TYPE *quantptr, DCTELEM *wsptr, + const int *idct_coefs); +EXTERN(void) jsimd_idct_ifast_rows_dspr2 + (DCTELEM *wsptr, JSAMPARRAY output_buf, JDIMENSION output_col, + const int *idct_coefs); EXTERN(void) jsimd_idct_ifast_altivec - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); /* Floating Point Inverse DCT */ EXTERN(void) jsimd_idct_float_3dnow - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_float_sse[]; EXTERN(void) jsimd_idct_float_sse - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); extern const int jconst_idct_float_sse2[]; EXTERN(void) jsimd_idct_float_sse2 - (void *dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col); + (void 
*dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col); /* Huffman coding */ extern const int jconst_huff_encode_one_block[]; -EXTERN(JOCTET*) jsimd_huff_encode_one_block_sse2 - (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, - c_derived_tbl *dctbl, c_derived_tbl *actbl); - -EXTERN(JOCTET*) jsimd_huff_encode_one_block_neon - (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, - c_derived_tbl *dctbl, c_derived_tbl *actbl); - -EXTERN(JOCTET*) jsimd_huff_encode_one_block_neon_slowtbl - (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, - c_derived_tbl *dctbl, c_derived_tbl *actbl); +EXTERN(JOCTET *) jsimd_huff_encode_one_block_sse2 + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); + +EXTERN(JOCTET *) jsimd_huff_encode_one_block_neon + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); + +EXTERN(JOCTET *) jsimd_huff_encode_one_block_neon_slowtbl + (void *state, JOCTET *buffer, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl); + +/* Progressive Huffman encoding */ +EXTERN(void) jsimd_encode_mcu_AC_first_prepare_sse2 + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, + JCOEF *values, size_t *zerobits); + +EXTERN(int) jsimd_encode_mcu_AC_refine_prepare_sse2 + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, + JCOEF *absvalues, size_t *bits); diff --git a/simd/jsimd_i386.c b/simd/jsimd_i386.c deleted file mode 100644 index 6da8bd8..0000000 --- a/simd/jsimd_i386.c +++ /dev/null @@ -1,1091 +0,0 @@ -/* - * jsimd_i386.c - * - * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander. - * Copyright (C) 2015, Matthieu Darbois. - * - * Based on the x86 SIMD extension for IJG JPEG library, - * Copyright (C) 1999-2006, MIYASAKA Masaru. 
- * For conditions of distribution and use, see copyright notice in jsimdext.inc - * - * This file contains the interface between the "normal" portions - * of the library and the SIMD implementations when running on a - * 32-bit x86 architecture. - */ - -#define JPEG_INTERNALS -#include "../jinclude.h" -#include "../jpeglib.h" -#include "../jsimd.h" -#include "../jdct.h" -#include "../jsimddct.h" -#include "jsimd.h" - -/* - * In the PIC cases, we have no guarantee that constants will keep - * their alignment. This macro allows us to verify it at runtime. - */ -#define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0) - -#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ - -static unsigned int simd_support = ~0; -static unsigned int simd_huffman = 1; - -/* - * Check what SIMD accelerations are supported. - * - * FIXME: This code is racy under a multi-threaded environment. - */ -LOCAL(void) -init_simd (void) -{ - char *env = NULL; - - if (simd_support != ~0U) - return; - - simd_support = jpeg_simd_cpu_support(); - - /* Force different settings through environment variables */ - env = getenv("JSIMD_FORCEMMX"); - if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support &= JSIMD_MMX; - env = getenv("JSIMD_FORCE3DNOW"); - if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support &= JSIMD_3DNOW|JSIMD_MMX; - env = getenv("JSIMD_FORCESSE"); - if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support &= JSIMD_SSE|JSIMD_MMX; - env = getenv("JSIMD_FORCESSE2"); - if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support &= JSIMD_SSE2; - env = getenv("JSIMD_FORCENONE"); - if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support = 0; - env = getenv("JSIMD_NOHUFFENC"); - if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_huffman = 0; -} - -GLOBAL(int) -jsimd_can_rgb_ycc (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - 
return 0; - if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_rgb_gray (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_ycc_rgb (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_ycc_rgb565 (void) -{ - return 0; -} - -GLOBAL(void) -jsimd_rgb_ycc_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - - switch(cinfo->in_color_space) { - case JCS_EXT_RGB: - sse2fct=jsimd_extrgb_ycc_convert_sse2; - mmxfct=jsimd_extrgb_ycc_convert_mmx; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - sse2fct=jsimd_extrgbx_ycc_convert_sse2; - mmxfct=jsimd_extrgbx_ycc_convert_mmx; - break; - case JCS_EXT_BGR: - sse2fct=jsimd_extbgr_ycc_convert_sse2; - mmxfct=jsimd_extbgr_ycc_convert_mmx; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - sse2fct=jsimd_extbgrx_ycc_convert_sse2; - mmxfct=jsimd_extbgrx_ycc_convert_mmx; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - 
sse2fct=jsimd_extxbgr_ycc_convert_sse2; - mmxfct=jsimd_extxbgr_ycc_convert_mmx; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - sse2fct=jsimd_extxrgb_ycc_convert_sse2; - mmxfct=jsimd_extxrgb_ycc_convert_mmx; - break; - default: - sse2fct=jsimd_rgb_ycc_convert_sse2; - mmxfct=jsimd_rgb_ycc_convert_mmx; - break; - } - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) - sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); - else if (simd_support & JSIMD_MMX) - mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); -} - -GLOBAL(void) -jsimd_rgb_gray_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - - switch(cinfo->in_color_space) { - case JCS_EXT_RGB: - sse2fct=jsimd_extrgb_gray_convert_sse2; - mmxfct=jsimd_extrgb_gray_convert_mmx; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - sse2fct=jsimd_extrgbx_gray_convert_sse2; - mmxfct=jsimd_extrgbx_gray_convert_mmx; - break; - case JCS_EXT_BGR: - sse2fct=jsimd_extbgr_gray_convert_sse2; - mmxfct=jsimd_extbgr_gray_convert_mmx; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - sse2fct=jsimd_extbgrx_gray_convert_sse2; - mmxfct=jsimd_extbgrx_gray_convert_mmx; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - sse2fct=jsimd_extxbgr_gray_convert_sse2; - mmxfct=jsimd_extxbgr_gray_convert_mmx; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - sse2fct=jsimd_extxrgb_gray_convert_sse2; - mmxfct=jsimd_extxrgb_gray_convert_mmx; - break; - default: - sse2fct=jsimd_rgb_gray_convert_sse2; - mmxfct=jsimd_rgb_gray_convert_mmx; - break; - } - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) - sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); - else if (simd_support & JSIMD_MMX) - 
mmxfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); -} - -GLOBAL(void) -jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ - void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); - void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - sse2fct=jsimd_ycc_extrgb_convert_sse2; - mmxfct=jsimd_ycc_extrgb_convert_mmx; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - sse2fct=jsimd_ycc_extrgbx_convert_sse2; - mmxfct=jsimd_ycc_extrgbx_convert_mmx; - break; - case JCS_EXT_BGR: - sse2fct=jsimd_ycc_extbgr_convert_sse2; - mmxfct=jsimd_ycc_extbgr_convert_mmx; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - sse2fct=jsimd_ycc_extbgrx_convert_sse2; - mmxfct=jsimd_ycc_extbgrx_convert_mmx; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - sse2fct=jsimd_ycc_extxbgr_convert_sse2; - mmxfct=jsimd_ycc_extxbgr_convert_mmx; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - sse2fct=jsimd_ycc_extxrgb_convert_sse2; - mmxfct=jsimd_ycc_extxrgb_convert_mmx; - break; - default: - sse2fct=jsimd_ycc_rgb_convert_sse2; - mmxfct=jsimd_ycc_rgb_convert_mmx; - break; - } - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) - sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); - else if (simd_support & JSIMD_MMX) - mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); -} - -GLOBAL(void) -jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ -} - -GLOBAL(int) -jsimd_can_h2v2_downsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; 
-} - -GLOBAL(int) -jsimd_can_h2v1_downsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) -{ - if (simd_support & JSIMD_SSE2) - jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, - compptr->v_samp_factor, - compptr->width_in_blocks, input_data, - output_data); - else if (simd_support & JSIMD_MMX) - jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, - compptr->v_samp_factor, compptr->width_in_blocks, - input_data, output_data); -} - -GLOBAL(void) -jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) -{ - if (simd_support & JSIMD_SSE2) - jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, - compptr->v_samp_factor, - compptr->width_in_blocks, input_data, - output_data); - else if (simd_support & JSIMD_MMX) - jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor, - compptr->v_samp_factor, compptr->width_in_blocks, - input_data, output_data); -} - -GLOBAL(int) -jsimd_can_h2v2_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - 
-GLOBAL(void) -jsimd_h2v2_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - if (simd_support & JSIMD_SSE2) - jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, - input_data, output_data_ptr); - else if (simd_support & JSIMD_MMX) - jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, - input_data, output_data_ptr); -} - -GLOBAL(void) -jsimd_h2v1_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - if (simd_support & JSIMD_SSE2) - jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, - input_data, output_data_ptr); - else if (simd_support & JSIMD_MMX) - jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width, - input_data, output_data_ptr); -} - -GLOBAL(int) -jsimd_can_h2v2_fancy_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_fancy_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) - jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, - compptr->downsampled_width, input_data, - output_data_ptr); - else if (simd_support & 
JSIMD_MMX) - jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, - compptr->downsampled_width, input_data, - output_data_ptr); -} - -GLOBAL(void) -jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) - jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, - compptr->downsampled_width, input_data, - output_data_ptr); - else if (simd_support & JSIMD_MMX) - jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor, - compptr->downsampled_width, input_data, - output_data_ptr); -} - -GLOBAL(int) -jsimd_can_h2v2_merged_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_merged_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); - void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; - mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; - mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; - break; - case 
JCS_EXT_BGR: - sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; - mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; - mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; - mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; - mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; - break; - default: - sse2fct=jsimd_h2v2_merged_upsample_sse2; - mmxfct=jsimd_h2v2_merged_upsample_mmx; - break; - } - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) - sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); - else if (simd_support & JSIMD_MMX) - mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); -} - -GLOBAL(void) -jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); - void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; - mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; - mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; - break; - case JCS_EXT_BGR: - sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; - mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; - mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; - mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; - break; - case 
JCS_EXT_XRGB: - case JCS_EXT_ARGB: - sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; - mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; - break; - default: - sse2fct=jsimd_h2v1_merged_upsample_sse2; - mmxfct=jsimd_h2v1_merged_upsample_mmx; - break; - } - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) - sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); - else if (simd_support & JSIMD_MMX) - mmxfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); -} - -GLOBAL(int) -jsimd_can_convsamp (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_convsamp_float (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(FAST_FLOAT) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - if (simd_support & JSIMD_SSE) - return 1; - if (simd_support & JSIMD_3DNOW) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM *workspace) -{ - if (simd_support & JSIMD_SSE2) - jsimd_convsamp_sse2(sample_data, start_col, workspace); - else if (simd_support & JSIMD_MMX) - jsimd_convsamp_mmx(sample_data, start_col, workspace); -} - -GLOBAL(void) -jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT *workspace) -{ - if (simd_support & JSIMD_SSE2) - jsimd_convsamp_float_sse2(sample_data, start_col, workspace); - else if (simd_support & JSIMD_SSE) - jsimd_convsamp_float_sse(sample_data, start_col, workspace); - else if (simd_support & JSIMD_3DNOW) - 
jsimd_convsamp_float_3dnow(sample_data, start_col, workspace); -} - -GLOBAL(int) -jsimd_can_fdct_islow (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_fdct_ifast (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_fdct_float (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(FAST_FLOAT) != 4) - return 0; - - if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) - return 1; - if (simd_support & JSIMD_3DNOW) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_fdct_islow (DCTELEM *data) -{ - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) - jsimd_fdct_islow_sse2(data); - else if (simd_support & JSIMD_MMX) - jsimd_fdct_islow_mmx(data); -} - -GLOBAL(void) -jsimd_fdct_ifast (DCTELEM *data) -{ - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) - jsimd_fdct_ifast_sse2(data); - else if (simd_support & JSIMD_MMX) - jsimd_fdct_ifast_mmx(data); -} - -GLOBAL(void) -jsimd_fdct_float (FAST_FLOAT *data) -{ - if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) - jsimd_fdct_float_sse(data); - else if (simd_support & JSIMD_3DNOW) - jsimd_fdct_float_3dnow(data); -} - -GLOBAL(int) -jsimd_can_quantize (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (sizeof(DCTELEM) != 2) - 
return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_quantize_float (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (sizeof(FAST_FLOAT) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - if (simd_support & JSIMD_SSE) - return 1; - if (simd_support & JSIMD_3DNOW) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, - DCTELEM *workspace) -{ - if (simd_support & JSIMD_SSE2) - jsimd_quantize_sse2(coef_block, divisors, workspace); - else if (simd_support & JSIMD_MMX) - jsimd_quantize_mmx(coef_block, divisors, workspace); -} - -GLOBAL(void) -jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, - FAST_FLOAT *workspace) -{ - if (simd_support & JSIMD_SSE2) - jsimd_quantize_float_sse2(coef_block, divisors, workspace); - else if (simd_support & JSIMD_SSE) - jsimd_quantize_float_sse(coef_block, divisors, workspace); - else if (simd_support & JSIMD_3DNOW) - jsimd_quantize_float_3dnow(coef_block, divisors, workspace); -} - -GLOBAL(int) -jsimd_can_idct_2x2 (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_idct_4x4 (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if ((simd_support & 
JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) - jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, - output_col); - else if (simd_support & JSIMD_MMX) - jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col); -} - -GLOBAL(void) -jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) - jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, - output_col); - else if (simd_support & JSIMD_MMX) - jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col); -} - -GLOBAL(int) -jsimd_can_idct_islow (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_idct_ifast (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(IFAST_MULT_TYPE) != 2) - return 0; - if (IFAST_SCALE_BITS != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) - return 1; - if (simd_support & JSIMD_MMX) - return 1; - - return 0; -} - -GLOBAL(int) 
-jsimd_can_idct_float (void) -{ - init_simd(); - - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(FAST_FLOAT) != 4) - return 0; - if (sizeof(FLOAT_MULT_TYPE) != 4) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) - return 1; - if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) - return 1; - if (simd_support & JSIMD_3DNOW) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) - jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, - output_col); - else if (simd_support & JSIMD_MMX) - jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, - output_col); -} - -GLOBAL(void) -jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) - jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, - output_col); - else if (simd_support & JSIMD_MMX) - jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, - output_col); -} - -GLOBAL(void) -jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) - jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, - output_col); - else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse)) - jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf, - output_col); - else if (simd_support & JSIMD_3DNOW) - jsimd_idct_float_3dnow(compptr->dct_table, 
coef_block, output_buf, - output_col); -} - -GLOBAL(int) -jsimd_can_huff_encode_one_block (void) -{ - init_simd(); - - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && simd_huffman && - IS_ALIGNED_SSE(jconst_huff_encode_one_block)) - return 1; - - return 0; -} - -GLOBAL(JOCTET*) -jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, - int last_dc_val, c_derived_tbl *dctbl, - c_derived_tbl *actbl) -{ - return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, - dctbl, actbl); -} diff --git a/simd/jsimd_mips.c b/simd/jsimd_mips.c deleted file mode 100644 index 02e90cd..0000000 --- a/simd/jsimd_mips.c +++ /dev/null @@ -1,1140 +0,0 @@ -/* - * jsimd_mips.c - * - * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander. - * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. - * Copyright (C) 2015, Matthieu Darbois. - * - * Based on the x86 SIMD extension for IJG JPEG library, - * Copyright (C) 1999-2006, MIYASAKA Masaru. - * For conditions of distribution and use, see copyright notice in jsimdext.inc - * - * This file contains the interface between the "normal" portions - * of the library and the SIMD implementations when running on a - * MIPS architecture. 
- */ - -#define JPEG_INTERNALS -#include "../jinclude.h" -#include "../jpeglib.h" -#include "../jsimd.h" -#include "../jdct.h" -#include "../jsimddct.h" -#include "jsimd.h" - -#include -#include -#include - -static unsigned int simd_support = ~0; - -#if defined(__linux__) - -LOCAL(int) -parse_proc_cpuinfo(const char* search_string) -{ - const char* file_name = "/proc/cpuinfo"; - char cpuinfo_line[256]; - FILE* f = NULL; - simd_support = 0; - - if ((f = fopen(file_name, "r")) != NULL) { - while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f) != NULL) { - if (strstr(cpuinfo_line, search_string) != NULL) { - fclose(f); - simd_support |= JSIMD_MIPS_DSPR2; - return 1; - } - } - fclose(f); - } - /* Did not find string in the proc file, or not Linux ELF. */ - return 0; -} - -#endif - -/* - * Check what SIMD accelerations are supported. - * - * FIXME: This code is racy under a multi-threaded environment. - */ -LOCAL(void) -init_simd (void) -{ - char *env = NULL; - - if (simd_support != ~0U) - return; - - simd_support = 0; - -#if defined(__MIPSEL__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) - simd_support |= JSIMD_MIPS_DSPR2; -#elif defined(__linux__) - /* We still have a chance to use MIPS DSPR2 regardless of globally used - * -mdspr2 options passed to gcc by performing runtime detection via - * /proc/cpuinfo parsing on linux */ - if (!parse_proc_cpuinfo("MIPS 74K")) - return; -#endif - - /* Force different settings through environment variables */ - env = getenv("JSIMD_FORCEDSPR2"); - if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support = JSIMD_MIPS_DSPR2; - env = getenv("JSIMD_FORCENONE"); - if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support = 0; -} - -static const int mips_idct_ifast_coefs[4] = { - 0x45404540, // FIX( 1.082392200 / 2) = 17734 = 0x4546 - 0x5A805A80, // FIX( 1.414213562 / 2) = 23170 = 0x5A82 - 0x76407640, // FIX( 1.847759065 / 2) = 30274 = 0x7642 - 0xAC60AC60 // FIX(-2.613125930 / 4) = -21407 = 0xAC61 -}; - -/* The following struct 
is borrowed from jdsample.c */ -typedef void (*upsample1_ptr) (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr); - -typedef struct { - struct jpeg_upsampler pub; - JSAMPARRAY color_buf[MAX_COMPONENTS]; - upsample1_ptr methods[MAX_COMPONENTS]; - int next_row_out; - JDIMENSION rows_to_go; - int rowgroup_height[MAX_COMPONENTS]; - UINT8 h_expand[MAX_COMPONENTS]; - UINT8 v_expand[MAX_COMPONENTS]; -} my_upsampler; - -typedef my_upsampler *my_upsample_ptr; - -GLOBAL(int) -jsimd_can_rgb_ycc (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_rgb_gray (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_ycc_rgb (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_ycc_rgb565 (void) -{ - return 0; -} - -GLOBAL(int) -jsimd_c_can_null_convert (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_rgb_ycc_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - 
void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - - switch(cinfo->in_color_space) { - case JCS_EXT_RGB: - mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - mipsdspr2fct=jsimd_extrgbx_ycc_convert_mips_dspr2; - break; - case JCS_EXT_BGR: - mipsdspr2fct=jsimd_extbgr_ycc_convert_mips_dspr2; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - mipsdspr2fct=jsimd_extbgrx_ycc_convert_mips_dspr2; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - mipsdspr2fct=jsimd_extxbgr_ycc_convert_mips_dspr2; - - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - mipsdspr2fct=jsimd_extxrgb_ycc_convert_mips_dspr2; - break; - default: - mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2; - break; - } - - if (simd_support & JSIMD_MIPS_DSPR2) - mipsdspr2fct(cinfo->image_width, input_buf, output_buf, output_row, - num_rows); -} - -GLOBAL(void) -jsimd_rgb_gray_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - - switch(cinfo->in_color_space) { - case JCS_EXT_RGB: - mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - mipsdspr2fct=jsimd_extrgbx_gray_convert_mips_dspr2; - break; - case JCS_EXT_BGR: - mipsdspr2fct=jsimd_extbgr_gray_convert_mips_dspr2; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - mipsdspr2fct=jsimd_extbgrx_gray_convert_mips_dspr2; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - mipsdspr2fct=jsimd_extxbgr_gray_convert_mips_dspr2; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - mipsdspr2fct=jsimd_extxrgb_gray_convert_mips_dspr2; - break; - default: - mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2; - break; - } - - if (simd_support & JSIMD_MIPS_DSPR2) - mipsdspr2fct(cinfo->image_width, input_buf, output_buf, output_row, - num_rows); -} - -GLOBAL(void) -jsimd_ycc_rgb_convert 
(j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ - void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - mipsdspr2fct=jsimd_ycc_extrgbx_convert_mips_dspr2; - break; - case JCS_EXT_BGR: - mipsdspr2fct=jsimd_ycc_extbgr_convert_mips_dspr2; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - mipsdspr2fct=jsimd_ycc_extbgrx_convert_mips_dspr2; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - mipsdspr2fct=jsimd_ycc_extxbgr_convert_mips_dspr2; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - mipsdspr2fct=jsimd_ycc_extxrgb_convert_mips_dspr2; - break; - default: - mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2; - break; - } - - if (simd_support & JSIMD_MIPS_DSPR2) - mipsdspr2fct(cinfo->output_width, input_buf, input_row, output_buf, - num_rows); -} - -GLOBAL(void) -jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ -} - -GLOBAL(void) -jsimd_c_null_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_c_null_convert_mips_dspr2(cinfo->image_width, input_buf, - output_buf, output_row, num_rows, - cinfo->num_components); -} - -GLOBAL(int) -jsimd_can_h2v2_downsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v2_smooth_downsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if(DCTSIZE != 8) - return 
0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_downsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_h2v2_downsample_mips_dspr2(cinfo->image_width, - cinfo->max_v_samp_factor, - compptr->v_samp_factor, - compptr->width_in_blocks, input_data, - output_data); -} - -GLOBAL(void) -jsimd_h2v2_smooth_downsample (j_compress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) -{ - jsimd_h2v2_smooth_downsample_mips_dspr2(input_data, output_data, - compptr->v_samp_factor, - cinfo->max_v_samp_factor, - cinfo->smoothing_factor, - compptr->width_in_blocks, - cinfo->image_width); -} - -GLOBAL(void) -jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_h2v1_downsample_mips_dspr2(cinfo->image_width, - cinfo->max_v_samp_factor, - compptr->v_samp_factor, - compptr->width_in_blocks, - input_data, output_data); -} - -GLOBAL(int) -jsimd_can_h2v2_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_int_upsample (void) -{ - 
init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_h2v2_upsample_mips_dspr2(cinfo->max_v_samp_factor, - cinfo->output_width, input_data, - output_data_ptr); -} - -GLOBAL(void) -jsimd_h2v1_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_h2v1_upsample_mips_dspr2(cinfo->max_v_samp_factor, - cinfo->output_width, input_data, - output_data_ptr); -} - -GLOBAL(void) -jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) -{ - my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; - - jsimd_int_upsample_mips_dspr2(upsample->h_expand[compptr->component_index], - upsample->v_expand[compptr->component_index], - input_data, output_data_ptr, - cinfo->output_width, - cinfo->max_v_samp_factor); -} - -GLOBAL(int) -jsimd_can_h2v2_fancy_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_fancy_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - if (simd_support & 
JSIMD_MIPS_DSPR2) - jsimd_h2v2_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor, - compptr->downsampled_width, - input_data, output_data_ptr); -} - -GLOBAL(void) -jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_h2v1_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor, - compptr->downsampled_width, - input_data, output_data_ptr); -} - -GLOBAL(int) -jsimd_can_h2v2_merged_upsample (void) -{ - init_simd(); - - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_merged_upsample (void) -{ - init_simd(); - - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, - JSAMPLE *); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - mipsdspr2fct=jsimd_h2v2_extrgb_merged_upsample_mips_dspr2; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - mipsdspr2fct=jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2; - break; - case JCS_EXT_BGR: - mipsdspr2fct=jsimd_h2v2_extbgr_merged_upsample_mips_dspr2; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - mipsdspr2fct=jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - mipsdspr2fct=jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - mipsdspr2fct=jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2; - break; - default: - mipsdspr2fct=jsimd_h2v2_extrgb_merged_upsample_mips_dspr2; - break; - } - - mipsdspr2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf, 
- cinfo->sample_range_limit); -} - -GLOBAL(void) -jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, - JSAMPLE *); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - mipsdspr2fct=jsimd_h2v1_extrgb_merged_upsample_mips_dspr2; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - mipsdspr2fct=jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2; - break; - case JCS_EXT_BGR: - mipsdspr2fct=jsimd_h2v1_extbgr_merged_upsample_mips_dspr2; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - mipsdspr2fct=jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - mipsdspr2fct=jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - mipsdspr2fct=jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2; - break; - default: - mipsdspr2fct=jsimd_h2v1_extrgb_merged_upsample_mips_dspr2; - break; - } - - mipsdspr2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf, - cinfo->sample_range_limit); -} - -GLOBAL(int) -jsimd_can_convsamp (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_convsamp_float (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM *workspace) -{ - if (simd_support & 
JSIMD_MIPS_DSPR2) - jsimd_convsamp_mips_dspr2(sample_data, start_col, workspace); -} - -GLOBAL(void) -jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT *workspace) -{ - if ((simd_support & JSIMD_MIPS_DSPR2)) - jsimd_convsamp_float_mips_dspr2(sample_data, start_col, workspace); -} - -GLOBAL(int) -jsimd_can_fdct_islow (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_fdct_ifast (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_fdct_float (void) -{ - init_simd(); - - return 0; -} - -GLOBAL(void) -jsimd_fdct_islow (DCTELEM *data) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_fdct_islow_mips_dspr2(data); -} - -GLOBAL(void) -jsimd_fdct_ifast (DCTELEM *data) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_fdct_ifast_mips_dspr2(data); -} - -GLOBAL(void) -jsimd_fdct_float (FAST_FLOAT *data) -{ -} - -GLOBAL(int) -jsimd_can_quantize (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_quantize_float (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, - DCTELEM 
*workspace) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_quantize_mips_dspr2(coef_block, divisors, workspace); -} - -GLOBAL(void) -jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, - FAST_FLOAT *workspace) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_quantize_float_mips_dspr2(coef_block, divisors, workspace); -} - -GLOBAL(int) -jsimd_can_idct_2x2 (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_idct_4x4 (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_idct_6x6 (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_idct_12x12 (void) -{ - init_simd(); - - if (BITS_IN_JSAMPLE != 8) - return 0; - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - if 
(simd_support & JSIMD_MIPS_DSPR2) - jsimd_idct_2x2_mips_dspr2(compptr->dct_table, coef_block, output_buf, - output_col); -} - -GLOBAL(void) -jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - if (simd_support & JSIMD_MIPS_DSPR2) { - int workspace[DCTSIZE*4]; /* buffers data between passes */ - jsimd_idct_4x4_mips_dspr2(compptr->dct_table, coef_block, output_buf, - output_col, workspace); - } -} - -GLOBAL(void) -jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - if (simd_support & JSIMD_MIPS_DSPR2) - jsimd_idct_6x6_mips_dspr2(compptr->dct_table, coef_block, output_buf, - output_col); -} - -GLOBAL(void) -jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) -{ - if (simd_support & JSIMD_MIPS_DSPR2) { - int workspace[96]; - int output[12] = { - (int)(output_buf[0] + output_col), - (int)(output_buf[1] + output_col), - (int)(output_buf[2] + output_col), - (int)(output_buf[3] + output_col), - (int)(output_buf[4] + output_col), - (int)(output_buf[5] + output_col), - (int)(output_buf[6] + output_col), - (int)(output_buf[7] + output_col), - (int)(output_buf[8] + output_col), - (int)(output_buf[9] + output_col), - (int)(output_buf[10] + output_col), - (int)(output_buf[11] + output_col), - }; - jsimd_idct_12x12_pass1_mips_dspr2(coef_block, compptr->dct_table, - workspace); - jsimd_idct_12x12_pass2_mips_dspr2(workspace, output); - } -} - -GLOBAL(int) -jsimd_can_idct_islow (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - 
return 0; -} - -GLOBAL(int) -jsimd_can_idct_ifast (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(IFAST_MULT_TYPE) != 2) - return 0; - if (IFAST_SCALE_BITS != 2) - return 0; - - if (simd_support & JSIMD_MIPS_DSPR2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_idct_float (void) -{ - init_simd(); - - return 0; -} - -GLOBAL(void) -jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - if (simd_support & JSIMD_MIPS_DSPR2) { - int output[8] = { - (int)(output_buf[0] + output_col), - (int)(output_buf[1] + output_col), - (int)(output_buf[2] + output_col), - (int)(output_buf[3] + output_col), - (int)(output_buf[4] + output_col), - (int)(output_buf[5] + output_col), - (int)(output_buf[6] + output_col), - (int)(output_buf[7] + output_col), - }; - - jsimd_idct_islow_mips_dspr2(coef_block, compptr->dct_table, - output, IDCT_range_limit(cinfo)); - } -} - -GLOBAL(void) -jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - if (simd_support & JSIMD_MIPS_DSPR2) { - JCOEFPTR inptr; - IFAST_MULT_TYPE *quantptr; - DCTELEM workspace[DCTSIZE2]; /* buffers data between passes */ - - /* Pass 1: process columns from input, store into work array. */ - - inptr = coef_block; - quantptr = (IFAST_MULT_TYPE *) compptr->dct_table; - - jsimd_idct_ifast_cols_mips_dspr2(inptr, quantptr, - workspace, mips_idct_ifast_coefs); - - /* Pass 2: process rows from work array, store into output array. */ - /* Note that we must descale the results by a factor of 8 == 2**3, */ - /* and also undo the PASS1_BITS scaling. 
*/ - - jsimd_idct_ifast_rows_mips_dspr2(workspace, output_buf, - output_col, mips_idct_ifast_coefs); - } -} - -GLOBAL(void) -jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ -} - -GLOBAL(int) -jsimd_can_huff_encode_one_block (void) -{ - return 0; -} - -GLOBAL(JOCTET*) -jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, - int last_dc_val, c_derived_tbl *dctbl, - c_derived_tbl *actbl) -{ - return NULL; -} diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S deleted file mode 100644 index c26dd5c..0000000 --- a/simd/jsimd_mips_dspr2.S +++ /dev/null @@ -1,4486 +0,0 @@ -/* - * MIPS DSPr2 optimizations for libjpeg-turbo - * - * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. - * All Rights Reserved. - * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com) - * Darko Laus (darko.laus@imgtec.com) - * Copyright (C) 2015, D. R. Commander. All Rights Reserved. - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. 
- */ - -#include "jsimd_mips_dspr2_asm.h" - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_c_null_convert_mips_dspr2) -/* - * a0 - cinfo->image_width - * a1 - input_buf - * a2 - output_buf - * a3 - output_row - * 16(sp) - num_rows - * 20(sp) - cinfo->num_components - * - * Null conversion for compression - */ - - SAVE_REGS_ON_STACK 8, s0, s1 - - lw t9, 24(sp) // t9 = num_rows - lw s0, 28(sp) // s0 = cinfo->num_components - andi t0, a0, 3 // t0 = cinfo->image_width & 3 - beqz t0, 4f // no residual - nop -0: - addiu t9, t9, -1 - bltz t9, 7f - li t1, 0 -1: - sll t3, t1, 2 - lwx t5, t3(a2) // t5 = outptr = output_buf[ci] - lw t2, 0(a1) // t2 = inptr = *input_buf - sll t4, a3, 2 - lwx t5, t4(t5) // t5 = outptr = output_buf[ci][output_row] - addu t2, t2, t1 - addu s1, t5, a0 - addu t6, t5, t0 -2: - lbu t3, 0(t2) - addiu t5, t5, 1 - sb t3, -1(t5) - bne t6, t5, 2b - addu t2, t2, s0 -3: - lbu t3, 0(t2) - addu t4, t2, s0 - addu t7, t4, s0 - addu t8, t7, s0 - addu t2, t8, s0 - lbu t4, 0(t4) - lbu t7, 0(t7) - lbu t8, 0(t8) - addiu t5, t5, 4 - sb t3, -4(t5) - sb t4, -3(t5) - sb t7, -2(t5) - bne s1, t5, 3b - sb t8, -1(t5) - addiu t1, t1, 1 - bne t1, s0, 1b - nop - addiu a1, a1, 4 - bgez t9, 0b - addiu a3, a3, 1 - b 7f - nop -4: - addiu t9, t9, -1 - bltz t9, 7f - li t1, 0 -5: - sll t3, t1, 2 - lwx t5, t3(a2) // t5 = outptr = output_buf[ci] - lw t2, 0(a1) // t2 = inptr = *input_buf - sll t4, a3, 2 - lwx t5, t4(t5) // t5 = outptr = output_buf[ci][output_row] - addu t2, t2, t1 - addu s1, t5, a0 - addu t6, t5, t0 -6: - lbu t3, 0(t2) - addu t4, t2, s0 - addu t7, t4, s0 - addu t8, t7, s0 - addu t2, t8, s0 - lbu t4, 0(t4) - lbu t7, 0(t7) - lbu t8, 0(t8) - addiu t5, t5, 4 - sb t3, -4(t5) - sb t4, -3(t5) - sb t7, -2(t5) - bne s1, t5, 6b - sb t8, -1(t5) - addiu t1, t1, 1 - bne t1, s0, 5b - nop - addiu a1, a1, 4 - bgez t9, 4b - addiu a3, a3, 1 -7: - RESTORE_REGS_FROM_STACK 8, s0, s1 - - j ra - nop - 
-END(jsimd_c_null_convert_mips_dspr2) - -/*****************************************************************************/ -/* - * jsimd_extrgb_ycc_convert_mips_dspr2 - * jsimd_extbgr_ycc_convert_mips_dspr2 - * jsimd_extrgbx_ycc_convert_mips_dspr2 - * jsimd_extbgrx_ycc_convert_mips_dspr2 - * jsimd_extxbgr_ycc_convert_mips_dspr2 - * jsimd_extxrgb_ycc_convert_mips_dspr2 - * - * Colorspace conversion RGB -> YCbCr - */ - -.macro GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs - -.macro DO_RGB_TO_YCC r, \ - g, \ - b, \ - inptr - lbu \r, \r_offs(\inptr) - lbu \g, \g_offs(\inptr) - lbu \b, \b_offs(\inptr) - addiu \inptr, \pixel_size -.endm - -LEAF_MIPS_DSPR2(jsimd_\colorid\()_ycc_convert_mips_dspr2) -/* - * a0 - cinfo->image_width - * a1 - input_buf - * a2 - output_buf - * a3 - output_row - * 16(sp) - num_rows - */ - - SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - lw t7, 48(sp) // t7 = num_rows - li s0, 0x4c8b // FIX(0.29900) - li s1, 0x9646 // FIX(0.58700) - li s2, 0x1d2f // FIX(0.11400) - li s3, 0xffffd4cd // -FIX(0.16874) - li s4, 0xffffab33 // -FIX(0.33126) - li s5, 0x8000 // FIX(0.50000) - li s6, 0xffff94d1 // -FIX(0.41869) - li s7, 0xffffeb2f // -FIX(0.08131) - li t8, 0x807fff // CBCR_OFFSET + ONE_HALF-1 - -0: - addiu t7, -1 // --num_rows - lw t6, 0(a1) // t6 = input_buf[0] - lw t0, 0(a2) - lw t1, 4(a2) - lw t2, 8(a2) - sll t3, a3, 2 - lwx t0, t3(t0) // t0 = output_buf[0][output_row] - lwx t1, t3(t1) // t1 = output_buf[1][output_row] - lwx t2, t3(t2) // t2 = output_buf[2][output_row] - - addu t9, t2, a0 // t9 = end address - addiu a3, 1 - -1: - DO_RGB_TO_YCC t3, t4, t5, t6 - - mtlo s5, $ac0 - mtlo t8, $ac1 - mtlo t8, $ac2 - maddu $ac0, s2, t5 - maddu $ac1, s5, t5 - maddu $ac2, s5, t3 - maddu $ac0, s0, t3 - maddu $ac1, s3, t3 - maddu $ac2, s6, t4 - maddu $ac0, s1, t4 - maddu $ac1, s4, t4 - maddu $ac2, s7, t5 - extr.w t3, $ac0, 16 - extr.w t4, $ac1, 16 - extr.w t5, $ac2, 16 - sb t3, 0(t0) - sb t4, 0(t1) - sb t5, 0(t2) - 
addiu t0, 1 - addiu t2, 1 - bne t2, t9, 1b - addiu t1, 1 - bgtz t7, 0b - addiu a1, 4 - - RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - j ra - nop -END(jsimd_\colorid\()_ycc_convert_mips_dspr2) - -.purgem DO_RGB_TO_YCC - -.endm - -/*------------------------------------------id -- pix R G B */ -GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2 -GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0 -GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2 -GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0 -GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1 -GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3 - -/*****************************************************************************/ -/* - * jsimd_ycc_extrgb_convert_mips_dspr2 - * jsimd_ycc_extbgr_convert_mips_dspr2 - * jsimd_ycc_extrgbx_convert_mips_dspr2 - * jsimd_ycc_extbgrx_convert_mips_dspr2 - * jsimd_ycc_extxbgr_convert_mips_dspr2 - * jsimd_ycc_extxrgb_convert_mips_dspr2 - * - * Colorspace conversion YCbCr -> RGB - */ - -.macro GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs, a_offs - -.macro STORE_YCC_TO_RGB scratch0 \ - scratch1 \ - scratch2 \ - outptr - sb \scratch0, \r_offs(\outptr) - sb \scratch1, \g_offs(\outptr) - sb \scratch2, \b_offs(\outptr) -.if (\pixel_size == 4) - li t0, 0xFF - sb t0, \a_offs(\outptr) -.endif - addiu \outptr, \pixel_size -.endm - -LEAF_MIPS_DSPR2(jsimd_ycc_\colorid\()_convert_mips_dspr2) -/* - * a0 - cinfo->image_width - * a1 - input_buf - * a2 - input_row - * a3 - output_buf - * 16(sp) - num_rows - */ - - SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - lw s1, 48(sp) - li t3, 0x8000 - li t4, 0x166e9 // FIX(1.40200) - li t5, 0x1c5a2 // FIX(1.77200) - li t6, 0xffff492e // -FIX(0.71414) - li t7, 0xffffa7e6 // -FIX(0.34414) - repl.ph t8, 128 - -0: - lw s0, 0(a3) - lw t0, 0(a1) - lw t1, 4(a1) - lw t2, 8(a1) - sll s5, a2, 2 - addiu s1, -1 - lwx s2, s5(t0) - lwx 
s3, s5(t1) - lwx s4, s5(t2) - addu t9, s2, a0 - addiu a2, 1 - -1: - lbu s7, 0(s4) // cr - lbu s6, 0(s3) // cb - lbu s5, 0(s2) // y - addiu s2, 1 - addiu s4, 1 - addiu s7, -128 - addiu s6, -128 - mul t2, t7, s6 - mul t0, t6, s7 // Crgtab[cr] - sll s7, 15 - mulq_rs.w t1, t4, s7 // Crrtab[cr] - sll s6, 15 - addu t2, t3 // Cbgtab[cb] - addu t2, t0 - - mulq_rs.w t0, t5, s6 // Cbbtab[cb] - sra t2, 16 - addu t1, s5 - addu t2, s5 // add y - ins t2, t1, 16, 16 - subu.ph t2, t2, t8 - addu t0, s5 - shll_s.ph t2, t2, 8 - subu t0, 128 - shra.ph t2, t2, 8 - shll_s.w t0, t0, 24 - addu.ph t2, t2, t8 // clip & store - sra t0, t0, 24 - sra t1, t2, 16 - addiu t0, 128 - - STORE_YCC_TO_RGB t1, t2, t0, s0 - - bne s2, t9, 1b - addiu s3, 1 - bgtz s1, 0b - addiu a3, 4 - - RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - j ra - nop -END(jsimd_ycc_\colorid\()_convert_mips_dspr2) - -.purgem STORE_YCC_TO_RGB - -.endm - -/*------------------------------------------id -- pix R G B A */ -GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2, 3 -GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0, 3 -GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2, 3 -GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0, 3 -GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1, 0 -GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3, 0 - -/*****************************************************************************/ -/* - * jsimd_extrgb_gray_convert_mips_dspr2 - * jsimd_extbgr_gray_convert_mips_dspr2 - * jsimd_extrgbx_gray_convert_mips_dspr2 - * jsimd_extbgrx_gray_convert_mips_dspr2 - * jsimd_extxbgr_gray_convert_mips_dspr2 - * jsimd_extxrgb_gray_convert_mips_dspr2 - * - * Colorspace conversion RGB -> GRAY - */ - -.macro GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs - -.macro DO_RGB_TO_GRAY r, \ - g, \ - b, \ - inptr - lbu \r, \r_offs(\inptr) - lbu \g, \g_offs(\inptr) - lbu \b, \b_offs(\inptr) - addiu 
\inptr, \pixel_size -.endm - -LEAF_MIPS_DSPR2(jsimd_\colorid\()_gray_convert_mips_dspr2) -/* - * a0 - cinfo->image_width - * a1 - input_buf - * a2 - output_buf - * a3 - output_row - * 16(sp) - num_rows - */ - - SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - li s0, 0x4c8b // s0 = FIX(0.29900) - li s1, 0x9646 // s1 = FIX(0.58700) - li s2, 0x1d2f // s2 = FIX(0.11400) - li s7, 0x8000 // s7 = FIX(0.50000) - lw s6, 48(sp) - andi t7, a0, 3 - -0: - addiu s6, -1 // s6 = num_rows - lw t0, 0(a1) - lw t1, 0(a2) - sll t3, a3, 2 - lwx t1, t3(t1) - addiu a3, 1 - addu t9, t1, a0 - subu t8, t9, t7 - beq t1, t8, 2f - nop - -1: - DO_RGB_TO_GRAY t3, t4, t5, t0 - DO_RGB_TO_GRAY s3, s4, s5, t0 - - mtlo s7, $ac0 - maddu $ac0, s2, t5 - maddu $ac0, s1, t4 - maddu $ac0, s0, t3 - mtlo s7, $ac1 - maddu $ac1, s2, s5 - maddu $ac1, s1, s4 - maddu $ac1, s0, s3 - extr.w t6, $ac0, 16 - - DO_RGB_TO_GRAY t3, t4, t5, t0 - DO_RGB_TO_GRAY s3, s4, s5, t0 - - mtlo s7, $ac0 - maddu $ac0, s2, t5 - maddu $ac0, s1, t4 - extr.w t2, $ac1, 16 - maddu $ac0, s0, t3 - mtlo s7, $ac1 - maddu $ac1, s2, s5 - maddu $ac1, s1, s4 - maddu $ac1, s0, s3 - extr.w t5, $ac0, 16 - sb t6, 0(t1) - sb t2, 1(t1) - extr.w t3, $ac1, 16 - addiu t1, 4 - sb t5, -2(t1) - sb t3, -1(t1) - bne t1, t8, 1b - nop - -2: - beqz t7, 4f - nop - -3: - DO_RGB_TO_GRAY t3, t4, t5, t0 - - mtlo s7, $ac0 - maddu $ac0, s2, t5 - maddu $ac0, s1, t4 - maddu $ac0, s0, t3 - extr.w t6, $ac0, 16 - sb t6, 0(t1) - addiu t1, 1 - bne t1, t9, 3b - nop - -4: - bgtz s6, 0b - addiu a1, 4 - - RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - j ra - nop -END(jsimd_\colorid\()_gray_convert_mips_dspr2) - -.purgem DO_RGB_TO_GRAY - -.endm - -/*------------------------------------------id -- pix R G B */ -GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2 -GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0 -GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2 -GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0 
-GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1 -GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3 -/*****************************************************************************/ -/* - * jsimd_h2v2_merged_upsample_mips_dspr2 - * jsimd_h2v2_extrgb_merged_upsample_mips_dspr2 - * jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2 - * jsimd_h2v2_extbgr_merged_upsample_mips_dspr2 - * jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2 - * jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2 - * jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2 - * - * Merged h2v2 upsample routines - */ -.macro GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 colorid, \ - pixel_size, \ - r1_offs, \ - g1_offs, \ - b1_offs, \ - a1_offs, \ - r2_offs, \ - g2_offs, \ - b2_offs, \ - a2_offs - -.macro STORE_H2V2_2_PIXELS scratch0 \ - scratch1 \ - scratch2 \ - scratch3 \ - scratch4 \ - scratch5 \ - outptr - sb \scratch0, \r1_offs(\outptr) - sb \scratch1, \g1_offs(\outptr) - sb \scratch2, \b1_offs(\outptr) - sb \scratch3, \r2_offs(\outptr) - sb \scratch4, \g2_offs(\outptr) - sb \scratch5, \b2_offs(\outptr) -.if (\pixel_size == 8) - li \scratch0, 0xFF - sb \scratch0, \a1_offs(\outptr) - sb \scratch0, \a2_offs(\outptr) -.endif - addiu \outptr, \pixel_size -.endm - -.macro STORE_H2V2_1_PIXEL scratch0 \ - scratch1 \ - scratch2 \ - outptr - sb \scratch0, \r1_offs(\outptr) - sb \scratch1, \g1_offs(\outptr) - sb \scratch2, \b1_offs(\outptr) - -.if (\pixel_size == 8) - li t0, 0xFF - sb t0, \a1_offs(\outptr) -.endif -.endm - -LEAF_MIPS_DSPR2(jsimd_h2v2_\colorid\()_merged_upsample_mips_dspr2) -/* - * a0 - cinfo->output_width - * a1 - input_buf - * a2 - in_row_group_ctr - * a3 - output_buf - * 16(sp) - cinfo->sample_range_limit - */ - - SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra - - lw t9, 56(sp) // cinfo->sample_range_limit - lw v0, 0(a1) - lw v1, 4(a1) - lw t0, 8(a1) - sll t1, a2, 3 - addiu t2, t1, 4 - sll t3, a2, 2 - lw t4, 0(a3) // t4 = output_buf[0] - lwx t1, t1(v0) // t1 = 
input_buf[0][in_row_group_ctr*2] - lwx t2, t2(v0) // t2 = input_buf[0][in_row_group_ctr*2 + 1] - lwx t5, t3(v1) // t5 = input_buf[1][in_row_group_ctr] - lwx t6, t3(t0) // t6 = input_buf[2][in_row_group_ctr] - lw t7, 4(a3) // t7 = output_buf[1] - li s1, 0xe6ea - addiu t8, s1, 0x7fff // t8 = 0x166e9 [FIX(1.40200)] - addiu s0, t8, 0x5eb9 // s0 = 0x1c5a2 [FIX(1.77200)] - addiu s1, zero, 0xa7e6 // s4 = 0xffffa7e6 [-FIX(0.34414)] - xori s2, s1, 0xeec8 // s3 = 0xffff492e [-FIX(0.71414)] - srl t3, a0, 1 - blez t3, 2f - addu t0, t5, t3 // t0 = end address - 1: - lbu t3, 0(t5) - lbu s3, 0(t6) - addiu t5, t5, 1 - addiu t3, t3, -128 // (cb - 128) - addiu s3, s3, -128 // (cr - 128) - mult $ac1, s1, t3 - madd $ac1, s2, s3 - sll s3, s3, 15 - sll t3, t3, 15 - mulq_rs.w s4, t8, s3 // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS - extr_r.w s5, $ac1, 16 - mulq_rs.w s6, s0, t3 // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS - lbu v0, 0(t1) - addiu t6, t6, 1 - addiu t1, t1, 2 - addu t3, v0, s4 // y+cred - addu s3, v0, s5 // y+cgreen - addu v1, v0, s6 // y+cblue - addu t3, t9, t3 // y+cred - addu s3, t9, s3 // y+cgreen - addu v1, t9, v1 // y+cblue - lbu AT, 0(t3) - lbu s7, 0(s3) - lbu ra, 0(v1) - lbu v0, -1(t1) - addu t3, v0, s4 // y+cred - addu s3, v0, s5 // y+cgreen - addu v1, v0, s6 // y+cblue - addu t3, t9, t3 // y+cred - addu s3, t9, s3 // y+cgreen - addu v1, t9, v1 // y+cblue - lbu t3, 0(t3) - lbu s3, 0(s3) - lbu v1, 0(v1) - lbu v0, 0(t2) - - STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t4 - - addu t3, v0, s4 // y+cred - addu s3, v0, s5 // y+cgreen - addu v1, v0, s6 // y+cblue - addu t3, t9, t3 // y+cred - addu s3, t9, s3 // y+cgreen - addu v1, t9, v1 // y+cblue - lbu AT, 0(t3) - lbu s7, 0(s3) - lbu ra, 0(v1) - lbu v0, 1(t2) - addiu t2, t2, 2 - addu t3, v0, s4 // y+cred - addu s3, v0, s5 // y+cgreen - addu v1, v0, s6 // y+cblue - addu t3, t9, t3 // y+cred - addu s3, t9, s3 // y+cgreen - addu v1, t9, v1 // y+cblue - lbu t3, 0(t3) - lbu s3, 0(s3) - lbu v1, 0(v1) - - STORE_H2V2_2_PIXELS AT, s7, ra, 
t3, s3, v1, t7 - - bne t0, t5, 1b - nop -2: - andi t0, a0, 1 - beqz t0, 4f - lbu t3, 0(t5) - lbu s3, 0(t6) - addiu t3, t3, -128 // (cb - 128) - addiu s3, s3, -128 // (cr - 128) - mult $ac1, s1, t3 - madd $ac1, s2, s3 - sll s3, s3, 15 - sll t3, t3, 15 - lbu v0, 0(t1) - extr_r.w s5, $ac1, 16 - mulq_rs.w s4, t8, s3 // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS - mulq_rs.w s6, s0, t3 // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS - addu t3, v0, s4 // y+cred - addu s3, v0, s5 // y+cgreen - addu v1, v0, s6 // y+cblue - addu t3, t9, t3 // y+cred - addu s3, t9, s3 // y+cgreen - addu v1, t9, v1 // y+cblue - lbu t3, 0(t3) - lbu s3, 0(s3) - lbu v1, 0(v1) - lbu v0, 0(t2) - - STORE_H2V2_1_PIXEL t3, s3, v1, t4 - - addu t3, v0, s4 // y+cred - addu s3, v0, s5 // y+cgreen - addu v1, v0, s6 // y+cblue - addu t3, t9, t3 // y+cred - addu s3, t9, s3 // y+cgreen - addu v1, t9, v1 // y+cblue - lbu t3, 0(t3) - lbu s3, 0(s3) - lbu v1, 0(v1) - - STORE_H2V2_1_PIXEL t3, s3, v1, t7 -4: - RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra - - j ra - nop - -END(jsimd_h2v2_\colorid\()_merged_upsample_mips_dspr2) - -.purgem STORE_H2V2_1_PIXEL -.purgem STORE_H2V2_2_PIXELS -.endm - -/*-----------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */ -GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 -GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6 -GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7 -GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7 -GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4 -GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4 -/*****************************************************************************/ -/* - * jsimd_h2v1_merged_upsample_mips_dspr2 - * jsimd_h2v1_extrgb_merged_upsample_mips_dspr2 - * jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2 - * jsimd_h2v1_extbgr_merged_upsample_mips_dspr2 - * 
jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2 - * jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2 - * jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2 - * - * Merged h2v1 upsample routines - */ - -.macro GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 colorid, \ - pixel_size, \ - r1_offs, \ - g1_offs, \ - b1_offs, \ - a1_offs, \ - r2_offs, \ - g2_offs, \ - b2_offs, \ - a2_offs - -.macro STORE_H2V1_2_PIXELS scratch0 \ - scratch1 \ - scratch2 \ - scratch3 \ - scratch4 \ - scratch5 \ - outptr - sb \scratch0, \r1_offs(\outptr) - sb \scratch1, \g1_offs(\outptr) - sb \scratch2, \b1_offs(\outptr) - sb \scratch3, \r2_offs(\outptr) - sb \scratch4, \g2_offs(\outptr) - sb \scratch5, \b2_offs(\outptr) -.if (\pixel_size == 8) - li t0, 0xFF - sb t0, \a1_offs(\outptr) - sb t0, \a2_offs(\outptr) -.endif - addiu \outptr, \pixel_size -.endm - -.macro STORE_H2V1_1_PIXEL scratch0 \ - scratch1 \ - scratch2 \ - outptr - sb \scratch0, \r1_offs(\outptr) - sb \scratch1, \g1_offs(\outptr) - sb \scratch2, \b1_offs(\outptr) -.if (\pixel_size == 8) - li t0, 0xFF - sb t0, \a1_offs(\outptr) -.endif -.endm - -LEAF_MIPS_DSPR2(jsimd_h2v1_\colorid\()_merged_upsample_mips_dspr2) -/* - * a0 - cinfo->output_width - * a1 - input_buf - * a2 - in_row_group_ctr - * a3 - output_buf - * 16(sp) - range_limit - */ - - SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra - - li t0, 0xe6ea - lw t1, 0(a1) // t1 = input_buf[0] - lw t2, 4(a1) // t2 = input_buf[1] - lw t3, 8(a1) // t3 = input_buf[2] - lw t8, 56(sp) // t8 = range_limit - addiu s1, t0, 0x7fff // s1 = 0x166e9 [FIX(1.40200)] - addiu s2, s1, 0x5eb9 // s2 = 0x1c5a2 [FIX(1.77200)] - addiu s0, t0, 0x9916 // s0 = 0x8000 - addiu s4, zero, 0xa7e6 // s4 = 0xffffa7e6 [-FIX(0.34414)] - xori s3, s4, 0xeec8 // s3 = 0xffff492e [-FIX(0.71414)] - srl t0, a0, 1 - sll t4, a2, 2 - lwx s5, t4(t1) // s5 = inptr0 - lwx s6, t4(t2) // s6 = inptr1 - lwx s7, t4(t3) // s7 = inptr2 - lw t7, 0(a3) // t7 = outptr - blez t0, 2f - addu t9, s6, t0 // t9 = end address -1: - lbu t2, 0(s6) // 
t2 = cb - lbu t0, 0(s7) // t0 = cr - lbu t1, 0(s5) // t1 = y - addiu t2, t2, -128 // t2 = cb - 128 - addiu t0, t0, -128 // t0 = cr - 128 - mult $ac1, s4, t2 - madd $ac1, s3, t0 - sll t0, t0, 15 - sll t2, t2, 15 - mulq_rs.w t0, s1, t0 // t0 = (C1*cr + ONE_HALF)>> SCALEBITS - extr_r.w t5, $ac1, 16 - mulq_rs.w t6, s2, t2 // t6 = (C2*cb + ONE_HALF)>> SCALEBITS - addiu s7, s7, 1 - addiu s6, s6, 1 - addu t2, t1, t0 // t2 = y + cred - addu t3, t1, t5 // t3 = y + cgreen - addu t4, t1, t6 // t4 = y + cblue - addu t2, t8, t2 - addu t3, t8, t3 - addu t4, t8, t4 - lbu t1, 1(s5) - lbu v0, 0(t2) - lbu v1, 0(t3) - lbu ra, 0(t4) - addu t2, t1, t0 - addu t3, t1, t5 - addu t4, t1, t6 - addu t2, t8, t2 - addu t3, t8, t3 - addu t4, t8, t4 - lbu t2, 0(t2) - lbu t3, 0(t3) - lbu t4, 0(t4) - - STORE_H2V1_2_PIXELS v0, v1, ra, t2, t3, t4, t7 - - bne t9, s6, 1b - addiu s5, s5, 2 -2: - andi t0, a0, 1 - beqz t0, 4f - nop -3: - lbu t2, 0(s6) - lbu t0, 0(s7) - lbu t1, 0(s5) - addiu t2, t2, -128 //(cb - 128) - addiu t0, t0, -128 //(cr - 128) - mul t3, s4, t2 - mul t4, s3, t0 - sll t0, t0, 15 - sll t2, t2, 15 - mulq_rs.w t0, s1, t0 // (C1*cr + ONE_HALF)>> SCALEBITS - mulq_rs.w t6, s2, t2 // (C2*cb + ONE_HALF)>> SCALEBITS - addu t3, t3, s0 - addu t3, t4, t3 - sra t5, t3, 16 // (C4*cb + ONE_HALF + C3*cr)>> SCALEBITS - addu t2, t1, t0 // y + cred - addu t3, t1, t5 // y + cgreen - addu t4, t1, t6 // y + cblue - addu t2, t8, t2 - addu t3, t8, t3 - addu t4, t8, t4 - lbu t2, 0(t2) - lbu t3, 0(t3) - lbu t4, 0(t4) - - STORE_H2V1_1_PIXEL t2, t3, t4, t7 -4: - RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra - - j ra - nop - -END(jsimd_h2v1_\colorid\()_merged_upsample_mips_dspr2) - -.purgem STORE_H2V1_1_PIXEL -.purgem STORE_H2V1_2_PIXELS -.endm - -/*-----------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */ -GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 -GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6 
-GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7 -GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7 -GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4 -GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4 -/*****************************************************************************/ -/* - * jsimd_h2v2_fancy_upsample_mips_dspr2 - * - * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. - */ -LEAF_MIPS_DSPR2(jsimd_h2v2_fancy_upsample_mips_dspr2) -/* - * a0 - cinfo->max_v_samp_factor - * a1 - downsampled_width - * a2 - input_data - * a3 - output_data_ptr - */ - - SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5 - - li s4, 0 - lw s2, 0(a3) // s2 = *output_data_ptr -0: - li t9, 2 - lw s1, -4(a2) // s1 = inptr1 - -1: - lw s0, 0(a2) // s0 = inptr0 - lwx s3, s4(s2) - addiu s5, a1, -2 // s5 = downsampled_width - 2 - srl t4, s5, 1 - sll t4, t4, 1 - lbu t0, 0(s0) - lbu t1, 1(s0) - lbu t2, 0(s1) - lbu t3, 1(s1) - addiu s0, 2 - addiu s1, 2 - addu t8, s0, t4 // t8 = end address - andi s5, s5, 1 // s5 = residual - sll t4, t0, 1 - sll t6, t1, 1 - addu t0, t0, t4 // t0 = (*inptr0++) * 3 - addu t1, t1, t6 // t1 = (*inptr0++) * 3 - addu t7, t0, t2 // t7 = thiscolsum - addu t6, t1, t3 // t5 = nextcolsum - sll t0, t7, 2 // t0 = thiscolsum * 4 - subu t1, t0, t7 // t1 = thiscolsum * 3 - shra_r.w t0, t0, 4 - addiu t1, 7 - addu t1, t1, t6 - srl t1, t1, 4 - sb t0, 0(s3) - sb t1, 1(s3) - beq t8, s0, 22f // skip to final iteration if width == 3 - addiu s3, 2 -2: - lh t0, 0(s0) // t0 = A3|A2 - lh t2, 0(s1) // t2 = B3|B2 - addiu s0, 2 - addiu s1, 2 - preceu.ph.qbr t0, t0 // t0 = 0|A3|0|A2 - preceu.ph.qbr t2, t2 // t2 = 0|B3|0|B2 - shll.ph t1, t0, 1 - sll t3, t6, 1 - addu.ph t0, t1, t0 // t0 = A3*3|A2*3 - addu t3, t3, t6 // t3 = this * 3 - addu.ph t0, t0, t2 // t0 = next2|next1 - addu t1, t3, t7 - andi t7, t0, 0xFFFF // t7 = next1 - sll t2, t7, 1 - addu t2, t7, t2 // t2 = 
next1*3 - addu t4, t2, t6 - srl t6, t0, 16 // t6 = next2 - shra_r.w t1, t1, 4 // t1 = (this*3 + last + 8) >> 4 - addu t0, t3, t7 - addiu t0, 7 - srl t0, t0, 4 // t0 = (this*3 + next1 + 7) >> 4 - shra_r.w t4, t4, 4 // t3 = (next1*3 + this + 8) >> 4 - addu t2, t2, t6 - addiu t2, 7 - srl t2, t2, 4 // t2 = (next1*3 + next2 + 7) >> 4 - sb t1, 0(s3) - sb t0, 1(s3) - sb t4, 2(s3) - sb t2, 3(s3) - bne t8, s0, 2b - addiu s3, 4 -22: - beqz s5, 4f - addu t8, s0, s5 -3: - lbu t0, 0(s0) - lbu t2, 0(s1) - addiu s0, 1 - addiu s1, 1 - sll t3, t6, 1 - sll t1, t0, 1 - addu t1, t0, t1 // t1 = inptr0 * 3 - addu t3, t3, t6 // t3 = thiscolsum * 3 - addu t5, t1, t2 - addu t1, t3, t7 - shra_r.w t1, t1, 4 - addu t0, t3, t5 - addiu t0, 7 - srl t0, t0, 4 - sb t1, 0(s3) - sb t0, 1(s3) - addiu s3, 2 - move t7, t6 - bne t8, s0, 3b - move t6, t5 -4: - sll t0, t6, 2 // t0 = thiscolsum * 4 - subu t1, t0, t6 // t1 = thiscolsum * 3 - addu t1, t1, t7 - addiu s4, 4 - shra_r.w t1, t1, 4 - addiu t0, 7 - srl t0, t0, 4 - sb t1, 0(s3) - sb t0, 1(s3) - addiu t9, -1 - addiu s3, 2 - bnez t9, 1b - lw s1, 4(a2) - srl t0, s4, 2 - subu t0, a0, t0 - bgtz t0, 0b - addiu a2, 4 - - RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5 - - j ra - nop -END(jsimd_h2v2_fancy_upsample_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_h2v1_fancy_upsample_mips_dspr2) -/* - * a0 - cinfo->max_v_samp_factor - * a1 - downsampled_width - * a2 - input_data - * a3 - output_data_ptr - */ - - SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 - - .set at - - beqz a0, 3f - sll t0, a0, 2 - lw s1, 0(a3) - li s3, 0x10001 - addu s0, s1, t0 -0: - addiu t8, a1, -2 - srl t9, t8, 2 - lw t7, 0(a2) - lw s2, 0(s1) - lbu t0, 0(t7) - lbu t1, 1(t7) // t1 = inptr[1] - sll t2, t0, 1 - addu t2, t2, t0 // t2 = invalue*3 - addu t2, t2, t1 - shra_r.w t2, t2, 2 - sb t0, 0(s2) - sb t2, 1(s2) - beqz t9, 11f - addiu s2, 2 -1: - ulw t0, 0(t7) // t0 = |P3|P2|P1|P0| - ulw t1, 1(t7) - ulh t2, 4(t7) // t2 = 
|0|0|P5|P4| - preceu.ph.qbl t3, t0 // t3 = |0|P3|0|P2| - preceu.ph.qbr t0, t0 // t0 = |0|P1|0|P0| - preceu.ph.qbr t2, t2 // t2 = |0|P5|0|P4| - preceu.ph.qbl t4, t1 // t4 = |0|P4|0|P3| - preceu.ph.qbr t1, t1 // t1 = |0|P2|0|P1| - shll.ph t5, t4, 1 - shll.ph t6, t1, 1 - addu.ph t5, t5, t4 // t5 = |P4*3|P3*3| - addu.ph t6, t6, t1 // t6 = |P2*3|P1*3| - addu.ph t4, t3, s3 - addu.ph t0, t0, s3 - addu.ph t4, t4, t5 - addu.ph t0, t0, t6 - shrl.ph t4, t4, 2 // t4 = |0|P3|0|P2| - shrl.ph t0, t0, 2 // t0 = |0|P1|0|P0| - addu.ph t2, t2, t5 - addu.ph t3, t3, t6 - shra_r.ph t2, t2, 2 // t2 = |0|P5|0|P4| - shra_r.ph t3, t3, 2 // t3 = |0|P3|0|P2| - shll.ph t2, t2, 8 - shll.ph t3, t3, 8 - or t2, t4, t2 - or t3, t3, t0 - addiu t9, -1 - usw t3, 0(s2) - usw t2, 4(s2) - addiu s2, 8 - bgtz t9, 1b - addiu t7, 4 -11: - andi t8, 3 - beqz t8, 22f - addiu t7, 1 - -2: - lbu t0, 0(t7) - addiu t7, 1 - sll t1, t0, 1 - addu t2, t0, t1 // t2 = invalue - lbu t3, -2(t7) - lbu t4, 0(t7) - addiu t3, 1 - addiu t4, 2 - addu t3, t3, t2 - addu t4, t4, t2 - srl t3, 2 - srl t4, 2 - sb t3, 0(s2) - sb t4, 1(s2) - addiu t8, -1 - bgtz t8, 2b - addiu s2, 2 - -22: - lbu t0, 0(t7) - lbu t2, -1(t7) - sll t1, t0, 1 - addu t1, t1, t0 // t1 = invalue * 3 - addu t1, t1, t2 - addiu t1, 1 - srl t1, t1, 2 - sb t1, 0(s2) - sb t0, 1(s2) - addiu s1, 4 - bne s1, s0, 0b - addiu a2, 4 -3: - RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 - - j ra - nop -END(jsimd_h2v1_fancy_upsample_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_h2v1_downsample_mips_dspr2) -/* - * a0 - cinfo->image_width - * a1 - cinfo->max_v_samp_factor - * a2 - compptr->v_samp_factor - * a3 - compptr->width_in_blocks - * 16(sp) - input_data - * 20(sp) - output_data - */ - .set at - - SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4 - - beqz a2, 7f - lw s1, 44(sp) // s1 = output_data - lw s0, 40(sp) // s0 = input_data - srl s2, a0, 2 - andi t9, a0, 2 - srl t7, t9, 1 - addu s2, t7, s2 - sll t0, a3, 3 // 
t0 = width_in_blocks*DCT - srl t7, t0, 1 - subu s2, t7, s2 -0: - andi t6, a0, 1 // t6 = temp_index - addiu t6, -1 - lw t4, 0(s1) // t4 = outptr - lw t5, 0(s0) // t5 = inptr0 - li s3, 0 // s3 = bias - srl t7, a0, 1 // t7 = image_width1 - srl s4, t7, 2 - andi t8, t7, 3 -1: - ulhu t0, 0(t5) - ulhu t1, 2(t5) - ulhu t2, 4(t5) - ulhu t3, 6(t5) - raddu.w.qb t0, t0 - raddu.w.qb t1, t1 - raddu.w.qb t2, t2 - raddu.w.qb t3, t3 - shra.ph t0, t0, 1 - shra_r.ph t1, t1, 1 - shra.ph t2, t2, 1 - shra_r.ph t3, t3, 1 - sb t0, 0(t4) - sb t1, 1(t4) - sb t2, 2(t4) - sb t3, 3(t4) - addiu s4, -1 - addiu t4, 4 - bgtz s4, 1b - addiu t5, 8 - beqz t8, 3f - addu s4, t4, t8 -2: - ulhu t0, 0(t5) - raddu.w.qb t0, t0 - addqh.w t0, t0, s3 - xori s3, s3, 1 - sb t0, 0(t4) - addiu t4, 1 - bne t4, s4, 2b - addiu t5, 2 -3: - lbux t1, t6(t5) - sll t1, 1 - addqh.w t2, t1, s3 // t2 = pixval1 - xori s3, s3, 1 - addqh.w t3, t1, s3 // t3 = pixval2 - blez s2, 5f - append t3, t2, 8 - addu t5, t4, s2 // t5 = loop_end2 -4: - ush t3, 0(t4) - addiu s2, -1 - bgtz s2, 4b - addiu t4, 2 -5: - beqz t9, 6f - nop - sb t2, 0(t4) -6: - addiu s1, 4 - addiu a2, -1 - bnez a2, 0b - addiu s0, 4 -7: - RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4 - - j ra - nop -END(jsimd_h2v1_downsample_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_h2v2_downsample_mips_dspr2) - -/* - * a0 - cinfo->image_width - * a1 - cinfo->max_v_samp_factor - * a2 - compptr->v_samp_factor - * a3 - compptr->width_in_blocks - * 16(sp) - input_data - * 20(sp) - output_data - */ - .set at - SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - beqz a2, 8f - lw s1, 52(sp) // s1 = output_data - lw s0, 48(sp) // s0 = input_data - - andi t6, a0, 1 // t6 = temp_index - addiu t6, -1 - srl t7, a0, 1 // t7 = image_width1 - srl s4, t7, 2 - andi t8, t7, 3 - andi t9, a0, 2 - srl s2, a0, 2 - srl t7, t9, 1 - addu s2, t7, s2 - sll t0, a3, 3 // s2 = width_in_blocks*DCT - srl t7, t0, 1 - subu s2, t7, s2 
-0: - lw t4, 0(s1) // t4 = outptr - lw t5, 0(s0) // t5 = inptr0 - lw s7, 4(s0) // s7 = inptr1 - li s6, 1 // s6 = bias -2: - ulw t0, 0(t5) // t0 = |P3|P2|P1|P0| - ulw t1, 0(s7) // t1 = |Q3|Q2|Q1|Q0| - ulw t2, 4(t5) - ulw t3, 4(s7) - precrq.ph.w t7, t0, t1 // t2 = |P3|P2|Q3|Q2| - ins t0, t1, 16, 16 // t0 = |Q1|Q0|P1|P0| - raddu.w.qb t1, t7 - raddu.w.qb t0, t0 - shra_r.w t1, t1, 2 - addiu t0, 1 - srl t0, 2 - precrq.ph.w t7, t2, t3 - ins t2, t3, 16, 16 - raddu.w.qb t7, t7 - raddu.w.qb t2, t2 - shra_r.w t7, t7, 2 - addiu t2, 1 - srl t2, 2 - sb t0, 0(t4) - sb t1, 1(t4) - sb t2, 2(t4) - sb t7, 3(t4) - addiu t4, 4 - addiu t5, 8 - addiu s4, s4, -1 - bgtz s4, 2b - addiu s7, 8 - beqz t8, 4f - addu t8, t4, t8 -3: - ulhu t0, 0(t5) - ulhu t1, 0(s7) - ins t0, t1, 16, 16 - raddu.w.qb t0, t0 - addu t0, t0, s6 - srl t0, 2 - xori s6, s6, 3 - sb t0, 0(t4) - addiu t5, 2 - addiu t4, 1 - bne t8, t4, 3b - addiu s7, 2 -4: - lbux t1, t6(t5) - sll t1, 1 - lbux t0, t6(s7) - sll t0, 1 - addu t1, t1, t0 - addu t3, t1, s6 - srl t0, t3, 2 // t2 = pixval1 - xori s6, s6, 3 - addu t2, t1, s6 - srl t1, t2, 2 // t3 = pixval2 - blez s2, 6f - append t1, t0, 8 -5: - ush t1, 0(t4) - addiu s2, -1 - bgtz s2, 5b - addiu t4, 2 -6: - beqz t9, 7f - nop - sb t0, 0(t4) -7: - addiu s1, 4 - addiu a2, -1 - bnez a2, 0b - addiu s0, 8 -8: - RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - j ra - nop -END(jsimd_h2v2_downsample_mips_dspr2) -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_h2v2_smooth_downsample_mips_dspr2) -/* - * a0 - input_data - * a1 - output_data - * a2 - compptr->v_samp_factor - * a3 - cinfo->max_v_samp_factor - * 16(sp) - cinfo->smoothing_factor - * 20(sp) - compptr->width_in_blocks - * 24(sp) - cinfo->image_width - */ - - .set at - - SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - lw s7, 52(sp) // compptr->width_in_blocks - lw s0, 56(sp) // cinfo->image_width - lw s6, 48(sp) // cinfo->smoothing_factor - sll s7, 3 // 
output_cols = width_in_blocks * DCTSIZE - sll v0, s7, 1 - subu v0, v0, s0 - blez v0, 2f - move v1, zero - addiu t0, a3, 2 // t0 = cinfo->max_v_samp_factor + 2 -0: - addiu t1, a0, -4 - sll t2, v1, 2 - lwx t1, t2(t1) - move t3, v0 - addu t1, t1, s0 - lbu t2, -1(t1) -1: - addiu t3, t3, -1 - sb t2, 0(t1) - bgtz t3, 1b - addiu t1, t1, 1 - addiu v1, v1, 1 - bne v1, t0, 0b - nop -2: - li v0, 80 - mul v0, s6, v0 - li v1, 16384 - move t4, zero - move t5, zero - subu t6, v1, v0 // t6 = 16384 - tmp_smoot_f * 80 - sll t7, s6, 4 // t7 = tmp_smoot_f * 16 -3: -/* Special case for first column: pretend column -1 is same as column 0 */ - sll v0, t4, 2 - lwx t8, v0(a1) // outptr = output_data[outrow] - sll v1, t5, 2 - addiu t9, v1, 4 - addiu s0, v1, -4 - addiu s1, v1, 8 - lwx s2, v1(a0) // inptr0 = input_data[inrow] - lwx t9, t9(a0) // inptr1 = input_data[inrow+1] - lwx s0, s0(a0) // above_ptr = input_data[inrow-1] - lwx s1, s1(a0) // below_ptr = input_data[inrow+2] - lh v0, 0(s2) - lh v1, 0(t9) - lh t0, 0(s0) - lh t1, 0(s1) - ins v0, v1, 16, 16 - ins t0, t1, 16, 16 - raddu.w.qb t2, v0 - raddu.w.qb s3, t0 - lbu v0, 0(s2) - lbu v1, 2(s2) - lbu t0, 0(t9) - lbu t1, 2(t9) - addu v0, v0, v1 - mult $ac1,t2, t6 - addu t0, t0, t1 - lbu t2, 2(s0) - addu t0, t0, v0 - lbu t3, 2(s1) - addu s3, t0, s3 - lbu v0, 0(s0) - lbu t0, 0(s1) - sll s3, s3, 1 - addu v0, v0, t2 - addu t0, t0, t3 - addu t0, t0, v0 - addu s3, t0, s3 - madd $ac1,s3, t7 - extr_r.w v0, $ac1, 16 - addiu t8, t8, 1 - addiu s2, s2, 2 - addiu t9, t9, 2 - addiu s0, s0, 2 - addiu s1, s1, 2 - sb v0, -1(t8) - addiu s4, s7, -2 - and s4, s4, 3 - addu s5, s4, t8 //end adress -4: - lh v0, 0(s2) - lh v1, 0(t9) - lh t0, 0(s0) - lh t1, 0(s1) - ins v0, v1, 16, 16 - ins t0, t1, 16, 16 - raddu.w.qb t2, v0 - raddu.w.qb s3, t0 - lbu v0, -1(s2) - lbu v1, 2(s2) - lbu t0, -1(t9) - lbu t1, 2(t9) - addu v0, v0, v1 - mult $ac1, t2, t6 - addu t0, t0, t1 - lbu t2, 2(s0) - addu t0, t0, v0 - lbu t3, 2(s1) - addu s3, t0, s3 - lbu v0, -1(s0) - lbu t0, -1(s1) - 
sll s3, s3, 1 - addu v0, v0, t2 - addu t0, t0, t3 - addu t0, t0, v0 - addu s3, t0, s3 - madd $ac1, s3, t7 - extr_r.w t2, $ac1, 16 - addiu t8, t8, 1 - addiu s2, s2, 2 - addiu t9, t9, 2 - addiu s0, s0, 2 - sb t2, -1(t8) - bne s5, t8, 4b - addiu s1, s1, 2 - addiu s5, s7, -2 - subu s5, s5, s4 - addu s5, s5, t8 //end adress -5: - lh v0, 0(s2) - lh v1, 0(t9) - lh t0, 0(s0) - lh t1, 0(s1) - ins v0, v1, 16, 16 - ins t0, t1, 16, 16 - raddu.w.qb t2, v0 - raddu.w.qb s3, t0 - lbu v0, -1(s2) - lbu v1, 2(s2) - lbu t0, -1(t9) - lbu t1, 2(t9) - addu v0, v0, v1 - mult $ac1, t2, t6 - addu t0, t0, t1 - lbu t2, 2(s0) - addu t0, t0, v0 - lbu t3, 2(s1) - addu s3, t0, s3 - lbu v0, -1(s0) - lbu t0, -1(s1) - sll s3, s3, 1 - addu v0, v0, t2 - addu t0, t0, t3 - lh v1, 2(t9) - addu t0, t0, v0 - lh v0, 2(s2) - addu s3, t0, s3 - lh t0, 2(s0) - lh t1, 2(s1) - madd $ac1, s3, t7 - extr_r.w t2, $ac1, 16 - ins t0, t1, 16, 16 - ins v0, v1, 16, 16 - raddu.w.qb s3, t0 - lbu v1, 4(s2) - lbu t0, 1(t9) - lbu t1, 4(t9) - sb t2, 0(t8) - raddu.w.qb t3, v0 - lbu v0, 1(s2) - addu t0, t0, t1 - mult $ac1, t3, t6 - addu v0, v0, v1 - lbu t2, 4(s0) - addu t0, t0, v0 - lbu v0, 1(s0) - addu s3, t0, s3 - lbu t0, 1(s1) - lbu t3, 4(s1) - addu v0, v0, t2 - sll s3, s3, 1 - addu t0, t0, t3 - lh v1, 4(t9) - addu t0, t0, v0 - lh v0, 4(s2) - addu s3, t0, s3 - lh t0, 4(s0) - lh t1, 4(s1) - madd $ac1, s3, t7 - extr_r.w t2, $ac1, 16 - ins t0, t1, 16, 16 - ins v0, v1, 16, 16 - raddu.w.qb s3, t0 - lbu v1, 6(s2) - lbu t0, 3(t9) - lbu t1, 6(t9) - sb t2, 1(t8) - raddu.w.qb t3, v0 - lbu v0, 3(s2) - addu t0, t0,t1 - mult $ac1, t3, t6 - addu v0, v0, v1 - lbu t2, 6(s0) - addu t0, t0, v0 - lbu v0, 3(s0) - addu s3, t0, s3 - lbu t0, 3(s1) - lbu t3, 6(s1) - addu v0, v0, t2 - sll s3, s3, 1 - addu t0, t0, t3 - lh v1, 6(t9) - addu t0, t0, v0 - lh v0, 6(s2) - addu s3, t0, s3 - lh t0, 6(s0) - lh t1, 6(s1) - madd $ac1, s3, t7 - extr_r.w t3, $ac1, 16 - ins t0, t1, 16, 16 - ins v0, v1, 16, 16 - raddu.w.qb s3, t0 - lbu v1, 8(s2) - lbu t0, 5(t9) - lbu 
t1, 8(t9) - sb t3, 2(t8) - raddu.w.qb t2, v0 - lbu v0, 5(s2) - addu t0, t0, t1 - mult $ac1, t2, t6 - addu v0, v0, v1 - lbu t2, 8(s0) - addu t0, t0, v0 - lbu v0, 5(s0) - addu s3, t0, s3 - lbu t0, 5(s1) - lbu t3, 8(s1) - addu v0, v0, t2 - sll s3, s3, 1 - addu t0, t0, t3 - addiu t8, t8, 4 - addu t0, t0, v0 - addiu s2, s2, 8 - addu s3, t0, s3 - addiu t9, t9, 8 - madd $ac1, s3, t7 - extr_r.w t1, $ac1, 16 - addiu s0, s0, 8 - addiu s1, s1, 8 - bne s5, t8, 5b - sb t1, -1(t8) -/* Special case for last column */ - lh v0, 0(s2) - lh v1, 0(t9) - lh t0, 0(s0) - lh t1, 0(s1) - ins v0, v1, 16, 16 - ins t0, t1, 16, 16 - raddu.w.qb t2, v0 - raddu.w.qb s3, t0 - lbu v0, -1(s2) - lbu v1, 1(s2) - lbu t0, -1(t9) - lbu t1, 1(t9) - addu v0, v0, v1 - mult $ac1, t2, t6 - addu t0, t0, t1 - lbu t2, 1(s0) - addu t0, t0, v0 - lbu t3, 1(s1) - addu s3, t0, s3 - lbu v0, -1(s0) - lbu t0, -1(s1) - sll s3, s3, 1 - addu v0, v0, t2 - addu t0, t0, t3 - addu t0, t0, v0 - addu s3, t0, s3 - madd $ac1, s3, t7 - extr_r.w t0, $ac1, 16 - addiu t5, t5, 2 - sb t0, 0(t8) - addiu t4, t4, 1 - bne t4, a2, 3b - addiu t5, t5, 2 - - RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - j ra - nop - -END(jsimd_h2v2_smooth_downsample_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_int_upsample_mips_dspr2) -/* - * a0 - upsample->h_expand[compptr->component_index] - * a1 - upsample->v_expand[compptr->component_index] - * a2 - input_data - * a3 - output_data_ptr - * 16(sp) - cinfo->output_width - * 20(sp) - cinfo->max_v_samp_factor - */ - .set at - - SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 - - lw s0, 0(a3) // s0 = output_data - lw s1, 32(sp) // s1 = cinfo->output_width - lw s2, 36(sp) // s2 = cinfo->max_v_samp_factor - li t6, 0 // t6 = inrow - beqz s2, 10f - li s3, 0 // s3 = outrow -0: - addu t0, a2, t6 - addu t7, s0, s3 - lw t3, 0(t0) // t3 = inptr - lw t8, 0(t7) // t8 = outptr - beqz s1, 4f - addu t5, t8, s1 // t5 = outend -1: - lb t2, 0(t3) // 
t2 = invalue = *inptr++ - addiu t3, 1 - beqz a0, 3f - move t0, a0 // t0 = h_expand -2: - sb t2, 0(t8) - addiu t0, -1 - bgtz t0, 2b - addiu t8, 1 -3: - bgt t5, t8, 1b - nop -4: - addiu t9, a1, -1 // t9 = v_expand - 1 - blez t9, 9f - nop -5: - lw t3, 0(s0) - lw t4, 4(s0) - subu t0, s1, 0xF - blez t0, 7f - addu t5, t3, s1 // t5 = end address - andi t7, s1, 0xF // t7 = residual - subu t8, t5, t7 -6: - ulw t0, 0(t3) - ulw t1, 4(t3) - ulw t2, 8(t3) - usw t0, 0(t4) - ulw t0, 12(t3) - usw t1, 4(t4) - usw t2, 8(t4) - usw t0, 12(t4) - addiu t3, 16 - bne t3, t8, 6b - addiu t4, 16 - beqz t7, 8f - nop -7: - lbu t0, 0(t3) - sb t0, 0(t4) - addiu t3, 1 - bne t3, t5, 7b - addiu t4, 1 -8: - addiu t9, -1 - bgtz t9, 5b - addiu s0, 8 -9: - addu s3, s3, a1 - bne s3, s2, 0b - addiu t6, 1 -10: - RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 - - j ra - nop -END(jsimd_int_upsample_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2) -/* - * a0 - cinfo->max_v_samp_factor - * a1 - cinfo->output_width - * a2 - input_data - * a3 - output_data_ptr - */ - lw t7, 0(a3) // t7 = output_data - andi t8, a1, 0xf // t8 = residual - sll t0, a0, 2 - blez a0, 4f - addu t9, t7, t0 // t9 = output_data end address -0: - lw t5, 0(t7) // t5 = outptr - lw t6, 0(a2) // t6 = inptr - addu t3, t5, a1 // t3 = outptr + output_width (end address) - subu t3, t8 // t3 = end address - residual - beq t5, t3, 2f - move t4, t8 -1: - ulw t0, 0(t6) // t0 = |P3|P2|P1|P0| - ulw t2, 4(t6) // t2 = |P7|P6|P5|P4| - srl t1, t0, 16 // t1 = |X|X|P3|P2| - ins t0, t0, 16, 16 // t0 = |P1|P0|P1|P0| - ins t1, t1, 16, 16 // t1 = |P3|P2|P3|P2| - ins t0, t0, 8, 16 // t0 = |P1|P1|P0|P0| - ins t1, t1, 8, 16 // t1 = |P3|P3|P2|P2| - usw t0, 0(t5) - usw t1, 4(t5) - srl t0, t2, 16 // t0 = |X|X|P7|P6| - ins t2, t2, 16, 16 // t2 = |P5|P4|P5|P4| - ins t0, t0, 16, 16 // t0 = |P7|P6|P7|P6| - ins t2, t2, 8, 16 // t2 = |P5|P5|P4|P4| - ins t0, t0, 8, 16 // t0 = 
|P7|P7|P6|P6| - usw t2, 8(t5) - usw t0, 12(t5) - addiu t5, 16 - bne t5, t3, 1b - addiu t6, 8 - beqz t8, 3f - move t4, t8 -2: - lbu t1, 0(t6) - sb t1, 0(t5) - sb t1, 1(t5) - addiu t4, -2 - addiu t6, 1 - bgtz t4, 2b - addiu t5, 2 -3: - addiu t7, 4 - bne t9, t7, 0b - addiu a2, 4 -4: - j ra - nop -END(jsimd_h2v1_upsample_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_h2v2_upsample_mips_dspr2) -/* - * a0 - cinfo->max_v_samp_factor - * a1 - cinfo->output_width - * a2 - input_data - * a3 - output_data_ptr - */ - lw t7, 0(a3) - blez a0, 7f - andi t9, a1, 0xf // t9 = residual -0: - lw t6, 0(a2) // t6 = inptr - lw t5, 0(t7) // t5 = outptr - addu t8, t5, a1 // t8 = outptr end address - subu t8, t9 // t8 = end address - residual - beq t5, t8, 2f - move t4, t9 -1: - ulw t0, 0(t6) - srl t1, t0, 16 - ins t0, t0, 16, 16 - ins t0, t0, 8, 16 - ins t1, t1, 16, 16 - ins t1, t1, 8, 16 - ulw t2, 4(t6) - usw t0, 0(t5) - usw t1, 4(t5) - srl t3, t2, 16 - ins t2, t2, 16, 16 - ins t2, t2, 8, 16 - ins t3, t3, 16, 16 - ins t3, t3, 8, 16 - usw t2, 8(t5) - usw t3, 12(t5) - addiu t5, 16 - bne t5, t8, 1b - addiu t6, 8 - beqz t9, 3f - move t4, t9 -2: - lbu t0, 0(t6) - sb t0, 0(t5) - sb t0, 1(t5) - addiu t4, -2 - addiu t6, 1 - bgtz t4, 2b - addiu t5, 2 -3: - lw t6, 0(t7) // t6 = outptr[0] - lw t5, 4(t7) // t5 = outptr[1] - addu t4, t6, a1 // t4 = new end address - beq a1, t9, 5f - subu t8, t4, t9 -4: - ulw t0, 0(t6) - ulw t1, 4(t6) - ulw t2, 8(t6) - usw t0, 0(t5) - ulw t0, 12(t6) - usw t1, 4(t5) - usw t2, 8(t5) - usw t0, 12(t5) - addiu t6, 16 - bne t6, t8, 4b - addiu t5, 16 - beqz t9, 6f - nop -5: - lbu t0, 0(t6) - sb t0, 0(t5) - addiu t6, 1 - bne t6, t4, 5b - addiu t5, 1 -6: - addiu t7, 8 - addiu a0, -2 - bgtz a0, 0b - addiu a2, 4 -7: - j ra - nop -END(jsimd_h2v2_upsample_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_idct_islow_mips_dspr2) -/* - * a0 - 
coef_block - * a1 - compptr->dcttable - * a2 - output - * a3 - range_limit - */ - - SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - addiu sp, sp, -256 - move v0, sp - addiu v1, zero, 8 // v1 = DCTSIZE = 8 -1: - lh s4, 32(a0) // s4 = inptr[16] - lh s5, 64(a0) // s5 = inptr[32] - lh s6, 96(a0) // s6 = inptr[48] - lh t1, 112(a0) // t1 = inptr[56] - lh t7, 16(a0) // t7 = inptr[8] - lh t5, 80(a0) // t5 = inptr[40] - lh t3, 48(a0) // t3 = inptr[24] - or s4, s4, t1 - or s4, s4, t3 - or s4, s4, t5 - or s4, s4, t7 - or s4, s4, s5 - or s4, s4, s6 - bnez s4, 2f - addiu v1, v1, -1 - lh s5, 0(a1) // quantptr[DCTSIZE*0] - lh s6, 0(a0) // inptr[DCTSIZE*0] - mul s5, s5, s6 // DEQUANTIZE(inptr[0], quantptr[0]) - sll s5, s5, 2 - sw s5, 0(v0) - sw s5, 32(v0) - sw s5, 64(v0) - sw s5, 96(v0) - sw s5, 128(v0) - sw s5, 160(v0) - sw s5, 192(v0) - b 3f - sw s5, 224(v0) -2: - lh t0, 112(a1) - lh t2, 48(a1) - lh t4, 80(a1) - lh t6, 16(a1) - mul t0, t0, t1 // DEQUANTIZE(inptr[DCTSIZE*7],quant[DCTSIZE*7]) - mul t1, t2, t3 // DEQUANTIZE(inptr[DCTSIZE*3],quant[DCTSIZE*3]) - mul t2, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*5],quant[DCTSIZE*5]) - mul t3, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*1],quant[DCTSIZE*1]) - lh t4, 32(a1) - lh t5, 32(a0) - lh t6, 96(a1) - lh t7, 96(a0) - addu s0, t0, t1 // z3 = tmp0 + tmp2 - addu s1, t1, t2 // z2 = tmp1 + tmp2 - addu s2, t2, t3 // z4 = tmp1 + tmp3 - addu s3, s0, s2 // z3 + z4 - addiu t9, zero, 9633 // FIX_1_175875602 - mul s3, s3, t9 // z5 = MULTIPLY(z3 + z4, FIX_1_175875602) - addu t8, t0, t3 // z1 = tmp0 + tmp3 - addiu t9, zero, 2446 // FIX_0_298631336 - mul t0, t0, t9 // tmp0 = MULTIPLY(tmp0, FIX_0_298631336) - addiu t9, zero, 16819 // FIX_2_053119869 - mul t2, t2, t9 // tmp1 = MULTIPLY(tmp1, FIX_2_053119869) - addiu t9, zero, 25172 // FIX_3_072711026 - mul t1, t1, t9 // tmp2 = MULTIPLY(tmp2, FIX_3_072711026) - addiu t9, zero, 12299 // FIX_1_501321110 - mul t3, t3, t9 // tmp3 = MULTIPLY(tmp3, FIX_1_501321110) - addiu t9, zero, 16069 // FIX_1_961570560 - 
mul s0, s0, t9 // -z3 = MULTIPLY(z3, FIX_1_961570560) - addiu t9, zero, 3196 // FIX_0_390180644 - mul s2, s2, t9 // -z4 = MULTIPLY(z4, FIX_0_390180644) - addiu t9, zero, 7373 // FIX_0_899976223 - mul t8, t8, t9 // -z1 = MULTIPLY(z1, FIX_0_899976223) - addiu t9, zero, 20995 // FIX_2_562915447 - mul s1, s1, t9 // -z2 = MULTIPLY(z2, FIX_2_562915447) - subu s0, s3, s0 // z3 += z5 - addu t0, t0, s0 // tmp0 += z3 - addu t1, t1, s0 // tmp2 += z3 - subu s2, s3, s2 // z4 += z5 - addu t2, t2, s2 // tmp1 += z4 - addu t3, t3, s2 // tmp3 += z4 - subu t0, t0, t8 // tmp0 += z1 - subu t1, t1, s1 // tmp2 += z2 - subu t2, t2, s1 // tmp1 += z2 - subu t3, t3, t8 // tmp3 += z1 - mul s0, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*2],quant[DCTSIZE*2]) - addiu t9, zero, 6270 // FIX_0_765366865 - mul s1, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*6],quant[DCTSIZE*6]) - lh t4, 0(a1) - lh t5, 0(a0) - lh t6, 64(a1) - lh t7, 64(a0) - mul s2, t9, s0 // MULTIPLY(z2, FIX_0_765366865) - mul t5, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*0],quant[DCTSIZE*0]) - mul t6, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*4],quant[DCTSIZE*4]) - addiu t9, zero, 4433 // FIX_0_541196100 - addu s3, s0, s1 // z2 + z3 - mul s3, s3, t9 // z1 = MULTIPLY(z2 + z3, FIX_0_541196100) - addiu t9, zero, 15137 // FIX_1_847759065 - mul t8, s1, t9 // MULTIPLY(z3, FIX_1_847759065) - addu t4, t5, t6 - subu t5, t5, t6 - sll t4, t4, 13 // tmp0 = (z2 + z3) << CONST_BITS - sll t5, t5, 13 // tmp1 = (z2 - z3) << CONST_BITS - addu t7, s3, s2 // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865) - subu t6, s3, t8 // tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065) - addu s0, t4, t7 - subu s1, t4, t7 - addu s2, t5, t6 - subu s3, t5, t6 - addu t4, s0, t3 - subu s0, s0, t3 - addu t3, s2, t1 - subu s2, s2, t1 - addu t1, s3, t2 - subu s3, s3, t2 - addu t2, s1, t0 - subu s1, s1, t0 - shra_r.w t4, t4, 11 - shra_r.w t3, t3, 11 - shra_r.w t1, t1, 11 - shra_r.w t2, t2, 11 - shra_r.w s1, s1, 11 - shra_r.w s3, s3, 11 - shra_r.w s2, s2, 11 - shra_r.w s0, s0, 11 - sw t4, 0(v0) - sw t3, 32(v0) - 
sw t1, 64(v0) - sw t2, 96(v0) - sw s1, 128(v0) - sw s3, 160(v0) - sw s2, 192(v0) - sw s0, 224(v0) -3: - addiu a1, a1, 2 - addiu a0, a0, 2 - bgtz v1, 1b - addiu v0, v0, 4 - move v0, sp - addiu v1, zero, 8 -4: - lw t0, 8(v0) // z2 = (JLONG) wsptr[2] - lw t1, 24(v0) // z3 = (JLONG) wsptr[6] - lw t2, 0(v0) // (JLONG) wsptr[0] - lw t3, 16(v0) // (JLONG) wsptr[4] - lw s4, 4(v0) // (JLONG) wsptr[1] - lw s5, 12(v0) // (JLONG) wsptr[3] - lw s6, 20(v0) // (JLONG) wsptr[5] - lw s7, 28(v0) // (JLONG) wsptr[7] - or s4, s4, t0 - or s4, s4, t1 - or s4, s4, t3 - or s4, s4, s7 - or s4, s4, s5 - or s4, s4, s6 - bnez s4, 5f - addiu v1, v1, -1 - shra_r.w s5, t2, 5 - andi s5, s5, 0x3ff - lbux s5, s5(a3) - lw s1, 0(a2) - replv.qb s5, s5 - usw s5, 0(s1) - usw s5, 4(s1) - b 6f - nop -5: - addu t4, t0, t1 // z2 + z3 - addiu t8, zero, 4433 // FIX_0_541196100 - mul t5, t4, t8 // z1 = MULTIPLY(z2 + z3, FIX_0_541196100) - addiu t8, zero, 15137 // FIX_1_847759065 - mul t1, t1, t8 // MULTIPLY(z3, FIX_1_847759065) - addiu t8, zero, 6270 // FIX_0_765366865 - mul t0, t0, t8 // MULTIPLY(z2, FIX_0_765366865) - addu t4, t2, t3 // (JLONG) wsptr[0] + (JLONG) wsptr[4] - subu t2, t2, t3 // (JLONG) wsptr[0] - (JLONG) wsptr[4] - sll t4, t4, 13 // tmp0 = ((wsptr[0] + wsptr[4]) << CONST_BITS - sll t2, t2, 13 // tmp1 = ((wsptr[0] - wsptr[4]) << CONST_BITS - subu t1, t5, t1 // tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065) - subu t3, t2, t1 // tmp12 = tmp1 - tmp2 - addu t2, t2, t1 // tmp11 = tmp1 + tmp2 - addu t5, t5, t0 // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865) - subu t1, t4, t5 // tmp13 = tmp0 - tmp3 - addu t0, t4, t5 // tmp10 = tmp0 + tmp3 - lw t4, 28(v0) // tmp0 = (JLONG) wsptr[7] - lw t6, 12(v0) // tmp2 = (JLONG) wsptr[3] - lw t5, 20(v0) // tmp1 = (JLONG) wsptr[5] - lw t7, 4(v0) // tmp3 = (JLONG) wsptr[1] - addu s0, t4, t6 // z3 = tmp0 + tmp2 - addiu t8, zero, 9633 // FIX_1_175875602 - addu s1, t5, t7 // z4 = tmp1 + tmp3 - addu s2, s0, s1 // z3 + z4 - mul s2, s2, t8 // z5 = MULTIPLY(z3 + z4, 
FIX_1_175875602) - addu s3, t4, t7 // z1 = tmp0 + tmp3 - addu t9, t5, t6 // z2 = tmp1 + tmp2 - addiu t8, zero, 16069 // FIX_1_961570560 - mul s0, s0, t8 // -z3 = MULTIPLY(z3, FIX_1_961570560) - addiu t8, zero, 3196 // FIX_0_390180644 - mul s1, s1, t8 // -z4 = MULTIPLY(z4, FIX_0_390180644) - addiu t8, zero, 2446 // FIX_0_298631336 - mul t4, t4, t8 // tmp0 = MULTIPLY(tmp0, FIX_0_298631336) - addiu t8, zero, 7373 // FIX_0_899976223 - mul s3, s3, t8 // -z1 = MULTIPLY(z1, FIX_0_899976223) - addiu t8, zero, 16819 // FIX_2_053119869 - mul t5, t5, t8 // tmp1 = MULTIPLY(tmp1, FIX_2_053119869) - addiu t8, zero, 20995 // FIX_2_562915447 - mul t9, t9, t8 // -z2 = MULTIPLY(z2, FIX_2_562915447) - addiu t8, zero, 25172 // FIX_3_072711026 - mul t6, t6, t8 // tmp2 = MULTIPLY(tmp2, FIX_3_072711026) - addiu t8, zero, 12299 // FIX_1_501321110 - mul t7, t7, t8 // tmp3 = MULTIPLY(tmp3, FIX_1_501321110) - subu s0, s2, s0 // z3 += z5 - subu s1, s2, s1 // z4 += z5 - addu t4, t4, s0 - subu t4, t4, s3 // tmp0 - addu t5, t5, s1 - subu t5, t5, t9 // tmp1 - addu t6, t6, s0 - subu t6, t6, t9 // tmp2 - addu t7, t7, s1 - subu t7, t7, s3 // tmp3 - addu s0, t0, t7 - subu t0, t0, t7 - addu t7, t2, t6 - subu t2, t2, t6 - addu t6, t3, t5 - subu t3, t3, t5 - addu t5, t1, t4 - subu t1, t1, t4 - shra_r.w s0, s0, 18 - shra_r.w t7, t7, 18 - shra_r.w t6, t6, 18 - shra_r.w t5, t5, 18 - shra_r.w t1, t1, 18 - shra_r.w t3, t3, 18 - shra_r.w t2, t2, 18 - shra_r.w t0, t0, 18 - andi s0, s0, 0x3ff - andi t7, t7, 0x3ff - andi t6, t6, 0x3ff - andi t5, t5, 0x3ff - andi t1, t1, 0x3ff - andi t3, t3, 0x3ff - andi t2, t2, 0x3ff - andi t0, t0, 0x3ff - lw s1, 0(a2) - lbux s0, s0(a3) - lbux t7, t7(a3) - lbux t6, t6(a3) - lbux t5, t5(a3) - lbux t1, t1(a3) - lbux t3, t3(a3) - lbux t2, t2(a3) - lbux t0, t0(a3) - sb s0, 0(s1) - sb t7, 1(s1) - sb t6, 2(s1) - sb t5, 3(s1) - sb t1, 4(s1) - sb t3, 5(s1) - sb t2, 6(s1) - sb t0, 7(s1) -6: - addiu v0, v0, 32 - bgtz v1, 4b - addiu a2, a2, 4 - addiu sp, sp, 256 - - RESTORE_REGS_FROM_STACK 
32, s0, s1, s2, s3, s4, s5, s6, s7 - - j ra - nop - -END(jsimd_idct_islow_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_idct_ifast_cols_mips_dspr2) -/* - * a0 - inptr - * a1 - quantptr - * a2 - wsptr - * a3 - mips_idct_ifast_coefs - */ - - SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - addiu t9, a0, 16 // end address - or AT, a3, zero - -0: - lw s0, 0(a1) // quantptr[DCTSIZE*0] - lw t0, 0(a0) // inptr[DCTSIZE*0] - lw t1, 16(a0) // inptr[DCTSIZE*1] - muleq_s.w.phl v0, t0, s0 // tmp0 ... - lw t2, 32(a0) // inptr[DCTSIZE*2] - lw t3, 48(a0) // inptr[DCTSIZE*3] - lw t4, 64(a0) // inptr[DCTSIZE*4] - lw t5, 80(a0) // inptr[DCTSIZE*5] - muleq_s.w.phr t0, t0, s0 // ... tmp0 ... - lw t6, 96(a0) // inptr[DCTSIZE*6] - lw t7, 112(a0) // inptr[DCTSIZE*7] - or s4, t1, t2 - or s5, t3, t4 - bnez s4, 1f - ins t0, v0, 16, 16 // ... tmp0 - bnez s5, 1f - or s6, t5, t6 - or s6, s6, t7 - bnez s6, 1f - sw t0, 0(a2) // wsptr[DCTSIZE*0] - sw t0, 16(a2) // wsptr[DCTSIZE*1] - sw t0, 32(a2) // wsptr[DCTSIZE*2] - sw t0, 48(a2) // wsptr[DCTSIZE*3] - sw t0, 64(a2) // wsptr[DCTSIZE*4] - sw t0, 80(a2) // wsptr[DCTSIZE*5] - sw t0, 96(a2) // wsptr[DCTSIZE*6] - sw t0, 112(a2) // wsptr[DCTSIZE*7] - addiu a0, a0, 4 - b 2f - addiu a1, a1, 4 - -1: - lw s1, 32(a1) // quantptr[DCTSIZE*2] - lw s2, 64(a1) // quantptr[DCTSIZE*4] - muleq_s.w.phl v0, t2, s1 // tmp1 ... - muleq_s.w.phr t2, t2, s1 // ... tmp1 ... - lw s0, 16(a1) // quantptr[DCTSIZE*1] - lw s1, 48(a1) // quantptr[DCTSIZE*3] - lw s3, 96(a1) // quantptr[DCTSIZE*6] - muleq_s.w.phl v1, t4, s2 // tmp2 ... - muleq_s.w.phr t4, t4, s2 // ... tmp2 ... - lw s2, 80(a1) // quantptr[DCTSIZE*5] - lw t8, 4(AT) // FIX(1.414213562) - ins t2, v0, 16, 16 // ... tmp1 - muleq_s.w.phl v0, t6, s3 // tmp3 ... - muleq_s.w.phr t6, t6, s3 // ... tmp3 ... - ins t4, v1, 16, 16 // ... tmp2 - addq.ph s4, t0, t4 // tmp10 - subq.ph s5, t0, t4 // tmp11 - ins t6, v0, 16, 16 // ... 
tmp3 - subq.ph s6, t2, t6 // tmp12 ... - addq.ph s7, t2, t6 // tmp13 - mulq_s.ph s6, s6, t8 // ... tmp12 ... - addq.ph t0, s4, s7 // tmp0 - subq.ph t6, s4, s7 // tmp3 - muleq_s.w.phl v0, t1, s0 // tmp4 ... - muleq_s.w.phr t1, t1, s0 // ... tmp4 ... - shll_s.ph s6, s6, 1 // x2 - lw s3, 112(a1) // quantptr[DCTSIZE*7] - subq.ph s6, s6, s7 // ... tmp12 - muleq_s.w.phl v1, t7, s3 // tmp7 ... - muleq_s.w.phr t7, t7, s3 // ... tmp7 ... - ins t1, v0, 16, 16 // ... tmp4 - addq.ph t2, s5, s6 // tmp1 - subq.ph t4, s5, s6 // tmp2 - muleq_s.w.phl v0, t5, s2 // tmp6 ... - muleq_s.w.phr t5, t5, s2 // ... tmp6 ... - ins t7, v1, 16, 16 // ... tmp7 - addq.ph s5, t1, t7 // z11 - subq.ph s6, t1, t7 // z12 - muleq_s.w.phl v1, t3, s1 // tmp5 ... - muleq_s.w.phr t3, t3, s1 // ... tmp5 ... - ins t5, v0, 16, 16 // ... tmp6 - ins t3, v1, 16, 16 // ... tmp5 - addq.ph s7, t5, t3 // z13 - subq.ph v0, t5, t3 // z10 - addq.ph t7, s5, s7 // tmp7 - subq.ph s5, s5, s7 // tmp11 ... - addq.ph v1, v0, s6 // z5 ... - mulq_s.ph s5, s5, t8 // ... tmp11 - lw t8, 8(AT) // FIX(1.847759065) - lw s4, 0(AT) // FIX(1.082392200) - addq.ph s0, t0, t7 - subq.ph s1, t0, t7 - mulq_s.ph v1, v1, t8 // ... z5 - shll_s.ph s5, s5, 1 // x2 - lw t8, 12(AT) // FIX(-2.613125930) - sw s0, 0(a2) // wsptr[DCTSIZE*0] - shll_s.ph v0, v0, 1 // x4 - mulq_s.ph v0, v0, t8 // tmp12 ... - mulq_s.ph s4, s6, s4 // tmp10 ... - shll_s.ph v1, v1, 1 // x2 - addiu a0, a0, 4 - addiu a1, a1, 4 - sw s1, 112(a2) // wsptr[DCTSIZE*7] - shll_s.ph s6, v0, 1 // x4 - shll_s.ph s4, s4, 1 // x2 - addq.ph s6, s6, v1 // ... tmp12 - subq.ph t5, s6, t7 // tmp6 - subq.ph s4, s4, v1 // ... 
tmp10 - subq.ph t3, s5, t5 // tmp5 - addq.ph s2, t2, t5 - addq.ph t1, s4, t3 // tmp4 - subq.ph s3, t2, t5 - sw s2, 16(a2) // wsptr[DCTSIZE*1] - sw s3, 96(a2) // wsptr[DCTSIZE*6] - addq.ph v0, t4, t3 - subq.ph v1, t4, t3 - sw v0, 32(a2) // wsptr[DCTSIZE*2] - sw v1, 80(a2) // wsptr[DCTSIZE*5] - addq.ph v0, t6, t1 - subq.ph v1, t6, t1 - sw v0, 64(a2) // wsptr[DCTSIZE*4] - sw v1, 48(a2) // wsptr[DCTSIZE*3] - -2: - bne a0, t9, 0b - addiu a2, a2, 4 - - RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - j ra - nop - -END(jsimd_idct_ifast_cols_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_idct_ifast_rows_mips_dspr2) -/* - * a0 - wsptr - * a1 - output_buf - * a2 - output_col - * a3 - mips_idct_ifast_coefs - */ - - SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3 - - addiu t9, a0, 128 // end address - lui s8, 0x8080 - ori s8, s8, 0x8080 - -0: - lw AT, 36(sp) // restore $a3 (mips_idct_ifast_coefs) - lw t0, 0(a0) // wsptr[DCTSIZE*0+0/1] b a - lw s0, 16(a0) // wsptr[DCTSIZE*1+0/1] B A - lw t2, 4(a0) // wsptr[DCTSIZE*0+2/3] d c - lw s2, 20(a0) // wsptr[DCTSIZE*1+2/3] D C - lw t4, 8(a0) // wsptr[DCTSIZE*0+4/5] f e - lw s4, 24(a0) // wsptr[DCTSIZE*1+4/5] F E - lw t6, 12(a0) // wsptr[DCTSIZE*0+6/7] h g - lw s6, 28(a0) // wsptr[DCTSIZE*1+6/7] H G - precrq.ph.w t1, s0, t0 // B b - ins t0, s0, 16, 16 // A a - bnez t1, 1f - or s0, t2, s2 - bnez s0, 1f - or s0, t4, s4 - bnez s0, 1f - or s0, t6, s6 - bnez s0, 1f - shll_s.ph s0, t0, 2 // A a - lw a3, 0(a1) - lw AT, 4(a1) - precrq.ph.w t0, s0, s0 // A A - ins s0, s0, 16, 16 // a a - addu a3, a3, a2 - addu AT, AT, a2 - precrq.qb.ph t0, t0, t0 // A A A A - precrq.qb.ph s0, s0, s0 // a a a a - addu.qb s0, s0, s8 - addu.qb t0, t0, s8 - sw s0, 0(a3) - sw s0, 4(a3) - sw t0, 0(AT) - sw t0, 4(AT) - addiu a0, a0, 32 - bne a0, t9, 0b - addiu a1, a1, 8 - b 2f - nop - -1: - precrq.ph.w t3, s2, t2 - ins t2, s2, 16, 16 - precrq.ph.w t5, s4, t4 - ins t4, s4, 
16, 16 - precrq.ph.w t7, s6, t6 - ins t6, s6, 16, 16 - lw t8, 4(AT) // FIX(1.414213562) - addq.ph s4, t0, t4 // tmp10 - subq.ph s5, t0, t4 // tmp11 - subq.ph s6, t2, t6 // tmp12 ... - addq.ph s7, t2, t6 // tmp13 - mulq_s.ph s6, s6, t8 // ... tmp12 ... - addq.ph t0, s4, s7 // tmp0 - subq.ph t6, s4, s7 // tmp3 - shll_s.ph s6, s6, 1 // x2 - subq.ph s6, s6, s7 // ... tmp12 - addq.ph t2, s5, s6 // tmp1 - subq.ph t4, s5, s6 // tmp2 - addq.ph s5, t1, t7 // z11 - subq.ph s6, t1, t7 // z12 - addq.ph s7, t5, t3 // z13 - subq.ph v0, t5, t3 // z10 - addq.ph t7, s5, s7 // tmp7 - subq.ph s5, s5, s7 // tmp11 ... - addq.ph v1, v0, s6 // z5 ... - mulq_s.ph s5, s5, t8 // ... tmp11 - lw t8, 8(AT) // FIX(1.847759065) - lw s4, 0(AT) // FIX(1.082392200) - addq.ph s0, t0, t7 // tmp0 + tmp7 - subq.ph s7, t0, t7 // tmp0 - tmp7 - mulq_s.ph v1, v1, t8 // ... z5 - lw a3, 0(a1) - lw t8, 12(AT) // FIX(-2.613125930) - shll_s.ph s5, s5, 1 // x2 - addu a3, a3, a2 - shll_s.ph v0, v0, 1 // x4 - mulq_s.ph v0, v0, t8 // tmp12 ... - mulq_s.ph s4, s6, s4 // tmp10 ... - shll_s.ph v1, v1, 1 // x2 - addiu a0, a0, 32 - addiu a1, a1, 8 - shll_s.ph s6, v0, 1 // x4 - shll_s.ph s4, s4, 1 // x2 - addq.ph s6, s6, v1 // ... tmp12 - shll_s.ph s0, s0, 2 - subq.ph t5, s6, t7 // tmp6 - subq.ph s4, s4, v1 // ... 
tmp10 - subq.ph t3, s5, t5 // tmp5 - shll_s.ph s7, s7, 2 - addq.ph t1, s4, t3 // tmp4 - addq.ph s1, t2, t5 // tmp1 + tmp6 - subq.ph s6, t2, t5 // tmp1 - tmp6 - addq.ph s2, t4, t3 // tmp2 + tmp5 - subq.ph s5, t4, t3 // tmp2 - tmp5 - addq.ph s4, t6, t1 // tmp3 + tmp4 - subq.ph s3, t6, t1 // tmp3 - tmp4 - shll_s.ph s1, s1, 2 - shll_s.ph s2, s2, 2 - shll_s.ph s3, s3, 2 - shll_s.ph s4, s4, 2 - shll_s.ph s5, s5, 2 - shll_s.ph s6, s6, 2 - precrq.ph.w t0, s1, s0 // B A - ins s0, s1, 16, 16 // b a - precrq.ph.w t2, s3, s2 // D C - ins s2, s3, 16, 16 // d c - precrq.ph.w t4, s5, s4 // F E - ins s4, s5, 16, 16 // f e - precrq.ph.w t6, s7, s6 // H G - ins s6, s7, 16, 16 // h g - precrq.qb.ph t0, t2, t0 // D C B A - precrq.qb.ph s0, s2, s0 // d c b a - precrq.qb.ph t4, t6, t4 // H G F E - precrq.qb.ph s4, s6, s4 // h g f e - addu.qb s0, s0, s8 - addu.qb s4, s4, s8 - sw s0, 0(a3) // outptr[0/1/2/3] d c b a - sw s4, 4(a3) // outptr[4/5/6/7] h g f e - lw a3, -4(a1) - addu.qb t0, t0, s8 - addu a3, a3, a2 - addu.qb t4, t4, s8 - sw t0, 0(a3) // outptr[0/1/2/3] D C B A - bne a0, t9, 0b - sw t4, 4(a3) // outptr[4/5/6/7] H G F E - -2: - - RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3 - - j ra - nop - -END(jsimd_idct_ifast_rows_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_fdct_islow_mips_dspr2) -/* - * a0 - data - */ - - SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8 - - lui t0, 6437 - ori t0, 2260 - lui t1, 9633 - ori t1, 11363 - lui t2, 0xd39e - ori t2, 0xe6dc - lui t3, 0xf72d - ori t3, 9633 - lui t4, 2261 - ori t4, 9633 - lui t5, 0xd39e - ori t5, 6437 - lui t6, 9633 - ori t6, 0xd39d - lui t7, 0xe6dc - ori t7, 2260 - lui t8, 4433 - ori t8, 10703 - lui t9, 0xd630 - ori t9, 4433 - li s8, 8 - move a1, a0 -1: - lw s0, 0(a1) // tmp0 = 1|0 - lw s1, 4(a1) // tmp1 = 3|2 - lw s2, 8(a1) // tmp2 = 5|4 - lw s3, 12(a1) // tmp3 = 7|6 - packrl.ph s1, s1, s1 // tmp1 = 2|3 - packrl.ph s3, s3, s3 // 
tmp3 = 6|7 - subq.ph s7, s1, s2 // tmp7 = 2-5|3-4 = t5|t4 - subq.ph s5, s0, s3 // tmp5 = 1-6|0-7 = t6|t7 - mult $0, $0 // ac0 = 0 - dpa.w.ph $ac0, s7, t0 // ac0 += t5* 6437 + t4* 2260 - dpa.w.ph $ac0, s5, t1 // ac0 += t6* 9633 + t7* 11363 - mult $ac1, $0, $0 // ac1 = 0 - dpa.w.ph $ac1, s7, t2 // ac1 += t5*-11362 + t4* -6436 - dpa.w.ph $ac1, s5, t3 // ac1 += t6* -2259 + t7* 9633 - mult $ac2, $0, $0 // ac2 = 0 - dpa.w.ph $ac2, s7, t4 // ac2 += t5* 2261 + t4* 9633 - dpa.w.ph $ac2, s5, t5 // ac2 += t6*-11362 + t7* 6437 - mult $ac3, $0, $0 // ac3 = 0 - dpa.w.ph $ac3, s7, t6 // ac3 += t5* 9633 + t4*-11363 - dpa.w.ph $ac3, s5, t7 // ac3 += t6* -6436 + t7* 2260 - addq.ph s6, s1, s2 // tmp6 = 2+5|3+4 = t2|t3 - addq.ph s4, s0, s3 // tmp4 = 1+6|0+7 = t1|t0 - extr_r.w s0, $ac0, 11 // tmp0 = (ac0 + 1024) >> 11 - extr_r.w s1, $ac1, 11 // tmp1 = (ac1 + 1024) >> 11 - extr_r.w s2, $ac2, 11 // tmp2 = (ac2 + 1024) >> 11 - extr_r.w s3, $ac3, 11 // tmp3 = (ac3 + 1024) >> 11 - addq.ph s5, s4, s6 // tmp5 = t1+t2|t0+t3 = t11|t10 - subq.ph s7, s4, s6 // tmp7 = t1-t2|t0-t3 = t12|t13 - sh s0, 2(a1) - sh s1, 6(a1) - sh s2, 10(a1) - sh s3, 14(a1) - mult $0, $0 // ac0 = 0 - dpa.w.ph $ac0, s7, t8 // ac0 += t12* 4433 + t13* 10703 - mult $ac1, $0, $0 // ac1 = 0 - dpa.w.ph $ac1, s7, t9 // ac1 += t12*-10704 + t13* 4433 - sra s4, s5, 16 // tmp4 = t11 - addiu a1, a1, 16 - addiu s8, s8, -1 - extr_r.w s0, $ac0, 11 // tmp0 = (ac0 + 1024) >> 11 - extr_r.w s1, $ac1, 11 // tmp1 = (ac1 + 1024) >> 11 - addu s2, s5, s4 // tmp2 = t10 + t11 - subu s3, s5, s4 // tmp3 = t10 - t11 - sll s2, s2, 2 // tmp2 = (t10 + t11) << 2 - sll s3, s3, 2 // tmp3 = (t10 - t11) << 2 - sh s2, -16(a1) - sh s3, -8(a1) - sh s0, -12(a1) - bgtz s8, 1b - sh s1, -4(a1) - li t0, 2260 - li t1, 11363 - li t2, 9633 - li t3, 6436 - li t4, 6437 - li t5, 2261 - li t6, 11362 - li t7, 2259 - li t8, 4433 - li t9, 10703 - li a1, 10704 - li s8, 8 - -2: - lh a2, 0(a0) // 0 - lh a3, 16(a0) // 8 - lh v0, 32(a0) // 16 - lh v1, 48(a0) // 24 - lh s4, 64(a0) 
// 32 - lh s5, 80(a0) // 40 - lh s6, 96(a0) // 48 - lh s7, 112(a0) // 56 - addu s2, v0, s5 // tmp2 = 16 + 40 - subu s5, v0, s5 // tmp5 = 16 - 40 - addu s3, v1, s4 // tmp3 = 24 + 32 - subu s4, v1, s4 // tmp4 = 24 - 32 - addu s0, a2, s7 // tmp0 = 0 + 56 - subu s7, a2, s7 // tmp7 = 0 - 56 - addu s1, a3, s6 // tmp1 = 8 + 48 - subu s6, a3, s6 // tmp6 = 8 - 48 - addu a2, s0, s3 // tmp10 = tmp0 + tmp3 - subu v1, s0, s3 // tmp13 = tmp0 - tmp3 - addu a3, s1, s2 // tmp11 = tmp1 + tmp2 - subu v0, s1, s2 // tmp12 = tmp1 - tmp2 - mult s7, t1 // ac0 = tmp7 * c1 - madd s4, t0 // ac0 += tmp4 * c0 - madd s5, t4 // ac0 += tmp5 * c4 - madd s6, t2 // ac0 += tmp6 * c2 - mult $ac1, s7, t2 // ac1 = tmp7 * c2 - msub $ac1, s4, t3 // ac1 -= tmp4 * c3 - msub $ac1, s5, t6 // ac1 -= tmp5 * c6 - msub $ac1, s6, t7 // ac1 -= tmp6 * c7 - mult $ac2, s7, t4 // ac2 = tmp7 * c4 - madd $ac2, s4, t2 // ac2 += tmp4 * c2 - madd $ac2, s5, t5 // ac2 += tmp5 * c5 - msub $ac2, s6, t6 // ac2 -= tmp6 * c6 - mult $ac3, s7, t0 // ac3 = tmp7 * c0 - msub $ac3, s4, t1 // ac3 -= tmp4 * c1 - madd $ac3, s5, t2 // ac3 += tmp5 * c2 - msub $ac3, s6, t3 // ac3 -= tmp6 * c3 - extr_r.w s0, $ac0, 15 // tmp0 = (ac0 + 16384) >> 15 - extr_r.w s1, $ac1, 15 // tmp1 = (ac1 + 16384) >> 15 - extr_r.w s2, $ac2, 15 // tmp2 = (ac2 + 16384) >> 15 - extr_r.w s3, $ac3, 15 // tmp3 = (ac3 + 16384) >> 15 - addiu s8, s8, -1 - addu s4, a2, a3 // tmp4 = tmp10 + tmp11 - subu s5, a2, a3 // tmp5 = tmp10 - tmp11 - sh s0, 16(a0) - sh s1, 48(a0) - sh s2, 80(a0) - sh s3, 112(a0) - mult v0, t8 // ac0 = tmp12 * c8 - madd v1, t9 // ac0 += tmp13 * c9 - mult $ac1, v1, t8 // ac1 = tmp13 * c8 - msub $ac1, v0, a1 // ac1 -= tmp12 * c10 - addiu a0, a0, 2 - extr_r.w s6, $ac0, 15 // tmp6 = (ac0 + 16384) >> 15 - extr_r.w s7, $ac1, 15 // tmp7 = (ac1 + 16384) >> 15 - shra_r.w s4, s4, 2 // tmp4 = (tmp4 + 2) >> 2 - shra_r.w s5, s5, 2 // tmp5 = (tmp5 + 2) >> 2 - sh s4, -2(a0) - sh s5, 62(a0) - sh s6, 30(a0) - bgtz s8, 2b - sh s7, 94(a0) - - RESTORE_REGS_FROM_STACK 40, 
s0, s1, s2, s3, s4, s5, s6, s7, s8 - - jr ra - nop - -END(jsimd_fdct_islow_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_fdct_ifast_mips_dspr2) -/* - * a0 - data - */ - .set at - SAVE_REGS_ON_STACK 8, s0, s1 - li a1, 0x014e014e // FIX_1_306562965 (334 << 16)|(334 & 0xffff) - li a2, 0x008b008b // FIX_0_541196100 (139 << 16)|(139 & 0xffff) - li a3, 0x00620062 // FIX_0_382683433 (98 << 16) |(98 & 0xffff) - li s1, 0x00b500b5 // FIX_0_707106781 (181 << 16)|(181 & 0xffff) - - move v0, a0 - addiu v1, v0, 128 // end address - -0: - lw t0, 0(v0) // tmp0 = 1|0 - lw t1, 4(v0) // tmp1 = 3|2 - lw t2, 8(v0) // tmp2 = 5|4 - lw t3, 12(v0) // tmp3 = 7|6 - packrl.ph t1, t1, t1 // tmp1 = 2|3 - packrl.ph t3, t3, t3 // tmp3 = 6|7 - subq.ph t7, t1, t2 // tmp7 = 2-5|3-4 = t5|t4 - subq.ph t5, t0, t3 // tmp5 = 1-6|0-7 = t6|t7 - addq.ph t6, t1, t2 // tmp6 = 2+5|3+4 = t2|t3 - addq.ph t4, t0, t3 // tmp4 = 1+6|0+7 = t1|t0 - addq.ph t8, t4, t6 // tmp5 = t1+t2|t0+t3 = t11|t10 - subq.ph t9, t4, t6 // tmp7 = t1-t2|t0-t3 = t12|t13 - sra t4, t8, 16 // tmp4 = t11 - mult $0, $0 // ac0 = 0 - dpa.w.ph $ac0, t9, s1 - mult $ac1, $0, $0 // ac1 = 0 - dpa.w.ph $ac1, t7, a3 // ac1 += t4*98 + t5*98 - dpsx.w.ph $ac1, t5, a3 // ac1 += t6*98 + t7*98 - mult $ac2, $0, $0 // ac2 = 0 - dpa.w.ph $ac2, t7, a2 // ac2 += t4*139 + t5*139 - mult $ac3, $0, $0 // ac3 = 0 - dpa.w.ph $ac3, t5, a1 // ac3 += t6*334 + t7*334 - precrq.ph.w t0, t5, t7 // t0 = t5|t6 - addq.ph t2, t8, t4 // tmp2 = t10 + t11 - subq.ph t3, t8, t4 // tmp3 = t10 - t11 - extr.w t4, $ac0, 8 - mult $0, $0 // ac0 = 0 - dpa.w.ph $ac0, t0, s1 // ac0 += t5*181 + t6*181 - extr.w t0, $ac1, 8 // t0 = z5 - extr.w t1, $ac2, 8 // t1 = MULTIPLY(tmp10, 139) - extr.w t7, $ac3, 8 // t2 = MULTIPLY(tmp12, 334) - extr.w t8, $ac0, 8 // t8 = z3 = MULTIPLY(tmp11, 181) - add t6, t1, t0 // t6 = z2 - add t7, t7, t0 // t7 = z4 - subq.ph t0, t5, t8 // t0 = z13 = tmp7 - z3 - addq.ph t8, t5, t8 // t9 = z11 = tmp7 
+ z3 - addq.ph t1, t0, t6 // t1 = z13 + z2 - subq.ph t6, t0, t6 // t6 = z13 - z2 - addq.ph t0, t8, t7 // t0 = z11 + z4 - subq.ph t7, t8, t7 // t7 = z11 - z4 - addq.ph t5, t4, t9 - subq.ph t4, t9, t4 - sh t2, 0(v0) - sh t5, 4(v0) - sh t3, 8(v0) - sh t4, 12(v0) - sh t1, 10(v0) - sh t6, 6(v0) - sh t0, 2(v0) - sh t7, 14(v0) - addiu v0, 16 - bne v1, v0, 0b - nop - move v0, a0 - addiu v1, v0, 16 - -1: - lh t0, 0(v0) // 0 - lh t1, 16(v0) // 8 - lh t2, 32(v0) // 16 - lh t3, 48(v0) // 24 - lh t4, 64(v0) // 32 - lh t5, 80(v0) // 40 - lh t6, 96(v0) // 48 - lh t7, 112(v0) // 56 - add t8, t0, t7 // t8 = tmp0 - sub t7, t0, t7 // t7 = tmp7 - add t0, t1, t6 // t0 = tmp1 - sub t1, t1, t6 // t1 = tmp6 - add t6, t2, t5 // t6 = tmp2 - sub t5, t2, t5 // t5 = tmp5 - add t2, t3, t4 // t2 = tmp3 - sub t3, t3, t4 // t3 = tmp4 - add t4, t8, t2 // t4 = tmp10 = tmp0 + tmp3 - sub t8, t8, t2 // t8 = tmp13 = tmp0 - tmp3 - sub s0, t0, t6 // s0 = tmp12 = tmp1 - tmp2 - ins t8, s0, 16, 16 // t8 = tmp12|tmp13 - add t2, t0, t6 // t2 = tmp11 = tmp1 + tmp2 - mult $0, $0 // ac0 = 0 - dpa.w.ph $ac0, t8, s1 // ac0 += t12*181 + t13*181 - add s0, t4, t2 // t8 = tmp10+tmp11 - sub t4, t4, t2 // t4 = tmp10-tmp11 - sh s0, 0(v0) - sh t4, 64(v0) - extr.w t2, $ac0, 8 // z1 = MULTIPLY(tmp12+tmp13,FIX_0_707106781) - addq.ph t4, t8, t2 // t9 = tmp13 + z1 - subq.ph t8, t8, t2 // t2 = tmp13 - z1 - sh t4, 32(v0) - sh t8, 96(v0) - add t3, t3, t5 // t3 = tmp10 = tmp4 + tmp5 - add t0, t5, t1 // t0 = tmp11 = tmp5 + tmp6 - add t1, t1, t7 // t1 = tmp12 = tmp6 + tmp7 - andi t4, a1, 0xffff - mul s0, t1, t4 - sra s0, s0, 8 // s0 = z4 = MULTIPLY(tmp12, FIX_1_306562965) - ins t1, t3, 16, 16 // t1 = tmp10|tmp12 - mult $0, $0 // ac0 = 0 - mulsa.w.ph $ac0, t1, a3 // ac0 += t10*98 - t12*98 - extr.w t8, $ac0, 8 // z5 = MULTIPLY(tmp10-tmp12,FIX_0_382683433) - add t2, t7, t8 // t2 = tmp7 + z5 - sub t7, t7, t8 // t7 = tmp7 - z5 - andi t4, a2, 0xffff - mul t8, t3, t4 - sra t8, t8, 8 // t8 = z2 = MULTIPLY(tmp10, FIX_0_541196100) - andi t4, 
s1, 0xffff - mul t6, t0, t4 - sra t6, t6, 8 // t6 = z3 = MULTIPLY(tmp11, FIX_0_707106781) - add t0, t6, t8 // t0 = z3 + z2 - sub t1, t6, t8 // t1 = z3 - z2 - add t3, t6, s0 // t3 = z3 + z4 - sub t4, t6, s0 // t4 = z3 - z4 - sub t5, t2, t1 // t5 = dataptr[5] - sub t6, t7, t0 // t6 = dataptr[3] - add t3, t2, t3 // t3 = dataptr[1] - add t4, t7, t4 // t4 = dataptr[7] - sh t5, 80(v0) - sh t6, 48(v0) - sh t3, 16(v0) - sh t4, 112(v0) - addiu v0, 2 - bne v0, v1, 1b - nop - - RESTORE_REGS_FROM_STACK 8, s0, s1 - - j ra - nop -END(jsimd_fdct_ifast_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_quantize_mips_dspr2) -/* - * a0 - coef_block - * a1 - divisors - * a2 - workspace - */ - - .set at - - SAVE_REGS_ON_STACK 16, s0, s1, s2 - - addiu v0, a2, 124 // v0 = workspace_end - lh t0, 0(a2) - lh t1, 0(a1) - lh t2, 128(a1) - sra t3, t0, 15 - sll t3, t3, 1 - addiu t3, t3, 1 - mul t0, t0, t3 - lh t4, 384(a1) - lh t5, 130(a1) - lh t6, 2(a2) - lh t7, 2(a1) - lh t8, 386(a1) - -1: - andi t1, 0xffff - add t9, t0, t2 - andi t9, 0xffff - mul v1, t9, t1 - sra s0, t6, 15 - sll s0, s0, 1 - addiu s0, s0, 1 - addiu t9, t4, 16 - srav v1, v1, t9 - mul v1, v1, t3 - mul t6, t6, s0 - andi t7, 0xffff - addiu a2, a2, 4 - addiu a1, a1, 4 - add s1, t6, t5 - andi s1, 0xffff - sh v1, 0(a0) - - mul s2, s1, t7 - addiu s1, t8, 16 - srav s2, s2, s1 - mul s2,s2, s0 - lh t0, 0(a2) - lh t1, 0(a1) - sra t3, t0, 15 - sll t3, t3, 1 - addiu t3, t3, 1 - mul t0, t0, t3 - lh t2, 128(a1) - lh t4, 384(a1) - lh t5, 130(a1) - lh t8, 386(a1) - lh t6, 2(a2) - lh t7, 2(a1) - sh s2, 2(a0) - lh t0, 0(a2) - sra t3, t0, 15 - sll t3, t3, 1 - addiu t3, t3, 1 - mul t0, t0,t3 - bne a2, v0, 1b - addiu a0, a0, 4 - - andi t1, 0xffff - add t9, t0, t2 - andi t9, 0xffff - mul v1, t9, t1 - sra s0, t6, 15 - sll s0, s0, 1 - addiu s0, s0, 1 - addiu t9, t4, 16 - srav v1, v1, t9 - mul v1, v1, t3 - mul t6, t6, s0 - andi t7, 0xffff - sh v1, 0(a0) - add s1, t6, t5 - andi s1, 
0xffff - mul s2, s1, t7 - addiu s1, t8, 16 - addiu a2, a2, 4 - addiu a1, a1, 4 - srav s2, s2, s1 - mul s2, s2, s0 - sh s2, 2(a0) - - RESTORE_REGS_FROM_STACK 16, s0, s1, s2 - - j ra - nop - -END(jsimd_quantize_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_quantize_float_mips_dspr2) -/* - * a0 - coef_block - * a1 - divisors - * a2 - workspace - */ - - .set at - - li t1, 0x46800100 //integer representation 16384.5 - mtc1 t1, f0 - li t0, 63 -0: - lwc1 f2, 0(a2) - lwc1 f10, 0(a1) - lwc1 f4, 4(a2) - lwc1 f12, 4(a1) - lwc1 f6, 8(a2) - lwc1 f14, 8(a1) - lwc1 f8, 12(a2) - lwc1 f16, 12(a1) - madd.s f2, f0, f2, f10 - madd.s f4, f0, f4, f12 - madd.s f6, f0, f6, f14 - madd.s f8, f0, f8, f16 - lwc1 f10, 16(a1) - lwc1 f12, 20(a1) - trunc.w.s f2, f2 - trunc.w.s f4, f4 - trunc.w.s f6, f6 - trunc.w.s f8, f8 - lwc1 f14, 24(a1) - lwc1 f16, 28(a1) - mfc1 t1, f2 - mfc1 t2, f4 - mfc1 t3, f6 - mfc1 t4, f8 - lwc1 f2, 16(a2) - lwc1 f4, 20(a2) - lwc1 f6, 24(a2) - lwc1 f8, 28(a2) - madd.s f2, f0, f2, f10 - madd.s f4, f0, f4, f12 - madd.s f6, f0, f6, f14 - madd.s f8, f0, f8, f16 - addiu t1, t1, -16384 - addiu t2, t2, -16384 - addiu t3, t3, -16384 - addiu t4, t4, -16384 - trunc.w.s f2, f2 - trunc.w.s f4, f4 - trunc.w.s f6, f6 - trunc.w.s f8, f8 - sh t1, 0(a0) - sh t2, 2(a0) - sh t3, 4(a0) - sh t4, 6(a0) - mfc1 t1, f2 - mfc1 t2, f4 - mfc1 t3, f6 - mfc1 t4, f8 - addiu t0, t0, -8 - addiu a2, a2, 32 - addiu a1, a1, 32 - addiu t1, t1, -16384 - addiu t2, t2, -16384 - addiu t3, t3, -16384 - addiu t4, t4, -16384 - sh t1, 8(a0) - sh t2, 10(a0) - sh t3, 12(a0) - sh t4, 14(a0) - bgez t0, 0b - addiu a0, a0, 16 - - j ra - nop - -END(jsimd_quantize_float_mips_dspr2) -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_idct_2x2_mips_dspr2) -/* - * a0 - compptr->dct_table - * a1 - coef_block - * a2 - output_buf - * a3 - output_col - */ - .set at - - SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, 
s4, s5 - - addiu sp, sp, -40 - move v0, sp - addiu s2, zero, 29692 - addiu s3, zero, -10426 - addiu s4, zero, 6967 - addiu s5, zero, -5906 - lh t0, 0(a1) // t0 = inptr[DCTSIZE*0] - lh t5, 0(a0) // t5 = quantptr[DCTSIZE*0] - lh t1, 48(a1) // t1 = inptr[DCTSIZE*3] - lh t6, 48(a0) // t6 = quantptr[DCTSIZE*3] - mul t4, t5, t0 - lh t0, 16(a1) // t0 = inptr[DCTSIZE*1] - lh t5, 16(a0) // t5 = quantptr[DCTSIZE*1] - mul t6, t6, t1 - mul t5, t5, t0 - lh t2, 80(a1) // t2 = inptr[DCTSIZE*5] - lh t7, 80(a0) // t7 = quantptr[DCTSIZE*5] - lh t3, 112(a1) // t3 = inptr[DCTSIZE*7] - lh t8, 112(a0) // t8 = quantptr[DCTSIZE*7] - mul t7, t7, t2 - mult zero, zero - mul t8, t8, t3 - li s0, 0x73FCD746 // s0 = (29692 << 16) | (-10426 & 0xffff) - li s1, 0x1B37E8EE // s1 = (6967 << 16) | (-5906 & 0xffff) - ins t6, t5, 16, 16 // t6 = t5|t6 - sll t4, t4, 15 - dpa.w.ph $ac0, t6, s0 - lh t1, 2(a1) - lh t6, 2(a0) - ins t8, t7, 16, 16 // t8 = t7|t8 - dpa.w.ph $ac0, t8, s1 - mflo t0, $ac0 - mul t5, t6, t1 - lh t1, 18(a1) - lh t6, 18(a0) - lh t2, 50(a1) - lh t7, 50(a0) - mul t6, t6, t1 - subu t8, t4, t0 - mul t7, t7, t2 - addu t0, t4, t0 - shra_r.w t0, t0, 13 - lh t1, 82(a1) - lh t2, 82(a0) - lh t3, 114(a1) - lh t4, 114(a0) - shra_r.w t8, t8, 13 - mul t1, t1, t2 - mul t3, t3, t4 - sw t0, 0(v0) - sw t8, 20(v0) - sll t4, t5, 15 - ins t7, t6, 16, 16 - mult zero, zero - dpa.w.ph $ac0, t7, s0 - ins t3, t1, 16, 16 - lh t1, 6(a1) - lh t6, 6(a0) - dpa.w.ph $ac0, t3, s1 - mflo t0, $ac0 - mul t5, t6, t1 - lh t1, 22(a1) - lh t6, 22(a0) - lh t2, 54(a1) - lh t7, 54(a0) - mul t6, t6, t1 - subu t8, t4, t0 - mul t7, t7, t2 - addu t0, t4, t0 - shra_r.w t0, t0, 13 - lh t1, 86(a1) - lh t2, 86(a0) - lh t3, 118(a1) - lh t4, 118(a0) - shra_r.w t8, t8, 13 - mul t1, t1, t2 - mul t3, t3, t4 - sw t0, 4(v0) - sw t8, 24(v0) - sll t4, t5, 15 - ins t7, t6, 16, 16 - mult zero, zero - dpa.w.ph $ac0, t7, s0 - ins t3, t1, 16, 16 - lh t1, 10(a1) - lh t6, 10(a0) - dpa.w.ph $ac0, t3, s1 - mflo t0, $ac0 - mul t5, t6, t1 - lh t1, 26(a1) 
- lh t6, 26(a0) - lh t2, 58(a1) - lh t7, 58(a0) - mul t6, t6, t1 - subu t8, t4, t0 - mul t7, t7, t2 - addu t0, t4, t0 - shra_r.w t0, t0, 13 - lh t1, 90(a1) - lh t2, 90(a0) - lh t3, 122(a1) - lh t4, 122(a0) - shra_r.w t8, t8, 13 - mul t1, t1, t2 - mul t3, t3, t4 - sw t0, 8(v0) - sw t8, 28(v0) - sll t4, t5, 15 - ins t7, t6, 16, 16 - mult zero, zero - dpa.w.ph $ac0, t7, s0 - ins t3, t1, 16, 16 - lh t1, 14(a1) - lh t6, 14(a0) - dpa.w.ph $ac0, t3, s1 - mflo t0, $ac0 - mul t5, t6, t1 - lh t1, 30(a1) - lh t6, 30(a0) - lh t2, 62(a1) - lh t7, 62(a0) - mul t6, t6, t1 - subu t8, t4, t0 - mul t7, t7, t2 - addu t0, t4, t0 - shra_r.w t0, t0, 13 - lh t1, 94(a1) - lh t2, 94(a0) - lh t3, 126(a1) - lh t4, 126(a0) - shra_r.w t8, t8, 13 - mul t1, t1, t2 - mul t3, t3, t4 - sw t0, 12(v0) - sw t8, 32(v0) - sll t4, t5, 15 - ins t7, t6, 16, 16 - mult zero, zero - dpa.w.ph $ac0, t7, s0 - ins t3, t1, 16, 16 - dpa.w.ph $ac0, t3, s1 - mflo t0, $ac0 - lw t9, 0(a2) - lw t3, 0(v0) - lw t7, 4(v0) - lw t1, 8(v0) - addu t9, t9, a3 - sll t3, t3, 15 - subu t8, t4, t0 - addu t0, t4, t0 - shra_r.w t0, t0, 13 - shra_r.w t8, t8, 13 - sw t0, 16(v0) - sw t8, 36(v0) - lw t5, 12(v0) - lw t6, 16(v0) - mult t7, s2 - madd t1, s3 - madd t5, s4 - madd t6, s5 - lw t5, 24(v0) - lw t7, 28(v0) - mflo t0, $ac0 - lw t8, 32(v0) - lw t2, 36(v0) - mult $ac1, t5, s2 - madd $ac1, t7, s3 - madd $ac1, t8, s4 - madd $ac1, t2, s5 - addu t1, t3, t0 - subu t6, t3, t0 - shra_r.w t1, t1, 20 - shra_r.w t6, t6, 20 - mflo t4, $ac1 - shll_s.w t1, t1, 24 - shll_s.w t6, t6, 24 - sra t1, t1, 24 - sra t6, t6, 24 - addiu t1, t1, 128 - addiu t6, t6, 128 - lw t0, 20(v0) - sb t1, 0(t9) - sb t6, 1(t9) - sll t0, t0, 15 - lw t9, 4(a2) - addu t1, t0, t4 - subu t6, t0, t4 - addu t9, t9, a3 - shra_r.w t1, t1, 20 - shra_r.w t6, t6, 20 - shll_s.w t1, t1, 24 - shll_s.w t6, t6, 24 - sra t1, t1, 24 - sra t6, t6, 24 - addiu t1, t1, 128 - addiu t6, t6, 128 - sb t1, 0(t9) - sb t6, 1(t9) - addiu sp, sp, 40 - - RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, 
s5 - - j ra - nop - -END(jsimd_idct_2x2_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_idct_4x4_mips_dspr2) -/* - * a0 - compptr->dct_table - * a1 - coef_block - * a2 - output_buf - * a3 - output_col - * 16(sp) - workspace[DCTSIZE*4]; // buffers data between passes - */ - - .set at - SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - lw v1, 48(sp) - move t0, a1 - move t1, v1 - li t9, 4 - li s0, 0x2e75f93e - li s1, 0x21f9ba79 - li s2, 0xecc2efb0 - li s3, 0x52031ccd - -0: - lh s6, 32(t0) // inptr[DCTSIZE*2] - lh t6, 32(a0) // quantptr[DCTSIZE*2] - lh s7, 96(t0) // inptr[DCTSIZE*6] - lh t7, 96(a0) // quantptr[DCTSIZE*6] - mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) - lh s4, 0(t0) // inptr[DCTSIZE*0] - mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) - lh s5, 0(a0) // quantptr[0] - li s6, 15137 - li s7, 6270 - mul t2, s4, s5 // tmp0 = (inptr[0] * quantptr[0]) - mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) - lh t5, 112(t0) // inptr[DCTSIZE*7] - mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) - lh s4, 112(a0) // quantptr[DCTSIZE*7] - lh v0, 80(t0) // inptr[DCTSIZE*5] - lh s5, 80(a0) // quantptr[DCTSIZE*5] - lh s6, 48(a0) // quantptr[DCTSIZE*3] - sll t2, t2, 14 // tmp0 <<= (CONST_BITS+1) - lh s7, 16(a0) // quantptr[DCTSIZE*1] - lh t8, 16(t0) // inptr[DCTSIZE*1] - subu t6, t6, t7 // tmp2 = MULTIPLY(z2, t5) - MULTIPLY(z3, t6) - lh t7, 48(t0) // inptr[DCTSIZE*3] - mul t5, s4, t5 // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7]) - mul v0, s5, v0 // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5]) - mul t7, s6, t7 // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3]) - mul t8, s7, t8 // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1]) - addu t3, t2, t6 // tmp10 = tmp0 + z2 - subu t4, t2, t6 // tmp10 = tmp0 - z2 - mult $ac0, zero, zero - mult $ac1, zero, zero - ins t5, v0, 16, 16 - ins t7, t8, 16, 16 - addiu t9, t9, -1 - dpa.w.ph $ac0, t5, s0 - 
dpa.w.ph $ac0, t7, s1 - dpa.w.ph $ac1, t5, s2 - dpa.w.ph $ac1, t7, s3 - mflo s4, $ac0 - mflo s5, $ac1 - addiu a0, a0, 2 - addiu t1, t1, 4 - addiu t0, t0, 2 - addu t6, t4, s4 - subu t5, t4, s4 - addu s6, t3, s5 - subu s7, t3, s5 - shra_r.w t6, t6, 12 // DESCALE(tmp12 + temp1, 12) - shra_r.w t5, t5, 12 // DESCALE(tmp12 - temp1, 12) - shra_r.w s6, s6, 12 // DESCALE(tmp10 + temp2, 12) - shra_r.w s7, s7, 12 // DESCALE(tmp10 - temp2, 12) - sw t6, 28(t1) - sw t5, 60(t1) - sw s6, -4(t1) - bgtz t9, 0b - sw s7, 92(t1) - // second loop three pass - li t9, 3 -1: - lh s6, 34(t0) // inptr[DCTSIZE*2] - lh t6, 34(a0) // quantptr[DCTSIZE*2] - lh s7, 98(t0) // inptr[DCTSIZE*6] - lh t7, 98(a0) // quantptr[DCTSIZE*6] - mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) - lh s4, 2(t0) // inptr[DCTSIZE*0] - mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) - lh s5, 2(a0) // quantptr[DCTSIZE*0] - li s6, 15137 - li s7, 6270 - mul t2, s4, s5 // tmp0 = (inptr[0] * quantptr[0]) - mul v0, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) - lh t5, 114(t0) // inptr[DCTSIZE*7] - mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) - lh s4, 114(a0) // quantptr[DCTSIZE*7] - lh s5, 82(a0) // quantptr[DCTSIZE*5] - lh t6, 82(t0) // inptr[DCTSIZE*5] - sll t2, t2, 14 // tmp0 <<= (CONST_BITS+1) - lh s6, 50(a0) // quantptr[DCTSIZE*3] - lh t8, 18(t0) // inptr[DCTSIZE*1] - subu v0, v0, t7 // tmp2 = MULTIPLY(z2, t5) - MULTIPLY(z3, t6) - lh t7, 50(t0) // inptr[DCTSIZE*3] - lh s7, 18(a0) // quantptr[DCTSIZE*1] - mul t5, s4, t5 // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7]) - mul t6, s5, t6 // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5]) - mul t7, s6, t7 // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3]) - mul t8, s7, t8 // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1]) - addu t3, t2, v0 // tmp10 = tmp0 + z2 - subu t4, t2, v0 // tmp10 = tmp0 - z2 - mult $ac0, zero, zero - mult $ac1, zero, zero - ins t5, t6, 16, 16 - ins t7, t8, 16, 16 - dpa.w.ph $ac0, t5, s0 - 
dpa.w.ph $ac0, t7, s1 - dpa.w.ph $ac1, t5, s2 - dpa.w.ph $ac1, t7, s3 - mflo t5, $ac0 - mflo t6, $ac1 - addiu t9, t9, -1 - addiu t0, t0, 2 - addiu a0, a0, 2 - addiu t1, t1, 4 - addu s5, t4, t5 - subu s4, t4, t5 - addu s6, t3, t6 - subu s7, t3, t6 - shra_r.w s5, s5, 12 // DESCALE(tmp12 + temp1, 12) - shra_r.w s4, s4, 12 // DESCALE(tmp12 - temp1, 12) - shra_r.w s6, s6, 12 // DESCALE(tmp10 + temp2, 12) - shra_r.w s7, s7, 12 // DESCALE(tmp10 - temp2, 12) - sw s5, 32(t1) - sw s4, 64(t1) - sw s6, 0(t1) - bgtz t9, 1b - sw s7, 96(t1) - move t1, v1 - li s4, 15137 - lw s6, 8(t1) // wsptr[2] - li s5, 6270 - lw s7, 24(t1) // wsptr[6] - mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) - lw t2, 0(t1) // wsptr[0] - mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865) - lh t5, 28(t1) // wsptr[7] - lh t6, 20(t1) // wsptr[5] - lh t7, 12(t1) // wsptr[3] - lh t8, 4(t1) // wsptr[1] - ins t5, t6, 16, 16 - ins t7, t8, 16, 16 - mult $ac0, zero, zero - dpa.w.ph $ac0, t5, s0 - dpa.w.ph $ac0, t7, s1 - mult $ac1, zero, zero - dpa.w.ph $ac1, t5, s2 - dpa.w.ph $ac1, t7, s3 - sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) - mflo s6, $ac0 - // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) - subu s4, s4, s5 - addu t3, t2, s4 // tmp10 = tmp0 + z2 - mflo s7, $ac1 - subu t4, t2, s4 // tmp10 = tmp0 - z2 - addu t7, t4, s6 - subu t8, t4, s6 - addu t5, t3, s7 - subu t6, t3, s7 - shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) - shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) - shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) - shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) - sll s4, t9, 2 - lw v0, 0(a2) // output_buf[ctr] - shll_s.w t5, t5, 24 - shll_s.w t6, t6, 24 - shll_s.w t7, t7, 24 - shll_s.w t8, t8, 24 - sra t5, t5, 24 - sra t6, t6, 24 - sra t7, t7, 24 - sra t8, t8, 24 - addu v0, v0, a3 // outptr = output_buf[ctr] + output_col - addiu t5, t5, 128 - addiu t6, t6, 128 - addiu t7, t7, 128 - addiu t8, t8, 128 - sb t5, 0(v0) - sb 
t7, 1(v0) - sb t8, 2(v0) - sb t6, 3(v0) - // 2 - li s4, 15137 - lw s6, 40(t1) // wsptr[2] - li s5, 6270 - lw s7, 56(t1) // wsptr[6] - mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) - lw t2, 32(t1) // wsptr[0] - mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865) - lh t5, 60(t1) // wsptr[7] - lh t6, 52(t1) // wsptr[5] - lh t7, 44(t1) // wsptr[3] - lh t8, 36(t1) // wsptr[1] - ins t5, t6, 16, 16 - ins t7, t8, 16, 16 - mult $ac0, zero, zero - dpa.w.ph $ac0, t5, s0 - dpa.w.ph $ac0, t7, s1 - mult $ac1, zero, zero - dpa.w.ph $ac1, t5, s2 - dpa.w.ph $ac1, t7, s3 - sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) - mflo s6, $ac0 - // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) - subu s4, s4, s5 - addu t3, t2, s4 // tmp10 = tmp0 + z2 - mflo s7, $ac1 - subu t4, t2, s4 // tmp10 = tmp0 - z2 - addu t7, t4, s6 - subu t8, t4, s6 - addu t5, t3, s7 - subu t6, t3, s7 - shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, CONST_BITS-PASS1_BITS+1) - shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, CONST_BITS-PASS1_BITS+1) - shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, CONST_BITS-PASS1_BITS+1) - shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, CONST_BITS-PASS1_BITS+1) - sll s4, t9, 2 - lw v0, 4(a2) // output_buf[ctr] - shll_s.w t5, t5, 24 - shll_s.w t6, t6, 24 - shll_s.w t7, t7, 24 - shll_s.w t8, t8, 24 - sra t5, t5, 24 - sra t6, t6, 24 - sra t7, t7, 24 - sra t8, t8, 24 - addu v0, v0, a3 // outptr = output_buf[ctr] + output_col - addiu t5, t5, 128 - addiu t6, t6, 128 - addiu t7, t7, 128 - addiu t8, t8, 128 - sb t5, 0(v0) - sb t7, 1(v0) - sb t8, 2(v0) - sb t6, 3(v0) - // 3 - li s4, 15137 - lw s6, 72(t1) // wsptr[2] - li s5, 6270 - lw s7, 88(t1) // wsptr[6] - mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) - lw t2, 64(t1) // wsptr[0] - mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865) - lh t5, 92(t1) // wsptr[7] - lh t6, 84(t1) // wsptr[5] - lh t7, 76(t1) // wsptr[3] - lh t8, 68(t1) // wsptr[1] - ins t5, 
t6, 16, 16 - ins t7, t8, 16, 16 - mult $ac0, zero, zero - dpa.w.ph $ac0, t5, s0 - dpa.w.ph $ac0, t7, s1 - mult $ac1, zero, zero - dpa.w.ph $ac1, t5, s2 - dpa.w.ph $ac1, t7, s3 - sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) - mflo s6, $ac0 - // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) - subu s4, s4, s5 - addu t3, t2, s4 // tmp10 = tmp0 + z2 - mflo s7, $ac1 - subu t4, t2, s4 // tmp10 = tmp0 - z2 - addu t7, t4, s6 - subu t8, t4, s6 - addu t5, t3, s7 - subu t6, t3, s7 - shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) - shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) - shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) - shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) - sll s4, t9, 2 - lw v0, 8(a2) // output_buf[ctr] - shll_s.w t5, t5, 24 - shll_s.w t6, t6, 24 - shll_s.w t7, t7, 24 - shll_s.w t8, t8, 24 - sra t5, t5, 24 - sra t6, t6, 24 - sra t7, t7, 24 - sra t8, t8, 24 - addu v0, v0, a3 // outptr = output_buf[ctr] + output_col - addiu t5, t5, 128 - addiu t6, t6, 128 - addiu t7, t7, 128 - addiu t8, t8, 128 - sb t5, 0(v0) - sb t7, 1(v0) - sb t8, 2(v0) - sb t6, 3(v0) - li s4, 15137 - lw s6, 104(t1) // wsptr[2] - li s5, 6270 - lw s7, 120(t1) // wsptr[6] - mul s4, s4, s6 // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065) - lw t2, 96(t1) // wsptr[0] - mul s5, s5, s7 // MULTIPLY((JLONG) wsptr[6], -FIX_0_765366865) - lh t5, 124(t1) // wsptr[7] - lh t6, 116(t1) // wsptr[5] - lh t7, 108(t1) // wsptr[3] - lh t8, 100(t1) // wsptr[1] - ins t5, t6, 16, 16 - ins t7, t8, 16, 16 - mult $ac0, zero, zero - dpa.w.ph $ac0, t5, s0 - dpa.w.ph $ac0, t7, s1 - mult $ac1, zero, zero - dpa.w.ph $ac1, t5, s2 - dpa.w.ph $ac1, t7, s3 - sll t2, t2, 14 // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1) - mflo s6, $ac0 - // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) - subu s4, s4, s5 - addu t3, t2, s4 // tmp10 = tmp0 + z2; - mflo s7, $ac1 - subu t4, t2, s4 // tmp10 = tmp0 - z2; - addu t7, t4, s6 - subu t8, t4, s6 - addu t5, 
t3, s7 - subu t6, t3, s7 - shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) - shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) - shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) - shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) - sll s4, t9, 2 - lw v0, 12(a2) // output_buf[ctr] - shll_s.w t5, t5, 24 - shll_s.w t6, t6, 24 - shll_s.w t7, t7, 24 - shll_s.w t8, t8, 24 - sra t5, t5, 24 - sra t6, t6, 24 - sra t7, t7, 24 - sra t8, t8, 24 - addu v0, v0, a3 // outptr = output_buf[ctr] + output_col - addiu t5, t5, 128 - addiu t6, t6, 128 - addiu t7, t7, 128 - addiu t8, t8, 128 - sb t5, 0(v0) - sb t7, 1(v0) - sb t8, 2(v0) - sb t6, 3(v0) - - RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - j ra - nop -END(jsimd_idct_4x4_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_idct_6x6_mips_dspr2) -/* - * a0 - compptr->dct_table - * a1 - coef_block - * a2 - output_buf - * a3 - output_col - */ - .set at - - SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - addiu sp, sp, -144 - move v0, sp - addiu v1, v0, 24 - addiu t9, zero, 5793 - addiu s0, zero, 10033 - addiu s1, zero, 2998 - -1: - lh s2, 0(a0) // q0 = quantptr[ 0] - lh s3, 32(a0) // q1 = quantptr[16] - lh s4, 64(a0) // q2 = quantptr[32] - lh t2, 64(a1) // tmp2 = inptr[32] - lh t1, 32(a1) // tmp1 = inptr[16] - lh t0, 0(a1) // tmp0 = inptr[ 0] - mul t2, t2, s4 // tmp2 = tmp2 * q2 - mul t1, t1, s3 // tmp1 = tmp1 * q1 - mul t0, t0, s2 // tmp0 = tmp0 * q0 - lh t6, 16(a1) // z1 = inptr[ 8] - lh t8, 80(a1) // z3 = inptr[40] - lh t7, 48(a1) // z2 = inptr[24] - lh s2, 16(a0) // q0 = quantptr[ 8] - lh s4, 80(a0) // q2 = quantptr[40] - lh s3, 48(a0) // q1 = quantptr[24] - mul t2, t2, t9 // tmp2 = tmp2 * 5793 - mul t1, t1, s0 // tmp1 = tmp1 * 10033 - sll t0, t0, 13 // tmp0 = tmp0 << 13 - mul t6, t6, s2 // z1 = z1 * q0 - mul t8, t8, s4 // z3 = z3 * q2 - mul t7, t7, s3 // z2 = z2 * q1 - addu t3, t0, t2 // tmp10 = tmp0 + tmp2 - sll t2, t2, 1 // tmp2 = 
tmp2 << 2 - subu t4, t0, t2 // tmp11 = tmp0 - tmp2; - subu t5, t3, t1 // tmp12 = tmp10 - tmp1 - addu t3, t3, t1 // tmp10 = tmp10 + tmp1 - addu t1, t6, t8 // tmp1 = z1 + z3 - mul t1, t1, s1 // tmp1 = tmp1 * 2998 - shra_r.w t4, t4, 11 // tmp11 = (tmp11 + 1024) >> 11 - subu t2, t6, t8 // tmp2 = z1 - z3 - subu t2, t2, t7 // tmp2 = tmp2 - z2 - sll t2, t2, 2 // tmp2 = tmp2 << 2 - addu t0, t6, t7 // tmp0 = z1 + z2 - sll t0, t0, 13 // tmp0 = tmp0 << 13 - subu s2, t8, t7 // q0 = z3 - z2 - sll s2, s2, 13 // q0 = q0 << 13 - addu t0, t0, t1 // tmp0 = tmp0 + tmp1 - addu t1, s2, t1 // tmp1 = q0 + tmp1 - addu s2, t4, t2 // q0 = tmp11 + tmp2 - subu s3, t4, t2 // q1 = tmp11 - tmp2 - addu t6, t3, t0 // z1 = tmp10 + tmp0 - subu t7, t3, t0 // z2 = tmp10 - tmp0 - addu t4, t5, t1 // tmp11 = tmp12 + tmp1 - subu t5, t5, t1 // tmp12 = tmp12 - tmp1 - shra_r.w t6, t6, 11 // z1 = (z1 + 1024) >> 11 - shra_r.w t7, t7, 11 // z2 = (z2 + 1024) >> 11 - shra_r.w t4, t4, 11 // tmp11 = (tmp11 + 1024) >> 11 - shra_r.w t5, t5, 11 // tmp12 = (tmp12 + 1024) >> 11 - sw s2, 24(v0) - sw s3, 96(v0) - sw t6, 0(v0) - sw t7, 120(v0) - sw t4, 48(v0) - sw t5, 72(v0) - addiu v0, v0, 4 - addiu a1, a1, 2 - bne v0, v1, 1b - addiu a0, a0, 2 - - /* Pass 2: process 6 rows from work array, store into output array. 
*/ - move v0, sp - addiu v1, v0, 144 - -2: - lw t0, 0(v0) - lw t2, 16(v0) - lw s5, 0(a2) - addiu t0, t0, 16 - sll t0, t0, 13 - mul t3, t2, t9 - lw t6, 4(v0) - lw t8, 20(v0) - lw t7, 12(v0) - addu s5, s5, a3 - addu s6, t6, t8 - mul s6, s6, s1 - addu t1, t0, t3 - subu t4, t0, t3 - subu t4, t4, t3 - lw t3, 8(v0) - mul t0, t3, s0 - addu s7, t6, t7 - sll s7, s7, 13 - addu s7, s6, s7 - subu t2, t8, t7 - sll t2, t2, 13 - addu t2, s6, t2 - subu s6, t6, t7 - subu s6, s6, t8 - sll s6, s6, 13 - addu t3, t1, t0 - subu t5, t1, t0 - addu t6, t3, s7 - subu t3, t3, s7 - addu t7, t4, s6 - subu t4, t4, s6 - addu t8, t5, t2 - subu t5, t5, t2 - shll_s.w t6, t6, 6 - shll_s.w t3, t3, 6 - shll_s.w t7, t7, 6 - shll_s.w t4, t4, 6 - shll_s.w t8, t8, 6 - shll_s.w t5, t5, 6 - sra t6, t6, 24 - addiu t6, t6, 128 - sra t3, t3, 24 - addiu t3, t3, 128 - sb t6, 0(s5) - sra t7, t7, 24 - addiu t7, t7, 128 - sb t3, 5(s5) - sra t4, t4, 24 - addiu t4, t4, 128 - sb t7, 1(s5) - sra t8, t8, 24 - addiu t8, t8, 128 - sb t4, 4(s5) - addiu v0, v0, 24 - sra t5, t5, 24 - addiu t5, t5, 128 - sb t8, 2(s5) - addiu a2, a2, 4 - bne v0, v1, 2b - sb t5, 3(s5) - - addiu sp, sp, 144 - - RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 - - j ra - nop - -END(jsimd_idct_6x6_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass1_mips_dspr2) -/* - * a0 - compptr->dct_table - * a1 - coef_block - * a2 - workspace - */ - - SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 - - li a3, 8 - -1: - // odd part - lh t0, 48(a1) - lh t1, 48(a0) - lh t2, 16(a1) - lh t3, 16(a0) - lh t4, 80(a1) - lh t5, 80(a0) - lh t6, 112(a1) - lh t7, 112(a0) - mul t0, t0, t1 // z2 - mul t1, t2, t3 // z1 - mul t2, t4, t5 // z3 - mul t3, t6, t7 // z4 - li t4, 10703 // FIX(1.306562965) - li t5, 4433 // FIX_0_541196100 - li t6, 7053 // FIX(0.860918669) - mul t4, t0,t4 // tmp11 - mul t5, t0,t5 // -tmp14 - addu t7, t1,t2 // tmp10 - addu t8, t7,t3 // tmp10 + z4 - mul t6, t6, t8 // 
tmp15 - li t8, 2139 // FIX(0.261052384) - mul t8, t7, t8 // MULTIPLY(tmp10, FIX(0.261052384)) - li t7, 2295 // FIX(0.280143716) - mul t7, t1, t7 // MULTIPLY(z1, FIX(0.280143716)) - addu t9, t2, t3 // z3 + z4 - li s0, 8565 // FIX(1.045510580) - mul t9, t9, s0 // -tmp13 - li s0, 12112 // FIX(1.478575242) - mul s0, t2, s0 // MULTIPLY(z3, FIX(1.478575242) - li s1, 12998 // FIX(1.586706681) - mul s1, t3, s1 // MULTIPLY(z4, FIX(1.586706681)) - li s2, 5540 // FIX(0.676326758) - mul s2, t1, s2 // MULTIPLY(z1, FIX(0.676326758)) - li s3, 16244 // FIX(1.982889723) - mul s3, t3, s3 // MULTIPLY(z4, FIX(1.982889723)) - subu t1, t1, t3 // z1-=z4 - subu t0, t0, t2 // z2-=z3 - addu t2, t0, t1 // z1+z2 - li t3, 4433 // FIX_0_541196100 - mul t2, t2, t3 // z3 - li t3, 6270 // FIX_0_765366865 - mul t1, t1, t3 // MULTIPLY(z1, FIX_0_765366865) - li t3, 15137 // FIX_0_765366865 - mul t0, t0, t3 // MULTIPLY(z2, FIX_1_847759065) - addu t8, t6, t8 // tmp12 - addu t3, t8, t4 // tmp12 + tmp11 - addu t3, t3, t7 // tmp10 - subu t8, t8, t9 // tmp12 + tmp13 - addu s0, t5, s0 - subu t8, t8, s0 // tmp12 - subu t9, t6, t9 - subu s1, s1, t4 - addu t9, t9, s1 // tmp13 - subu t6, t6, t5 - subu t6, t6, s2 - subu t6, t6, s3 // tmp15 - // even part start - lh t4, 64(a1) - lh t5, 64(a0) - lh t7, 32(a1) - lh s0, 32(a0) - lh s1, 0(a1) - lh s2, 0(a0) - lh s3, 96(a1) - lh v0, 96(a0) - mul t4, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*4],quantptr[DCTSIZE*4]) - mul t5, t7, s0 // DEQUANTIZE(inptr[DCTSIZE*2],quantptr[DCTSIZE*2]) - mul t7, s1, s2 // DEQUANTIZE(inptr[DCTSIZE*0],quantptr[DCTSIZE*0]) - mul s0, s3, v0 // DEQUANTIZE(inptr[DCTSIZE*6],quantptr[DCTSIZE*6]) - // odd part end - addu t1, t2, t1 // tmp11 - subu t0, t2, t0 // tmp14 - // update counter and pointers - addiu a3, a3, -1 - addiu a0, a0, 2 - addiu a1, a1, 2 - // even part rest - li s1, 10033 - li s2, 11190 - mul t4, t4, s1 // z4 - mul s1, t5, s2 // z4 - sll t5, t5, 13 // z1 - sll t7, t7, 13 - addiu t7, t7, 1024 // z3 - sll s0, s0, 13 // z2 - addu s2, t7, t4 
// tmp10 - subu t4, t7, t4 // tmp11 - subu s3, t5, s0 // tmp12 - addu t2, t7, s3 // tmp21 - subu s3, t7, s3 // tmp24 - addu t7, s1, s0 // tmp12 - addu v0, s2, t7 // tmp20 - subu s2, s2, t7 // tmp25 - subu s1, s1, t5 // z4 - z1 - subu s1, s1, s0 // tmp12 - addu s0, t4, s1 // tmp22 - subu t4, t4, s1 // tmp23 - // final output stage - addu t5, v0, t3 - subu v0, v0, t3 - addu t3, t2, t1 - subu t2, t2, t1 - addu t1, s0, t8 - subu s0, s0, t8 - addu t8, t4, t9 - subu t4, t4, t9 - addu t9, s3, t0 - subu s3, s3, t0 - addu t0, s2, t6 - subu s2, s2, t6 - sra t5, t5, 11 - sra t3, t3, 11 - sra t1, t1, 11 - sra t8, t8, 11 - sra t9, t9, 11 - sra t0, t0, 11 - sra s2, s2, 11 - sra s3, s3, 11 - sra t4, t4, 11 - sra s0, s0, 11 - sra t2, t2, 11 - sra v0, v0, 11 - sw t5, 0(a2) - sw t3, 32(a2) - sw t1, 64(a2) - sw t8, 96(a2) - sw t9, 128(a2) - sw t0, 160(a2) - sw s2, 192(a2) - sw s3, 224(a2) - sw t4, 256(a2) - sw s0, 288(a2) - sw t2, 320(a2) - sw v0, 352(a2) - bgtz a3, 1b - addiu a2, a2, 4 - - RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 - - j ra - nop - -END(jsimd_idct_12x12_pass1_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass2_mips_dspr2) -/* - * a0 - workspace - * a1 - output - */ - - SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 - - li a3, 12 - -1: - // Odd part - lw t0, 12(a0) - lw t1, 4(a0) - lw t2, 20(a0) - lw t3, 28(a0) - li t4, 10703 // FIX(1.306562965) - li t5, 4433 // FIX_0_541196100 - mul t4, t0, t4 // tmp11 - mul t5, t0, t5 // -tmp14 - addu t6, t1, t2 // tmp10 - li t7, 2139 // FIX(0.261052384) - mul t7, t6, t7 // MULTIPLY(tmp10, FIX(0.261052384)) - addu t6, t6, t3 // tmp10 + z4 - li t8, 7053 // FIX(0.860918669) - mul t6, t6, t8 // tmp15 - li t8, 2295 // FIX(0.280143716) - mul t8, t1, t8 // MULTIPLY(z1, FIX(0.280143716)) - addu t9, t2, t3 // z3 + z4 - li s0, 8565 // FIX(1.045510580) - mul t9, t9, s0 // -tmp13 - li s0, 12112 // FIX(1.478575242) - mul s0, t2, s0 // MULTIPLY(z3, FIX(1.478575242)) - li 
s1, 12998 // FIX(1.586706681) - mul s1, t3, s1 // MULTIPLY(z4, FIX(1.586706681)) - li s2, 5540 // FIX(0.676326758) - mul s2, t1, s2 // MULTIPLY(z1, FIX(0.676326758)) - li s3, 16244 // FIX(1.982889723) - mul s3, t3, s3 // MULTIPLY(z4, FIX(1.982889723)) - subu t1, t1, t3 // z1 -= z4 - subu t0, t0, t2 // z2 -= z3 - addu t2, t1, t0 // z1 + z2 - li t3, 4433 // FIX_0_541196100 - mul t2, t2, t3 // z3 - li t3, 6270 // FIX_0_765366865 - mul t1, t1, t3 // MULTIPLY(z1, FIX_0_765366865) - li t3, 15137 // FIX_1_847759065 - mul t0, t0, t3 // MULTIPLY(z2, FIX_1_847759065) - addu t3, t6, t7 // tmp12 - addu t7, t3, t4 - addu t7, t7, t8 // tmp10 - subu t3, t3, t9 - subu t3, t3, t5 - subu t3, t3, s0 // tmp12 - subu t9, t6, t9 - subu t9, t9, t4 - addu t9, t9, s1 // tmp13 - subu t6, t6, t5 - subu t6, t6, s2 - subu t6, t6, s3 // tmp15 - addu t1, t2, t1 // tmp11 - subu t0, t2, t0 // tmp14 - // even part - lw t2, 16(a0) // z4 - lw t4, 8(a0) // z1 - lw t5, 0(a0) // z3 - lw t8, 24(a0) // z2 - li s0, 10033 // FIX(1.224744871) - li s1, 11190 // FIX(1.366025404) - mul t2, t2, s0 // z4 - mul s0, t4, s1 // z4 - addiu t5, t5, 0x10 - sll t5, t5, 13 // z3 - sll t4, t4, 13 // z1 - sll t8, t8, 13 // z2 - subu s1, t4, t8 // tmp12 - addu s2, t5, t2 // tmp10 - subu t2, t5, t2 // tmp11 - addu s3, t5, s1 // tmp21 - subu s1, t5, s1 // tmp24 - addu t5, s0, t8 // tmp12 - addu v0, s2, t5 // tmp20 - subu t5, s2, t5 // tmp25 - subu t4, s0, t4 - subu t4, t4, t8 // tmp12 - addu t8, t2, t4 // tmp22 - subu t2, t2, t4 // tmp23 - // increment counter and pointers - addiu a3, a3, -1 - addiu a0, a0, 32 - // Final stage - addu t4, v0, t7 - subu v0, v0, t7 - addu t7, s3, t1 - subu s3, s3, t1 - addu t1, t8, t3 - subu t8, t8, t3 - addu t3, t2, t9 - subu t2, t2, t9 - addu t9, s1, t0 - subu s1, s1, t0 - addu t0, t5, t6 - subu t5, t5, t6 - sll t4, t4, 4 - sll t7, t7, 4 - sll t1, t1, 4 - sll t3, t3, 4 - sll t9, t9, 4 - sll t0, t0, 4 - sll t5, t5, 4 - sll s1, s1, 4 - sll t2, t2, 4 - sll t8, t8, 4 - sll s3, s3, 4 - sll v0, v0, 4 
- shll_s.w t4, t4, 2 - shll_s.w t7, t7, 2 - shll_s.w t1, t1, 2 - shll_s.w t3, t3, 2 - shll_s.w t9, t9, 2 - shll_s.w t0, t0, 2 - shll_s.w t5, t5, 2 - shll_s.w s1, s1, 2 - shll_s.w t2, t2, 2 - shll_s.w t8, t8, 2 - shll_s.w s3, s3, 2 - shll_s.w v0, v0, 2 - srl t4, t4, 24 - srl t7, t7, 24 - srl t1, t1, 24 - srl t3, t3, 24 - srl t9, t9, 24 - srl t0, t0, 24 - srl t5, t5, 24 - srl s1, s1, 24 - srl t2, t2, 24 - srl t8, t8, 24 - srl s3, s3, 24 - srl v0, v0, 24 - lw t6, 0(a1) - addiu t4, t4, 0x80 - addiu t7, t7, 0x80 - addiu t1, t1, 0x80 - addiu t3, t3, 0x80 - addiu t9, t9, 0x80 - addiu t0, t0, 0x80 - addiu t5, t5, 0x80 - addiu s1, s1, 0x80 - addiu t2, t2, 0x80 - addiu t8, t8, 0x80 - addiu s3, s3, 0x80 - addiu v0, v0, 0x80 - sb t4, 0(t6) - sb t7, 1(t6) - sb t1, 2(t6) - sb t3, 3(t6) - sb t9, 4(t6) - sb t0, 5(t6) - sb t5, 6(t6) - sb s1, 7(t6) - sb t2, 8(t6) - sb t8, 9(t6) - sb s3, 10(t6) - sb v0, 11(t6) - bgtz a3, 1b - addiu a1, a1, 4 - - RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 - - jr ra - nop - -END(jsimd_idct_12x12_pass2_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_convsamp_mips_dspr2) -/* - * a0 - sample_data - * a1 - start_col - * a2 - workspace - */ - - lw t0, 0(a0) - li t7, 0xff80ff80 - addu t0, t0, a1 - ulw t1, 0(t0) - ulw t2, 4(t0) - preceu.ph.qbr t3, t1 - preceu.ph.qbl t4, t1 - lw t0, 4(a0) - preceu.ph.qbr t5, t2 - preceu.ph.qbl t6, t2 - addu t0, t0, a1 - addu.ph t3, t3, t7 - addu.ph t4, t4, t7 - ulw t1, 0(t0) - ulw t2, 4(t0) - addu.ph t5, t5, t7 - addu.ph t6, t6, t7 - usw t3, 0(a2) - usw t4, 4(a2) - preceu.ph.qbr t3, t1 - preceu.ph.qbl t4, t1 - usw t5, 8(a2) - usw t6, 12(a2) - - lw t0, 8(a0) - preceu.ph.qbr t5, t2 - preceu.ph.qbl t6, t2 - addu t0, t0, a1 - addu.ph t3, t3, t7 - addu.ph t4, t4, t7 - ulw t1, 0(t0) - ulw t2, 4(t0) - addu.ph t5, t5, t7 - addu.ph t6, t6, t7 - usw t3, 16(a2) - usw t4, 20(a2) - preceu.ph.qbr t3, t1 - preceu.ph.qbl t4, t1 - usw t5, 24(a2) - usw t6, 28(a2) - - lw t0, 
12(a0) - preceu.ph.qbr t5, t2 - preceu.ph.qbl t6, t2 - addu t0, t0, a1 - addu.ph t3, t3, t7 - addu.ph t4, t4, t7 - ulw t1, 0(t0) - ulw t2, 4(t0) - addu.ph t5, t5, t7 - addu.ph t6, t6, t7 - usw t3, 32(a2) - usw t4, 36(a2) - preceu.ph.qbr t3, t1 - preceu.ph.qbl t4, t1 - usw t5, 40(a2) - usw t6, 44(a2) - - lw t0, 16(a0) - preceu.ph.qbr t5, t2 - preceu.ph.qbl t6, t2 - addu t0, t0, a1 - addu.ph t3, t3, t7 - addu.ph t4, t4, t7 - ulw t1, 0(t0) - ulw t2, 4(t0) - addu.ph t5, t5, t7 - addu.ph t6, t6, t7 - usw t3, 48(a2) - usw t4, 52(a2) - preceu.ph.qbr t3, t1 - preceu.ph.qbl t4, t1 - usw t5, 56(a2) - usw t6, 60(a2) - - lw t0, 20(a0) - preceu.ph.qbr t5, t2 - preceu.ph.qbl t6, t2 - addu t0, t0, a1 - addu.ph t3, t3, t7 - addu.ph t4, t4, t7 - ulw t1, 0(t0) - ulw t2, 4(t0) - addu.ph t5, t5, t7 - addu.ph t6, t6, t7 - usw t3, 64(a2) - usw t4, 68(a2) - preceu.ph.qbr t3, t1 - preceu.ph.qbl t4, t1 - usw t5, 72(a2) - usw t6, 76(a2) - - lw t0, 24(a0) - preceu.ph.qbr t5, t2 - preceu.ph.qbl t6, t2 - addu t0, t0, a1 - addu.ph t3, t3, t7 - addu.ph t4, t4, t7 - ulw t1, 0(t0) - ulw t2, 4(t0) - addu.ph t5, t5, t7 - addu.ph t6, t6, t7 - usw t3, 80(a2) - usw t4, 84(a2) - preceu.ph.qbr t3, t1 - preceu.ph.qbl t4, t1 - usw t5, 88(a2) - usw t6, 92(a2) - - lw t0, 28(a0) - preceu.ph.qbr t5, t2 - preceu.ph.qbl t6, t2 - addu t0, t0, a1 - addu.ph t3, t3, t7 - addu.ph t4, t4, t7 - ulw t1, 0(t0) - ulw t2, 4(t0) - addu.ph t5, t5, t7 - addu.ph t6, t6, t7 - usw t3, 96(a2) - usw t4, 100(a2) - preceu.ph.qbr t3, t1 - preceu.ph.qbl t4, t1 - usw t5, 104(a2) - usw t6, 108(a2) - preceu.ph.qbr t5, t2 - preceu.ph.qbl t6, t2 - addu.ph t3, t3, t7 - addu.ph t4, t4, t7 - addu.ph t5, t5, t7 - addu.ph t6, t6, t7 - usw t3, 112(a2) - usw t4, 116(a2) - usw t5, 120(a2) - usw t6, 124(a2) - - j ra - nop - -END(jsimd_convsamp_mips_dspr2) - -/*****************************************************************************/ -LEAF_MIPS_DSPR2(jsimd_convsamp_float_mips_dspr2) -/* - * a0 - sample_data - * a1 - start_col - * a2 - workspace 
- */ - - .set at - - lw t0, 0(a0) - addu t0, t0, a1 - lbu t1, 0(t0) - lbu t2, 1(t0) - lbu t3, 2(t0) - lbu t4, 3(t0) - lbu t5, 4(t0) - lbu t6, 5(t0) - lbu t7, 6(t0) - lbu t8, 7(t0) - addiu t1, t1, -128 - addiu t2, t2, -128 - addiu t3, t3, -128 - addiu t4, t4, -128 - addiu t5, t5, -128 - addiu t6, t6, -128 - addiu t7, t7, -128 - addiu t8, t8, -128 - mtc1 t1, f2 - mtc1 t2, f4 - mtc1 t3, f6 - mtc1 t4, f8 - mtc1 t5, f10 - mtc1 t6, f12 - mtc1 t7, f14 - mtc1 t8, f16 - cvt.s.w f2, f2 - cvt.s.w f4, f4 - cvt.s.w f6, f6 - cvt.s.w f8, f8 - cvt.s.w f10, f10 - cvt.s.w f12, f12 - cvt.s.w f14, f14 - cvt.s.w f16, f16 - lw t0, 4(a0) - swc1 f2, 0(a2) - swc1 f4, 4(a2) - swc1 f6, 8(a2) - addu t0, t0, a1 - swc1 f8, 12(a2) - swc1 f10, 16(a2) - swc1 f12, 20(a2) - swc1 f14, 24(a2) - swc1 f16, 28(a2) - //elemr 1 - lbu t1, 0(t0) - lbu t2, 1(t0) - lbu t3, 2(t0) - lbu t4, 3(t0) - lbu t5, 4(t0) - lbu t6, 5(t0) - lbu t7, 6(t0) - lbu t8, 7(t0) - addiu t1, t1, -128 - addiu t2, t2, -128 - addiu t3, t3, -128 - addiu t4, t4, -128 - addiu t5, t5, -128 - addiu t6, t6, -128 - addiu t7, t7, -128 - addiu t8, t8, -128 - mtc1 t1, f2 - mtc1 t2, f4 - mtc1 t3, f6 - mtc1 t4, f8 - mtc1 t5, f10 - mtc1 t6, f12 - mtc1 t7, f14 - mtc1 t8, f16 - cvt.s.w f2, f2 - cvt.s.w f4, f4 - cvt.s.w f6, f6 - cvt.s.w f8, f8 - cvt.s.w f10, f10 - cvt.s.w f12, f12 - cvt.s.w f14, f14 - cvt.s.w f16, f16 - lw t0, 8(a0) - swc1 f2, 32(a2) - swc1 f4, 36(a2) - swc1 f6, 40(a2) - addu t0, t0, a1 - swc1 f8, 44(a2) - swc1 f10, 48(a2) - swc1 f12, 52(a2) - swc1 f14, 56(a2) - swc1 f16, 60(a2) - //elemr 2 - lbu t1, 0(t0) - lbu t2, 1(t0) - lbu t3, 2(t0) - lbu t4, 3(t0) - lbu t5, 4(t0) - lbu t6, 5(t0) - lbu t7, 6(t0) - lbu t8, 7(t0) - addiu t1, t1, -128 - addiu t2, t2, -128 - addiu t3, t3, -128 - addiu t4, t4, -128 - addiu t5, t5, -128 - addiu t6, t6, -128 - addiu t7, t7, -128 - addiu t8, t8, -128 - mtc1 t1, f2 - mtc1 t2, f4 - mtc1 t3, f6 - mtc1 t4, f8 - mtc1 t5, f10 - mtc1 t6, f12 - mtc1 t7, f14 - mtc1 t8, f16 - cvt.s.w f2, f2 - cvt.s.w f4, f4 - 
cvt.s.w f6, f6 - cvt.s.w f8, f8 - cvt.s.w f10, f10 - cvt.s.w f12, f12 - cvt.s.w f14, f14 - cvt.s.w f16, f16 - lw t0, 12(a0) - swc1 f2, 64(a2) - swc1 f4, 68(a2) - swc1 f6, 72(a2) - addu t0, t0, a1 - swc1 f8, 76(a2) - swc1 f10, 80(a2) - swc1 f12, 84(a2) - swc1 f14, 88(a2) - swc1 f16, 92(a2) - //elemr 3 - lbu t1, 0(t0) - lbu t2, 1(t0) - lbu t3, 2(t0) - lbu t4, 3(t0) - lbu t5, 4(t0) - lbu t6, 5(t0) - lbu t7, 6(t0) - lbu t8, 7(t0) - addiu t1, t1, -128 - addiu t2, t2, -128 - addiu t3, t3, -128 - addiu t4, t4, -128 - addiu t5, t5, -128 - addiu t6, t6, -128 - addiu t7, t7, -128 - addiu t8, t8, -128 - mtc1 t1, f2 - mtc1 t2, f4 - mtc1 t3, f6 - mtc1 t4, f8 - mtc1 t5, f10 - mtc1 t6, f12 - mtc1 t7, f14 - mtc1 t8, f16 - cvt.s.w f2, f2 - cvt.s.w f4, f4 - cvt.s.w f6, f6 - cvt.s.w f8, f8 - cvt.s.w f10, f10 - cvt.s.w f12, f12 - cvt.s.w f14, f14 - cvt.s.w f16, f16 - lw t0, 16(a0) - swc1 f2, 96(a2) - swc1 f4, 100(a2) - swc1 f6, 104(a2) - addu t0, t0, a1 - swc1 f8, 108(a2) - swc1 f10, 112(a2) - swc1 f12, 116(a2) - swc1 f14, 120(a2) - swc1 f16, 124(a2) - //elemr 4 - lbu t1, 0(t0) - lbu t2, 1(t0) - lbu t3, 2(t0) - lbu t4, 3(t0) - lbu t5, 4(t0) - lbu t6, 5(t0) - lbu t7, 6(t0) - lbu t8, 7(t0) - addiu t1, t1, -128 - addiu t2, t2, -128 - addiu t3, t3, -128 - addiu t4, t4, -128 - addiu t5, t5, -128 - addiu t6, t6, -128 - addiu t7, t7, -128 - addiu t8, t8, -128 - mtc1 t1, f2 - mtc1 t2, f4 - mtc1 t3, f6 - mtc1 t4, f8 - mtc1 t5, f10 - mtc1 t6, f12 - mtc1 t7, f14 - mtc1 t8, f16 - cvt.s.w f2, f2 - cvt.s.w f4, f4 - cvt.s.w f6, f6 - cvt.s.w f8, f8 - cvt.s.w f10, f10 - cvt.s.w f12, f12 - cvt.s.w f14, f14 - cvt.s.w f16, f16 - lw t0, 20(a0) - swc1 f2, 128(a2) - swc1 f4, 132(a2) - swc1 f6, 136(a2) - addu t0, t0, a1 - swc1 f8, 140(a2) - swc1 f10, 144(a2) - swc1 f12, 148(a2) - swc1 f14, 152(a2) - swc1 f16, 156(a2) - //elemr 5 - lbu t1, 0(t0) - lbu t2, 1(t0) - lbu t3, 2(t0) - lbu t4, 3(t0) - lbu t5, 4(t0) - lbu t6, 5(t0) - lbu t7, 6(t0) - lbu t8, 7(t0) - addiu t1, t1, -128 - addiu t2, t2, -128 - addiu t3, 
t3, -128 - addiu t4, t4, -128 - addiu t5, t5, -128 - addiu t6, t6, -128 - addiu t7, t7, -128 - addiu t8, t8, -128 - mtc1 t1, f2 - mtc1 t2, f4 - mtc1 t3, f6 - mtc1 t4, f8 - mtc1 t5, f10 - mtc1 t6, f12 - mtc1 t7, f14 - mtc1 t8, f16 - cvt.s.w f2, f2 - cvt.s.w f4, f4 - cvt.s.w f6, f6 - cvt.s.w f8, f8 - cvt.s.w f10, f10 - cvt.s.w f12, f12 - cvt.s.w f14, f14 - cvt.s.w f16, f16 - lw t0, 24(a0) - swc1 f2, 160(a2) - swc1 f4, 164(a2) - swc1 f6, 168(a2) - addu t0, t0, a1 - swc1 f8, 172(a2) - swc1 f10, 176(a2) - swc1 f12, 180(a2) - swc1 f14, 184(a2) - swc1 f16, 188(a2) - //elemr 6 - lbu t1, 0(t0) - lbu t2, 1(t0) - lbu t3, 2(t0) - lbu t4, 3(t0) - lbu t5, 4(t0) - lbu t6, 5(t0) - lbu t7, 6(t0) - lbu t8, 7(t0) - addiu t1, t1, -128 - addiu t2, t2, -128 - addiu t3, t3, -128 - addiu t4, t4, -128 - addiu t5, t5, -128 - addiu t6, t6, -128 - addiu t7, t7, -128 - addiu t8, t8, -128 - mtc1 t1, f2 - mtc1 t2, f4 - mtc1 t3, f6 - mtc1 t4, f8 - mtc1 t5, f10 - mtc1 t6, f12 - mtc1 t7, f14 - mtc1 t8, f16 - cvt.s.w f2, f2 - cvt.s.w f4, f4 - cvt.s.w f6, f6 - cvt.s.w f8, f8 - cvt.s.w f10, f10 - cvt.s.w f12, f12 - cvt.s.w f14, f14 - cvt.s.w f16, f16 - lw t0, 28(a0) - swc1 f2, 192(a2) - swc1 f4, 196(a2) - swc1 f6, 200(a2) - addu t0, t0, a1 - swc1 f8, 204(a2) - swc1 f10, 208(a2) - swc1 f12, 212(a2) - swc1 f14, 216(a2) - swc1 f16, 220(a2) - //elemr 7 - lbu t1, 0(t0) - lbu t2, 1(t0) - lbu t3, 2(t0) - lbu t4, 3(t0) - lbu t5, 4(t0) - lbu t6, 5(t0) - lbu t7, 6(t0) - lbu t8, 7(t0) - addiu t1, t1, -128 - addiu t2, t2, -128 - addiu t3, t3, -128 - addiu t4, t4, -128 - addiu t5, t5, -128 - addiu t6, t6, -128 - addiu t7, t7, -128 - addiu t8, t8, -128 - mtc1 t1, f2 - mtc1 t2, f4 - mtc1 t3, f6 - mtc1 t4, f8 - mtc1 t5, f10 - mtc1 t6, f12 - mtc1 t7, f14 - mtc1 t8, f16 - cvt.s.w f2, f2 - cvt.s.w f4, f4 - cvt.s.w f6, f6 - cvt.s.w f8, f8 - cvt.s.w f10, f10 - cvt.s.w f12, f12 - cvt.s.w f14, f14 - cvt.s.w f16, f16 - swc1 f2, 224(a2) - swc1 f4, 228(a2) - swc1 f6, 232(a2) - swc1 f8, 236(a2) - swc1 f10, 240(a2) - swc1 f12, 
244(a2) - swc1 f14, 248(a2) - swc1 f16, 252(a2) - - j ra - nop - -END(jsimd_convsamp_float_mips_dspr2) - -/*****************************************************************************/ diff --git a/simd/jsimd_mips_dspr2_asm.h b/simd/jsimd_mips_dspr2_asm.h deleted file mode 100644 index 499e34b..0000000 --- a/simd/jsimd_mips_dspr2_asm.h +++ /dev/null @@ -1,283 +0,0 @@ -/* - * MIPS DSPr2 optimizations for libjpeg-turbo - * - * Copyright (C) 2013, MIPS Technologies, Inc., California. - * All Rights Reserved. - * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com) - * Darko Laus (darko.laus@imgtec.com) - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. 
- */ - -#define zero $0 -#define AT $1 -#define v0 $2 -#define v1 $3 -#define a0 $4 -#define a1 $5 -#define a2 $6 -#define a3 $7 -#define t0 $8 -#define t1 $9 -#define t2 $10 -#define t3 $11 -#define t4 $12 -#define t5 $13 -#define t6 $14 -#define t7 $15 -#define s0 $16 -#define s1 $17 -#define s2 $18 -#define s3 $19 -#define s4 $20 -#define s5 $21 -#define s6 $22 -#define s7 $23 -#define t8 $24 -#define t9 $25 -#define k0 $26 -#define k1 $27 -#define gp $28 -#define sp $29 -#define fp $30 -#define s8 $30 -#define ra $31 - -#define f0 $f0 -#define f1 $f1 -#define f2 $f2 -#define f3 $f3 -#define f4 $f4 -#define f5 $f5 -#define f6 $f6 -#define f7 $f7 -#define f8 $f8 -#define f9 $f9 -#define f10 $f10 -#define f11 $f11 -#define f12 $f12 -#define f13 $f13 -#define f14 $f14 -#define f15 $f15 -#define f16 $f16 -#define f17 $f17 -#define f18 $f18 -#define f19 $f19 -#define f20 $f20 -#define f21 $f21 -#define f22 $f22 -#define f23 $f23 -#define f24 $f24 -#define f25 $f25 -#define f26 $f26 -#define f27 $f27 -#define f28 $f28 -#define f29 $f29 -#define f30 $f30 -#define f31 $f31 - -/* - * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2 - */ -#define LEAF_MIPS32R2(symbol) \ - .globl symbol; \ - .align 2; \ - .type symbol, @function; \ - .ent symbol, 0; \ -symbol: .frame sp, 0, ra; \ - .set push; \ - .set arch=mips32r2; \ - .set noreorder; \ - .set noat; - -/* - * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2 - */ -#define LEAF_MIPS_DSPR2(symbol) \ -LEAF_MIPS32R2(symbol) \ - .set dspr2; - -/* - * END - mark end of function - */ -#define END(function) \ - .set pop; \ - .end function; \ - .size function,.-function - -/* - * Checks if stack offset is big enough for storing/restoring regs_num - * number of register to/from stack. Stack offset must be greater than - * or equal to the number of bytes needed for storing registers (regs_num*4). 
- * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is - * preserved for input arguments of the functions, already stored in a0-a3), - * stack size can be further optimized by utilizing this space. - */ -.macro CHECK_STACK_OFFSET regs_num, stack_offset -.if \stack_offset < \regs_num * 4 - 16 -.error "Stack offset too small." -.endif -.endm - -/* - * Saves set of registers on stack. Maximum number of registers that - * can be saved on stack is limitted to 14 (a0-a3, v0-v1 and s0-s7). - * Stack offset is number of bytes that are added to stack pointer (sp) - * before registers are pushed in order to provide enough space on stack - * (offset must be multiple of 4, and must be big enough, as described by - * CHECK_STACK_OFFSET macro). This macro is intended to be used in - * combination with RESTORE_REGS_FROM_STACK macro. Example: - * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 - * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 - */ -.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \ - r2 = 0, r3 = 0, r4 = 0, \ - r5 = 0, r6 = 0, r7 = 0, \ - r8 = 0, r9 = 0, r10 = 0, \ - r11 = 0, r12 = 0, r13 = 0, \ - r14 = 0 - .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4) - .error "Stack offset must be pozitive and multiple of 4." 
- .endif - .if \stack_offset != 0 - addiu sp, sp, -\stack_offset - .endif - sw \r1, 0(sp) - .if \r2 != 0 - sw \r2, 4(sp) - .endif - .if \r3 != 0 - sw \r3, 8(sp) - .endif - .if \r4 != 0 - sw \r4, 12(sp) - .endif - .if \r5 != 0 - CHECK_STACK_OFFSET 5, \stack_offset - sw \r5, 16(sp) - .endif - .if \r6 != 0 - CHECK_STACK_OFFSET 6, \stack_offset - sw \r6, 20(sp) - .endif - .if \r7 != 0 - CHECK_STACK_OFFSET 7, \stack_offset - sw \r7, 24(sp) - .endif - .if \r8 != 0 - CHECK_STACK_OFFSET 8, \stack_offset - sw \r8, 28(sp) - .endif - .if \r9 != 0 - CHECK_STACK_OFFSET 9, \stack_offset - sw \r9, 32(sp) - .endif - .if \r10 != 0 - CHECK_STACK_OFFSET 10, \stack_offset - sw \r10, 36(sp) - .endif - .if \r11 != 0 - CHECK_STACK_OFFSET 11, \stack_offset - sw \r11, 40(sp) - .endif - .if \r12 != 0 - CHECK_STACK_OFFSET 12, \stack_offset - sw \r12, 44(sp) - .endif - .if \r13 != 0 - CHECK_STACK_OFFSET 13, \stack_offset - sw \r13, 48(sp) - .endif - .if \r14 != 0 - CHECK_STACK_OFFSET 14, \stack_offset - sw \r14, 52(sp) - .endif -.endm - -/* - * Restores set of registers from stack. Maximum number of registers that - * can be restored from stack is limitted to 14 (a0-a3, v0-v1 and s0-s7). - * Stack offset is number of bytes that are added to stack pointer (sp) - * after registers are restored (offset must be multiple of 4, and must - * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is - * intended to be used in combination with RESTORE_REGS_FROM_STACK macro. - * Example: - * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 - * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 - */ -.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \ - r2 = 0, r3 = 0, r4 = 0, \ - r5 = 0, r6 = 0, r7 = 0, \ - r8 = 0, r9 = 0, r10 = 0, \ - r11 = 0, r12 = 0, r13 = 0, \ - r14 = 0 - .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4) - .error "Stack offset must be pozitive and multiple of 4." 
- .endif - lw \r1, 0(sp) - .if \r2 != 0 - lw \r2, 4(sp) - .endif - .if \r3 != 0 - lw \r3, 8(sp) - .endif - .if \r4 != 0 - lw \r4, 12(sp) - .endif - .if \r5 != 0 - CHECK_STACK_OFFSET 5, \stack_offset - lw \r5, 16(sp) - .endif - .if \r6 != 0 - CHECK_STACK_OFFSET 6, \stack_offset - lw \r6, 20(sp) - .endif - .if \r7 != 0 - CHECK_STACK_OFFSET 7, \stack_offset - lw \r7, 24(sp) - .endif - .if \r8 != 0 - CHECK_STACK_OFFSET 8, \stack_offset - lw \r8, 28(sp) - .endif - .if \r9 != 0 - CHECK_STACK_OFFSET 9, \stack_offset - lw \r9, 32(sp) - .endif - .if \r10 != 0 - CHECK_STACK_OFFSET 10, \stack_offset - lw \r10, 36(sp) - .endif - .if \r11 != 0 - CHECK_STACK_OFFSET 11, \stack_offset - lw \r11, 40(sp) - .endif - .if \r12 != 0 - CHECK_STACK_OFFSET 12, \stack_offset - lw \r12, 44(sp) - .endif - .if \r13 != 0 - CHECK_STACK_OFFSET 13, \stack_offset - lw \r13, 48(sp) - .endif - .if \r14 != 0 - CHECK_STACK_OFFSET 14, \stack_offset - lw \r14, 52(sp) - .endif - .if \stack_offset != 0 - addiu sp, sp, \stack_offset - .endif -.endm diff --git a/simd/jsimd_x86_64.c b/simd/jsimd_x86_64.c deleted file mode 100644 index a62bcdb..0000000 --- a/simd/jsimd_x86_64.c +++ /dev/null @@ -1,887 +0,0 @@ -/* - * jsimd_x86_64.c - * - * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander. - * Copyright (C) 2015, Matthieu Darbois. - * - * Based on the x86 SIMD extension for IJG JPEG library, - * Copyright (C) 1999-2006, MIYASAKA Masaru. - * For conditions of distribution and use, see copyright notice in jsimdext.inc - * - * This file contains the interface between the "normal" portions - * of the library and the SIMD implementations when running on a - * 64-bit x86 architecture. - */ - -#define JPEG_INTERNALS -#include "../jinclude.h" -#include "../jpeglib.h" -#include "../jsimd.h" -#include "../jdct.h" -#include "../jsimddct.h" -#include "jsimd.h" - -/* - * In the PIC cases, we have no guarantee that constants will keep - * their alignment. 
This macro allows us to verify it at runtime. - */ -#define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) - -#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ - -static unsigned int simd_support = ~0; -static unsigned int simd_huffman = 1; - -/* - * Check what SIMD accelerations are supported. - * - * FIXME: This code is racy under a multi-threaded environment. - */ -LOCAL(void) -init_simd (void) -{ - char *env = NULL; - - if (simd_support != ~0U) - return; - - simd_support = JSIMD_SSE2 | JSIMD_SSE; - - /* Force different settings through environment variables */ - env = getenv("JSIMD_FORCENONE"); - if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support = 0; - env = getenv("JSIMD_NOHUFFENC"); - if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_huffman = 0; -} - -GLOBAL(int) -jsimd_can_rgb_ycc (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_rgb_gray (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_ycc_rgb (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_ycc_rgb565 (void) -{ - 
return 0; -} - -GLOBAL(void) -jsimd_rgb_ycc_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - - switch(cinfo->in_color_space) { - case JCS_EXT_RGB: - sse2fct=jsimd_extrgb_ycc_convert_sse2; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - sse2fct=jsimd_extrgbx_ycc_convert_sse2; - break; - case JCS_EXT_BGR: - sse2fct=jsimd_extbgr_ycc_convert_sse2; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - sse2fct=jsimd_extbgrx_ycc_convert_sse2; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - sse2fct=jsimd_extxbgr_ycc_convert_sse2; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - sse2fct=jsimd_extxrgb_ycc_convert_sse2; - break; - default: - sse2fct=jsimd_rgb_ycc_convert_sse2; - break; - } - - sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); -} - -GLOBAL(void) -jsimd_rgb_gray_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - - switch(cinfo->in_color_space) { - case JCS_EXT_RGB: - sse2fct=jsimd_extrgb_gray_convert_sse2; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - sse2fct=jsimd_extrgbx_gray_convert_sse2; - break; - case JCS_EXT_BGR: - sse2fct=jsimd_extbgr_gray_convert_sse2; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - sse2fct=jsimd_extbgrx_gray_convert_sse2; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - sse2fct=jsimd_extxbgr_gray_convert_sse2; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - sse2fct=jsimd_extxrgb_gray_convert_sse2; - break; - default: - sse2fct=jsimd_rgb_gray_convert_sse2; - break; - } - - sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); -} - -GLOBAL(void) -jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) 
-{ - void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - sse2fct=jsimd_ycc_extrgb_convert_sse2; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - sse2fct=jsimd_ycc_extrgbx_convert_sse2; - break; - case JCS_EXT_BGR: - sse2fct=jsimd_ycc_extbgr_convert_sse2; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - sse2fct=jsimd_ycc_extbgrx_convert_sse2; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - sse2fct=jsimd_ycc_extxbgr_convert_sse2; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - sse2fct=jsimd_ycc_extxrgb_convert_sse2; - break; - default: - sse2fct=jsimd_ycc_rgb_convert_sse2; - break; - } - - sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); -} - -GLOBAL(void) -jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ -} - -GLOBAL(int) -jsimd_can_h2v2_downsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_downsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) -{ - jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, - compptr->v_samp_factor, compptr->width_in_blocks, - input_data, output_data); -} - -GLOBAL(void) -jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) -{ - jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, - 
compptr->v_samp_factor, compptr->width_in_blocks, - input_data, output_data); -} - -GLOBAL(int) -jsimd_can_h2v2_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, - input_data, output_data_ptr); -} - -GLOBAL(void) -jsimd_h2v1_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, - input_data, output_data_ptr); -} - -GLOBAL(int) -jsimd_can_h2v2_fancy_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_fancy_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, 
- compptr->downsampled_width, input_data, - output_data_ptr); -} - -GLOBAL(void) -jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) -{ - jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, - compptr->downsampled_width, input_data, - output_data_ptr); -} - -GLOBAL(int) -jsimd_can_h2v2_merged_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_h2v1_merged_upsample (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - - if ((simd_support & JSIMD_SSE2) && - IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; - break; - case JCS_EXT_BGR: - sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; - break; - default: - sse2fct=jsimd_h2v2_merged_upsample_sse2; - break; - } - - sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); -} - -GLOBAL(void) 
-jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; - break; - case JCS_EXT_BGR: - sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; - break; - default: - sse2fct=jsimd_h2v1_merged_upsample_sse2; - break; - } - - sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); -} - -GLOBAL(int) -jsimd_can_convsamp (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_convsamp_float (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(FAST_FLOAT) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM *workspace) -{ - jsimd_convsamp_sse2(sample_data, start_col, workspace); -} - -GLOBAL(void) -jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT *workspace) -{ - jsimd_convsamp_float_sse2(sample_data, start_col, workspace); -} - -GLOBAL(int) 
-jsimd_can_fdct_islow (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_fdct_ifast (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_fdct_float (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(FAST_FLOAT) != 4) - return 0; - - if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_fdct_islow (DCTELEM *data) -{ - jsimd_fdct_islow_sse2(data); -} - -GLOBAL(void) -jsimd_fdct_ifast (DCTELEM *data) -{ - jsimd_fdct_ifast_sse2(data); -} - -GLOBAL(void) -jsimd_fdct_float (FAST_FLOAT *data) -{ - jsimd_fdct_float_sse(data); -} - -GLOBAL(int) -jsimd_can_quantize (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (sizeof(DCTELEM) != 2) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_quantize_float (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (sizeof(FAST_FLOAT) != 4) - return 0; - - if (simd_support & JSIMD_SSE2) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, - DCTELEM *workspace) -{ - jsimd_quantize_sse2(coef_block, divisors, workspace); -} - -GLOBAL(void) -jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, - FAST_FLOAT *workspace) -{ - 
jsimd_quantize_float_sse2(coef_block, divisors, workspace); -} - -GLOBAL(int) -jsimd_can_idct_2x2 (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_idct_4x4 (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); -} - -GLOBAL(void) -jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); -} - -GLOBAL(int) -jsimd_can_idct_islow (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(ISLOW_MULT_TYPE) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_idct_ifast (void) -{ - init_simd(); - - /* The code is optimised for these values only */ - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 
0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(IFAST_MULT_TYPE) != 2) - return 0; - if (IFAST_SCALE_BITS != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) - return 1; - - return 0; -} - -GLOBAL(int) -jsimd_can_idct_float (void) -{ - init_simd(); - - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - if (BITS_IN_JSAMPLE != 8) - return 0; - if (sizeof(JDIMENSION) != 4) - return 0; - if (sizeof(FAST_FLOAT) != 4) - return 0; - if (sizeof(FLOAT_MULT_TYPE) != 4) - return 0; - - if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) - return 1; - - return 0; -} - -GLOBAL(void) -jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, - output_col); -} - -GLOBAL(void) -jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, - output_col); -} - -GLOBAL(void) -jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) -{ - jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, - output_col); -} - -GLOBAL(int) -jsimd_can_huff_encode_one_block (void) -{ - init_simd(); - - if (DCTSIZE != 8) - return 0; - if (sizeof(JCOEF) != 2) - return 0; - - if ((simd_support & JSIMD_SSE2) && simd_huffman && - IS_ALIGNED_SSE(jconst_huff_encode_one_block)) - return 1; - - return 0; -} - -GLOBAL(JOCTET*) -jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, - int last_dc_val, c_derived_tbl *dctbl, - c_derived_tbl *actbl) -{ - return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, - dctbl, actbl); -} diff --git 
a/simd/jsimdcfg.inc.h b/simd/jsimdcfg.inc.h deleted file mode 100644 index d2b499f..0000000 --- a/simd/jsimdcfg.inc.h +++ /dev/null @@ -1,130 +0,0 @@ -// This file generates the include file for the assembly -// implementations by abusing the C preprocessor. -// -// Note: Some things are manually defined as they need to -// be mapped to NASM types. - -; -; Automatically generated include file from jsimdcfg.inc.h -; - -#define JPEG_INTERNALS - -#include "../jpeglib.h" -#include "../jconfig.h" -#include "../jmorecfg.h" -#include "jsimd.h" - -; -; -- jpeglib.h -; - -%define _cpp_protection_DCTSIZE DCTSIZE -%define _cpp_protection_DCTSIZE2 DCTSIZE2 - -; -; -- jmorecfg.h -; - -%define _cpp_protection_RGB_RED RGB_RED -%define _cpp_protection_RGB_GREEN RGB_GREEN -%define _cpp_protection_RGB_BLUE RGB_BLUE -%define _cpp_protection_RGB_PIXELSIZE RGB_PIXELSIZE - -%define _cpp_protection_EXT_RGB_RED EXT_RGB_RED -%define _cpp_protection_EXT_RGB_GREEN EXT_RGB_GREEN -%define _cpp_protection_EXT_RGB_BLUE EXT_RGB_BLUE -%define _cpp_protection_EXT_RGB_PIXELSIZE EXT_RGB_PIXELSIZE - -%define _cpp_protection_EXT_RGBX_RED EXT_RGBX_RED -%define _cpp_protection_EXT_RGBX_GREEN EXT_RGBX_GREEN -%define _cpp_protection_EXT_RGBX_BLUE EXT_RGBX_BLUE -%define _cpp_protection_EXT_RGBX_PIXELSIZE EXT_RGBX_PIXELSIZE - -%define _cpp_protection_EXT_BGR_RED EXT_BGR_RED -%define _cpp_protection_EXT_BGR_GREEN EXT_BGR_GREEN -%define _cpp_protection_EXT_BGR_BLUE EXT_BGR_BLUE -%define _cpp_protection_EXT_BGR_PIXELSIZE EXT_BGR_PIXELSIZE - -%define _cpp_protection_EXT_BGRX_RED EXT_BGRX_RED -%define _cpp_protection_EXT_BGRX_GREEN EXT_BGRX_GREEN -%define _cpp_protection_EXT_BGRX_BLUE EXT_BGRX_BLUE -%define _cpp_protection_EXT_BGRX_PIXELSIZE EXT_BGRX_PIXELSIZE - -%define _cpp_protection_EXT_XBGR_RED EXT_XBGR_RED -%define _cpp_protection_EXT_XBGR_GREEN EXT_XBGR_GREEN -%define _cpp_protection_EXT_XBGR_BLUE EXT_XBGR_BLUE -%define _cpp_protection_EXT_XBGR_PIXELSIZE EXT_XBGR_PIXELSIZE - -%define 
_cpp_protection_EXT_XRGB_RED EXT_XRGB_RED -%define _cpp_protection_EXT_XRGB_GREEN EXT_XRGB_GREEN -%define _cpp_protection_EXT_XRGB_BLUE EXT_XRGB_BLUE -%define _cpp_protection_EXT_XRGB_PIXELSIZE EXT_XRGB_PIXELSIZE - -%define RGBX_FILLER_0XFF 1 - -; Representation of a single sample (pixel element value). -; On this SIMD implementation, this must be 'unsigned char'. -; - -%define JSAMPLE byte ; unsigned char -%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE) - -%define _cpp_protection_CENTERJSAMPLE CENTERJSAMPLE - -; Representation of a DCT frequency coefficient. -; On this SIMD implementation, this must be 'short'. -; -%define JCOEF word ; short -%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF) - -; Datatype used for image dimensions. -; On this SIMD implementation, this must be 'unsigned int'. -; -%define JDIMENSION dword ; unsigned int -%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION) - -%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h) -%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h) -%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h) -%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h) -%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW) -%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY) -%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE) -%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR) - -; -; -- jdct.h -; - -; A forward DCT routine is given a pointer to a work area of type DCTELEM[]; -; the DCT is to be performed in-place in that buffer. -; To maximize parallelism, Type DCTELEM is changed to short (originally, int). -; -%define DCTELEM word ; short -%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM) - -%define FAST_FLOAT FP32 ; float -%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(FAST_FLOAT) - -; To maximize parallelism, Type MULTIPLIER is changed to short. 
-; -%define ISLOW_MULT_TYPE word ; must be short -%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE) - -%define IFAST_MULT_TYPE word ; must be short -%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE) -%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors - -%define FLOAT_MULT_TYPE FP32 ; must be float -%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE) - -; -; -- jsimd.h -; - -%define _cpp_protection_JSIMD_NONE JSIMD_NONE -%define _cpp_protection_JSIMD_MMX JSIMD_MMX -%define _cpp_protection_JSIMD_3DNOW JSIMD_3DNOW -%define _cpp_protection_JSIMD_SSE JSIMD_SSE -%define _cpp_protection_JSIMD_SSE2 JSIMD_SSE2 diff --git a/simd/jsimdcpu.asm b/simd/jsimdcpu.asm deleted file mode 100644 index 599083b..0000000 --- a/simd/jsimdcpu.asm +++ /dev/null @@ -1,104 +0,0 @@ -; -; jsimdcpu.asm - SIMD instruction support check -; -; Copyright 2009 Pierre Ossman for Cendio AB -; -; Based on the x86 SIMD extension for IJG JPEG library -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; For conditions of distribution and use, see copyright notice in jsimdext.inc -; -; This file should be assembled with NASM (Netwide Assembler), -; can *not* be assembled with Microsoft's MASM or any compatible -; assembler (including Borland's Turbo Assembler). 
-; NASM is available from http://nasm.sourceforge.net/ or -; http://sourceforge.net/project/showfiles.php?group_id=6208 -; -; [TAB8] - -%include "jsimdext.inc" - -; -------------------------------------------------------------------------- - SECTION SEG_TEXT - BITS 32 -; -; Check if the CPU supports SIMD instructions -; -; GLOBAL(unsigned int) -; jpeg_simd_cpu_support (void) -; - - align 16 - global EXTN(jpeg_simd_cpu_support) - -EXTN(jpeg_simd_cpu_support): - push ebx -; push ecx ; need not be preserved -; push edx ; need not be preserved -; push esi ; unused - push edi - - xor edi,edi ; simd support flag - - pushfd - pop eax - mov edx,eax - xor eax, 1<<21 ; flip ID bit in EFLAGS - push eax - popfd - pushfd - pop eax - xor eax,edx - jz short .return ; CPUID is not supported - - ; Check for MMX instruction support - xor eax,eax - cpuid - test eax,eax - jz short .return - - xor eax,eax - inc eax - cpuid - mov eax,edx ; eax = Standard feature flags - - test eax, 1<<23 ; bit23:MMX - jz short .no_mmx - or edi, byte JSIMD_MMX -.no_mmx: - test eax, 1<<25 ; bit25:SSE - jz short .no_sse - or edi, byte JSIMD_SSE -.no_sse: - test eax, 1<<26 ; bit26:SSE2 - jz short .no_sse2 - or edi, byte JSIMD_SSE2 -.no_sse2: - - ; Check for 3DNow! instruction support - mov eax, 0x80000000 - cpuid - cmp eax, 0x80000000 - jbe short .return - - mov eax, 0x80000001 - cpuid - mov eax,edx ; eax = Extended feature flags - - test eax, 1<<31 ; bit31:3DNow!(vendor independent) - jz short .no_3dnow - or edi, byte JSIMD_3DNOW -.no_3dnow: - -.return: - mov eax,edi - - pop edi -; pop esi ; unused -; pop edx ; need not be preserved -; pop ecx ; need not be preserved - pop ebx - ret - -; For some reason, the OS X linker does not honor the request to align the -; segment unless we do this. 
- align 16 diff --git a/simd/jsimdext.inc b/simd/jsimdext.inc deleted file mode 100644 index f28db60..0000000 --- a/simd/jsimdext.inc +++ /dev/null @@ -1,375 +0,0 @@ -; -; jsimdext.inc - common declarations -; -; Copyright 2009 Pierre Ossman for Cendio AB -; Copyright (C) 2010, D. R. Commander. -; -; Based on the x86 SIMD extension for IJG JPEG library - version 1.02 -; -; Copyright (C) 1999-2006, MIYASAKA Masaru. -; -; This software is provided 'as-is', without any express or implied -; warranty. In no event will the authors be held liable for any damages -; arising from the use of this software. -; -; Permission is granted to anyone to use this software for any purpose, -; including commercial applications, and to alter it and redistribute it -; freely, subject to the following restrictions: -; -; 1. The origin of this software must not be misrepresented; you must not -; claim that you wrote the original software. If you use this software -; in a product, an acknowledgment in the product documentation would be -; appreciated but is not required. -; 2. Altered source versions must be plainly marked as such, and must not be -; misrepresented as being the original software. -; 3. This notice may not be removed or altered from any source distribution. 
-; -; [TAB8] - -; ========================================================================== -; System-dependent configurations - -%ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)-------- -; * Microsoft Visual C++ -; * MinGW (Minimalist GNU for Windows) -; * CygWin -; * LCC-Win32 - -; -- segment definition -- -; -%ifdef __YASM_VER__ -%define SEG_TEXT .text align=16 -%define SEG_CONST .rdata align=16 -%else -%define SEG_TEXT .text align=16 public use32 class=CODE -%define SEG_CONST .rdata align=16 public use32 class=CONST -%endif - -%elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)-------- -; * Microsoft Visual C++ - -; -- segment definition -- -; -%ifdef __YASM_VER__ -%define SEG_TEXT .text align=16 -%define SEG_CONST .rdata align=16 -%else -%define SEG_TEXT .text align=16 public use64 class=CODE -%define SEG_CONST .rdata align=16 public use64 class=CONST -%endif -%define EXTN(name) name ; foo() -> foo - -%elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)---------- -; * Borland C++ (Win32) - -; -- segment definition -- -; -%define SEG_TEXT _text align=16 public use32 class=CODE -%define SEG_CONST _data align=16 public use32 class=DATA - -%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ -; * Linux -; * *BSD family Unix using elf format -; * Unix System V, including Solaris x86, UnixWare and SCO Unix - -; mark stack as non-executable -section .note.GNU-stack noalloc noexec nowrite progbits - -; -- segment definition -- -; -%ifdef __x86_64__ -%define SEG_TEXT .text progbits align=16 -%define SEG_CONST .rodata progbits align=16 -%else -%define SEG_TEXT .text progbits alloc exec nowrite align=16 -%define SEG_CONST .rodata progbits alloc noexec nowrite align=16 -%endif - -; To make the code position-independent, append -DPIC to the commandline -; -%define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC -%define EXTN(name) name ; foo() -> foo - -%elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)---- -; * Older Linux using a.out format (nasm -f aout -DAOUT ...) 
-; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...) - -; -- segment definition -- -; -%define SEG_TEXT .text -%define SEG_CONST .data - -; To make the code position-independent, append -DPIC to the commandline -; -%define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC - -%elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- -; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format) - -; -- segment definition -- -; -%define SEG_TEXT .text ;align=16 ; nasm doesn't accept align=16. why? -%define SEG_CONST .rodata align=16 - -; The generation of position-independent code (PIC) is the default on Darwin. -; -%define PIC -%define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing - -%else ; ----(Other case)---------------------- - -; -- segment definition -- -; -%define SEG_TEXT .text -%define SEG_CONST .data - -%endif ; ---------------------------------------------- - -; ========================================================================== - -; -------------------------------------------------------------------------- -; Common types -; -%ifdef __x86_64__ -%define POINTER qword ; general pointer type -%define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER) -%define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT -%else -%define POINTER dword ; general pointer type -%define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER) -%define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT -%endif - -%define INT dword ; signed integer type -%define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT) -%define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT - -%define FP32 dword ; IEEE754 single -%define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32) -%define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT - -%define MMWORD qword ; int64 (MMX register) -%define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD) -%define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT - -; NASM is buggy and doesn't properly handle operand sizes for SSE -; instructions, so for now we have to 
define XMMWORD as blank. -%define XMMWORD ; int128 (SSE register) -%define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD) -%define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT - -; Similar hacks for when we load a dword or MMWORD into an xmm# register -%define XMM_DWORD -%define XMM_MMWORD - -%define SIZEOF_BYTE 1 ; sizeof(BYTE) -%define SIZEOF_WORD 2 ; sizeof(WORD) -%define SIZEOF_DWORD 4 ; sizeof(DWORD) -%define SIZEOF_QWORD 8 ; sizeof(QWORD) -%define SIZEOF_OWORD 16 ; sizeof(OWORD) - -%define BYTE_BIT 8 ; CHAR_BIT in C -%define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT -%define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT -%define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT -%define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT - -; -------------------------------------------------------------------------- -; External Symbol Name -; -%ifndef EXTN -%define EXTN(name) _ %+ name ; foo() -> _foo -%endif - -; -------------------------------------------------------------------------- -; Macros for position-independent code (PIC) support -; -%ifndef GOT_SYMBOL -%undef PIC -%endif - -%ifdef PIC ; ------------------------------------------- - -%ifidn GOT_SYMBOL,_MACHO_PIC_ ; -------------------- - -; At present, nasm doesn't seem to support PIC generation for Mach-O. -; The PIC support code below is a little tricky. - - SECTION SEG_CONST -const_base: - -%define GOTOFF(got,sym) (got) + (sym) - const_base - -%imacro get_GOT 1 - ; NOTE: this macro destroys ecx resister. 
- call %%geteip - add ecx, byte (%%ref - $) - jmp short %%adjust -%%geteip: - mov ecx, POINTER [esp] - ret -%%adjust: - push ebp - xor ebp,ebp ; ebp = 0 -%ifidni %1,ebx ; (%1 == ebx) - ; db 0x8D,0x9C + jmp near const_base = - ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32) - db 0x8D,0x9C ; 8D,9C - jmp near const_base ; E9,(const_base-%%ref) -%%ref: -%else ; (%1 != ebx) - ; db 0x8D,0x8C + jmp near const_base = - ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32) - db 0x8D,0x8C ; 8D,8C - jmp near const_base ; E9,(const_base-%%ref) -%%ref: mov %1, ecx -%endif ; (%1 == ebx) - pop ebp -%endmacro - -%else ; GOT_SYMBOL != _MACHO_PIC_ ---------------- - -%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff - -%imacro get_GOT 1 - extern GOT_SYMBOL - call %%geteip - add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc - jmp short %%done -%%geteip: - mov %1, POINTER [esp] - ret -%%done: -%endmacro - -%endif ; GOT_SYMBOL == _MACHO_PIC_ ---------------- - -%imacro pushpic 1.nolist - push %1 -%endmacro -%imacro poppic 1.nolist - pop %1 -%endmacro -%imacro movpic 2.nolist - mov %1,%2 -%endmacro - -%else ; !PIC ----------------------------------------- - -%define GOTOFF(got,sym) (sym) - -%imacro get_GOT 1.nolist -%endmacro -%imacro pushpic 1.nolist -%endmacro -%imacro poppic 1.nolist -%endmacro -%imacro movpic 2.nolist -%endmacro - -%endif ; PIC ----------------------------------------- - -; -------------------------------------------------------------------------- -; Align the next instruction on {2,4,8,16,..}-byte boundary. 
-; ".balign n,,m" in GNU as -; -%define MSKLE(x,y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16) -%define FILLB(b,n) (($$-(b)) & ((n)-1)) - -%imacro alignx 1-2.nolist 0xFFFF -%%bs: times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \ - db 0x90 ; nop - times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \ - db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000] - times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \ - db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] - times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \ - db 0x8D,0xAD,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] - times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \ - db 0x8D,0x6C,0x25,0x00 ; lea ebp,[ebp+0x00] - times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \ - db 0x8D,0x6D,0x00 ; lea ebp,[ebp+0x00] - times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \ - db 0x8B,0xED ; mov ebp,ebp - times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \ - db 0x90 ; nop -%endmacro - -; Align the next data on {2,4,8,16,..}-byte boundary. 
-; -%imacro alignz 1.nolist - align %1, db 0 ; filling zeros -%endmacro - -%ifdef __x86_64__ - -%ifdef WIN64 - -%imacro collect_args 0 - push r12 - push r13 - push r14 - push r15 - mov r10, rcx - mov r11, rdx - mov r12, r8 - mov r13, r9 - mov r14, [rax+48] - mov r15, [rax+56] - push rsi - push rdi - sub rsp, SIZEOF_XMMWORD - movaps XMMWORD [rsp], xmm6 - sub rsp, SIZEOF_XMMWORD - movaps XMMWORD [rsp], xmm7 -%endmacro - -%imacro uncollect_args 0 - movaps xmm7, XMMWORD [rsp] - add rsp, SIZEOF_XMMWORD - movaps xmm6, XMMWORD [rsp] - add rsp, SIZEOF_XMMWORD - pop rdi - pop rsi - pop r15 - pop r14 - pop r13 - pop r12 -%endmacro - -%else - -%imacro collect_args 0 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - mov r10, rdi - mov r11, rsi - mov r12, rdx - mov r13, rcx - mov r14, r8 - mov r15, r9 -%endmacro - -%imacro uncollect_args 0 - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 -%endmacro - -%endif - -%endif - -; -------------------------------------------------------------------------- -; Defines picked up from the C headers -; -%include "jsimdcfg.inc" - -; -------------------------------------------------------------------------- diff --git a/simd/loongson/jccolext-mmi.c b/simd/loongson/jccolext-mmi.c new file mode 100644 index 0000000..e1c4e69 --- /dev/null +++ b/simd/loongson/jccolext-mmi.c @@ -0,0 +1,469 @@ +/* + * Loongson MMI optimizations for libjpeg-turbo + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2014-2015, D. R. Commander. All Rights Reserved. + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * Authors: ZhuChen + * SunZhangzhi + * CaiWanwei + * + * Based on the x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* This file is included by jccolor-mmi.c */ + + +#if RGB_RED == 0 +#define mmA mm0 +#define mmB mm1 +#elif RGB_GREEN == 0 +#define mmA mm2 +#define mmB mm3 +#elif RGB_BLUE == 0 +#define mmA mm4 +#define mmB mm5 +#else +#define mmA mm6 +#define mmB mm7 +#endif + +#if RGB_RED == 1 +#define mmC mm0 +#define mmD mm1 +#elif RGB_GREEN == 1 +#define mmC mm2 +#define mmD mm3 +#elif RGB_BLUE == 1 +#define mmC mm4 +#define mmD mm5 +#else +#define mmC mm6 +#define mmD mm7 +#endif + +#if RGB_RED == 2 +#define mmE mm0 +#define mmF mm1 +#elif RGB_GREEN == 2 +#define mmE mm2 +#define mmF mm3 +#elif RGB_BLUE == 2 +#define mmE mm4 +#define mmF mm5 +#else +#define mmE mm6 +#define mmF mm7 +#endif + +#if RGB_RED == 3 +#define mmG mm0 +#define mmH mm1 +#elif RGB_GREEN == 3 +#define mmG mm2 +#define mmH mm3 +#elif RGB_BLUE == 3 +#define mmG mm4 +#define mmH mm5 +#else +#define mmG mm6 +#define mmH mm7 +#endif + + +void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + JSAMPROW inptr, outptr0, outptr1, outptr2; + int num_cols, col; + __m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7; + __m64 wk[7]; + __m64 Y_BG, Cb_RG, Cr_BG; + + while (--num_rows >= 0) { + inptr = *input_buf++; + outptr0 = output_buf[0][output_row]; + outptr1 = 
output_buf[1][output_row]; + outptr2 = output_buf[2][output_row]; + output_row++; + + for (num_cols = image_width; num_cols > 0; num_cols -= 8, + outptr0 += 8, outptr1 += 8, outptr2 += 8) { + +#if RGB_PIXELSIZE == 3 + + if (num_cols < 8) { + col = num_cols * 3; + asm(".set noreorder\r\n" + + "li $8, 1\r\n" + "move $9, %3\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 1f\r\n" + "nop \r\n" + "subu $9, $9, 1\r\n" + "xor $12, $12, $12\r\n" + "move $13, %5\r\n" + "dadd $13, $13, $9\r\n" + "lbu $12, 0($13)\r\n" + + "1: \r\n" + "li $8, 2\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 2f\r\n" + "nop \r\n" + "subu $9, $9, 2\r\n" + "xor $11, $11, $11\r\n" + "move $13, %5\r\n" + "dadd $13, $13, $9\r\n" + "lhu $11, 0($13)\r\n" + "sll $12, $12, 16\r\n" + "or $12, $12, $11\r\n" + + "2: \r\n" + "dmtc1 $12, %0\r\n" + "li $8, 4\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 3f\r\n" + "nop \r\n" + "subu $9, $9, 4\r\n" + "move $13, %5\r\n" + "dadd $13, $13, $9\r\n" + "lwu $14, 0($13)\r\n" + "dmtc1 $14, %1\r\n" + "dsll32 $12, $12, 0\r\n" + "or $12, $12, $14\r\n" + "dmtc1 $12, %0\r\n" + + "3: \r\n" + "li $8, 8\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 4f\r\n" + "nop \r\n" + "mov.s %1, %0\r\n" + "ldc1 %0, 0(%5)\r\n" + "li $9, 8\r\n" + "j 5f\r\n" + "nop \r\n" + + "4: \r\n" + "li $8, 16\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 5f\r\n" + "nop \r\n" + "mov.s %2, %0\r\n" + "ldc1 %0, 0(%5)\r\n" + "ldc1 %1, 8(%5)\r\n" + + "5: \r\n" + "nop \r\n" + ".set reorder\r\n" + + : "=f" (mmA), "=f" (mmG), "=f" (mmF) + : "r" (col), "r" (num_rows), "r" (inptr) + : "$f0", "$f2", "$f4", "$8", "$9", "$10", "$11", "$12", "$13", + "$14", "memory" + ); + } else { + mmA = _mm_load_si64((__m64 *)&inptr[0]); + mmG = _mm_load_si64((__m64 *)&inptr[8]); + mmF = _mm_load_si64((__m64 *)&inptr[16]); + inptr += RGB_PIXELSIZE * 8; + } + mmD = mmA; + mmA = _mm_slli_si64(mmA, 4 * BYTE_BIT); + mmD = _mm_srli_si64(mmD, 4 * BYTE_BIT); + + mmA = _mm_unpackhi_pi8(mmA, mmG); + mmG = _mm_slli_si64(mmG, 4 * BYTE_BIT); + + mmD = 
_mm_unpacklo_pi8(mmD, mmF); + mmG = _mm_unpackhi_pi8(mmG, mmF); + + mmE = mmA; + mmA = _mm_slli_si64(mmA, 4 * BYTE_BIT); + mmE = _mm_srli_si64(mmE, 4 * BYTE_BIT); + + mmA = _mm_unpackhi_pi8(mmA, mmD); + mmD = _mm_slli_si64(mmD, 4 * BYTE_BIT); + + mmE = _mm_unpacklo_pi8(mmE, mmG); + mmD = _mm_unpackhi_pi8(mmD, mmG); + mmC = mmA; + mmA = _mm_loadlo_pi8_f(mmA); + mmC = _mm_loadhi_pi8_f(mmC); + + mmB = mmE; + mmE = _mm_loadlo_pi8_f(mmE); + mmB = _mm_loadhi_pi8_f(mmB); + + mmF = mmD; + mmD = _mm_loadlo_pi8_f(mmD); + mmF = _mm_loadhi_pi8_f(mmF); + +#else /* RGB_PIXELSIZE == 4 */ + + if (num_cols < 8) { + col = num_cols; + asm(".set noreorder\r\n" + + "li $8, 1\r\n" + "move $9, %4\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 1f\r\n" + "nop \r\n" + "subu $9, $9, 1\r\n" + "dsll $11, $9, 2\r\n" + "move $13, %5\r\n" + "daddu $13, $13, $11\r\n" + "lwc1 %0, 0($13)\r\n" + + "1: \r\n" + "li $8, 2\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 2f\r\n" + "nop \r\n" + "subu $9, $9, 2\r\n" + "dsll $11, $9, 2\r\n" + "move $13, %5\r\n" + "daddu $13, $13, $11\r\n" + "mov.s %1, %0\r\n" + "ldc1 %0, 0($13)\r\n" + + "2: \r\n" + "li $8, 4\r\n" + "and $10, $9, $8\r\n" + "beqz $10, 3f\r\n" + "nop \r\n" + "mov.s %2, %0\r\n" + "mov.s %3, %1\r\n" + "ldc1 %0, 0(%5)\r\n" + "ldc1 %1, 8(%5)\r\n" + + "3: \r\n" + "nop \r\n" + ".set reorder\r\n" + + : "=f" (mmA), "=f" (mmF), "=f" (mmD), "=f" (mmC) + : "r" (col), "r" (inptr) + : "$f0", "$f2", "$8", "$9", "$10", "$11", "$13", "memory" + ); + } else { + mmA = _mm_load_si64((__m64 *)&inptr[0]); + mmF = _mm_load_si64((__m64 *)&inptr[8]); + mmD = _mm_load_si64((__m64 *)&inptr[16]); + mmC = _mm_load_si64((__m64 *)&inptr[24]); + inptr += RGB_PIXELSIZE * 8; + } + mmB = mmA; + mmA = _mm_unpacklo_pi8(mmA, mmF); + mmB = _mm_unpackhi_pi8(mmB, mmF); + + mmG = mmD; + mmD = _mm_unpacklo_pi8(mmD, mmC); + mmG = _mm_unpackhi_pi8(mmG, mmC); + + mmE = mmA; + mmA = _mm_unpacklo_pi16(mmA, mmD); + mmE = _mm_unpackhi_pi16(mmE, mmD); + + mmH = mmB; + mmB = _mm_unpacklo_pi16(mmB, mmG); + 
mmH = _mm_unpackhi_pi16(mmH, mmG); + + mmC = mmA; + mmA = _mm_loadlo_pi8_f(mmA); + mmC = _mm_loadhi_pi8_f(mmC); + + mmD = mmB; + mmB = _mm_loadlo_pi8_f(mmB); + mmD = _mm_loadhi_pi8_f(mmD); + + mmG = mmE; + mmE = _mm_loadlo_pi8_f(mmE); + mmG = _mm_loadhi_pi8_f(mmG); + + mmF = mmH; + mmF = _mm_unpacklo_pi8(mmF, mmH); + mmH = _mm_unpackhi_pi8(mmH, mmH); + mmF = _mm_srli_pi16(mmF, BYTE_BIT); + mmH = _mm_srli_pi16(mmH, BYTE_BIT); + +#endif + + wk[0] = mm0; + wk[1] = mm1; + wk[2] = mm4; + wk[3] = mm5; + + mm6 = mm1; + mm1 = _mm_unpacklo_pi16(mm1, mm3); + mm6 = _mm_unpackhi_pi16(mm6, mm3); + mm7 = mm1; + mm4 = mm6; + mm1 = _mm_madd_pi16(mm1, PW_F0299_F0337); + mm6 = _mm_madd_pi16(mm6, PW_F0299_F0337); + mm7 = _mm_madd_pi16(mm7, PW_MF016_MF033); + mm4 = _mm_madd_pi16(mm4, PW_MF016_MF033); + + wk[4] = mm1; + wk[5] = mm6; + + mm1 = _mm_loadlo_pi16_f(mm5); + mm6 = _mm_loadhi_pi16_f(mm5); + mm1 = _mm_srli_pi32(mm1, 1); + mm6 = _mm_srli_pi32(mm6, 1); + + mm5 = PD_ONEHALFM1_CJ; + mm7 = _mm_add_pi32(mm7, mm1); + mm4 = _mm_add_pi32(mm4, mm6); + mm7 = _mm_add_pi32(mm7, mm5); + mm4 = _mm_add_pi32(mm4, mm5); + mm7 = _mm_srli_pi32(mm7, SCALEBITS); + mm4 = _mm_srli_pi32(mm4, SCALEBITS); + mm7 = _mm_packs_pi32(mm7, mm4); + + mm1 = wk[2]; + mm6 = mm0; + mm0 = _mm_unpacklo_pi16(mm0, mm2); + mm6 = _mm_unpackhi_pi16(mm6, mm2); + mm5 = mm0; + mm4 = mm6; + mm0 = _mm_madd_pi16(mm0, PW_F0299_F0337); + mm6 = _mm_madd_pi16(mm6, PW_F0299_F0337); + mm5 = _mm_madd_pi16(mm5, PW_MF016_MF033); + mm4 = _mm_madd_pi16(mm4, PW_MF016_MF033); + + wk[6] = mm0; + wk[7] = mm6; + mm0 = _mm_loadlo_pi16_f(mm1); + mm6 = _mm_loadhi_pi16_f(mm1); + mm0 = _mm_srli_pi32(mm0, 1); + mm6 = _mm_srli_pi32(mm6, 1); + + mm1 = PD_ONEHALFM1_CJ; + mm5 = _mm_add_pi32(mm5, mm0); + mm4 = _mm_add_pi32(mm4, mm6); + mm5 = _mm_add_pi32(mm5, mm1); + mm4 = _mm_add_pi32(mm4, mm1); + mm5 = _mm_srli_pi32(mm5, SCALEBITS); + mm4 = _mm_srli_pi32(mm4, SCALEBITS); + mm5 = _mm_packs_pi32(mm5, mm4); + + mm7 = _mm_slli_pi16(mm7, BYTE_BIT); + mm5 = 
_mm_or_si64(mm5, mm7); + Cb_RG = mm5; + + mm0 = wk[3]; + mm6 = wk[2]; + mm1 = wk[1]; + + mm4 = mm0; + mm0 = _mm_unpacklo_pi16(mm0, mm3); + mm4 = _mm_unpackhi_pi16(mm4, mm3); + mm7 = mm0; + mm5 = mm4; + mm0 = _mm_madd_pi16(mm0, PW_F0114_F0250); + mm4 = _mm_madd_pi16(mm4, PW_F0114_F0250); + mm7 = _mm_madd_pi16(mm7, PW_MF008_MF041); + mm5 = _mm_madd_pi16(mm5, PW_MF008_MF041); + + mm3 = PD_ONEHALF; + mm0 = _mm_add_pi32(mm0, wk[4]); + mm4 = _mm_add_pi32(mm4, wk[5]); + mm0 = _mm_add_pi32(mm0, mm3); + mm4 = _mm_add_pi32(mm4, mm3); + mm0 = _mm_srli_pi32(mm0, SCALEBITS); + mm4 = _mm_srli_pi32(mm4, SCALEBITS); + mm0 = _mm_packs_pi32(mm0, mm4); + + mm3 = _mm_loadlo_pi16_f(mm1); + mm4 = _mm_loadhi_pi16_f(mm1); + mm3 = _mm_srli_pi32(mm3, 1); + mm4 = _mm_srli_pi32(mm4, 1); + + mm1 = PD_ONEHALFM1_CJ; + mm7 = _mm_add_pi32(mm7, mm3); + mm5 = _mm_add_pi32(mm5, mm4); + mm7 = _mm_add_pi32(mm7, mm1); + mm5 = _mm_add_pi32(mm5, mm1); + mm7 = _mm_srli_pi32(mm7, SCALEBITS); + mm5 = _mm_srli_pi32(mm5, SCALEBITS); + mm7 = _mm_packs_pi32(mm7, mm5); + + mm3 = wk[0]; + mm4 = mm6; + mm6 = _mm_unpacklo_pi16(mm6, mm2); + mm4 = _mm_unpackhi_pi16(mm4, mm2); + mm1 = mm6; + mm5 = mm4; + mm6 = _mm_madd_pi16(mm6, PW_F0114_F0250); + mm4 = _mm_madd_pi16(mm4, PW_F0114_F0250); + mm1 = _mm_madd_pi16(mm1, PW_MF008_MF041); + mm5 = _mm_madd_pi16(mm5, PW_MF008_MF041); + + mm2 = PD_ONEHALF; + mm6 = _mm_add_pi32(mm6, wk[6]); + mm4 = _mm_add_pi32(mm4, wk[7]); + mm6 = _mm_add_pi32(mm6, mm2); + mm4 = _mm_add_pi32(mm4, mm2); + mm6 = _mm_srli_pi32(mm6, SCALEBITS); + mm4 = _mm_srli_pi32(mm4, SCALEBITS); + mm6 = _mm_packs_pi32(mm6, mm4); + + mm0 = _mm_slli_pi16(mm0, BYTE_BIT); + mm6 = _mm_or_si64(mm6, mm0); + Y_BG = mm6; + + mm2 = _mm_loadlo_pi16_f(mm3); + mm4 = _mm_loadhi_pi16_f(mm3); + mm2 = _mm_srli_pi32(mm2, 1); + mm4 = _mm_srli_pi32(mm4, 1); + + mm0 = PD_ONEHALFM1_CJ; + mm1 = _mm_add_pi32(mm1, mm2); + mm5 = _mm_add_pi32(mm5, mm4); + mm1 = _mm_add_pi32(mm1, mm0); + mm5 = _mm_add_pi32(mm5, mm0); + mm1 = 
_mm_srli_pi32(mm1, SCALEBITS); + mm5 = _mm_srli_pi32(mm5, SCALEBITS); + mm1 = _mm_packs_pi32(mm1, mm5); + + mm7 = _mm_slli_pi16(mm7, BYTE_BIT); + mm1 = _mm_or_si64(mm1, mm7); + Cr_BG = mm1; + + _mm_store_si64((__m64 *)&outptr0[0], Y_BG); + _mm_store_si64((__m64 *)&outptr1[0], Cb_RG); + _mm_store_si64((__m64 *)&outptr2[0], Cr_BG); + } + } +} + +#undef mmA +#undef mmB +#undef mmC +#undef mmD +#undef mmE +#undef mmF +#undef mmG +#undef mmH diff --git a/simd/loongson/jccolor-mmi.c b/simd/loongson/jccolor-mmi.c new file mode 100644 index 0000000..93ef5c7 --- /dev/null +++ b/simd/loongson/jccolor-mmi.c @@ -0,0 +1,148 @@ +/* + * Loongson MMI optimizations for libjpeg-turbo + * + * Copyright (C) 2011, 2014, D. R. Commander. All Rights Reserved. + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * Authors: ZhuChen + * CaiWanwei + * SunZhangzhi + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */
+
+/* RGB --> YCC CONVERSION */
+
+#include "jsimd_mmi.h"
+
+
+#define F_0_081  ((short)5329)                /* FIX(0.08131) */
+#define F_0_114  ((short)7471)                /* FIX(0.11400) */
+#define F_0_168  ((short)11059)               /* FIX(0.16874) */
+#define F_0_250  ((short)16384)               /* FIX(0.25000) */
+#define F_0_299  ((short)19595)               /* FIX(0.29900) */
+#define F_0_331  ((short)21709)               /* FIX(0.33126) */
+#define F_0_418  ((short)27439)               /* FIX(0.41869) */
+#define F_0_587  ((short)38470)               /* FIX(0.58700) */
+#define F_0_337  ((short)(F_0_587 - F_0_250)) /* FIX(0.58700) - FIX(0.25000) */
+/* Indices into const_value[]; the order must match that table's initializer. */
+enum const_index {
+  index_PD_ONEHALF,
+  index_PW_F0299_F0337,
+  index_PW_F0114_F0250,
+  index_PW_MF016_MF033,
+  index_PW_MF008_MF041,
+  index_PD_ONEHALFM1_CJ
+};
+/* Packed 64-bit constants, one entry per const_index value, in the same order. */
+static uint64_t const_value[] = {
+  _uint64_set_pi32((int)(1 << (SCALEBITS - 1)), (int)(1 << (SCALEBITS - 1))),
+  _uint64_set_pi16(F_0_337, F_0_299, F_0_337, F_0_299),
+  _uint64_set_pi16(F_0_250, F_0_114, F_0_250, F_0_114),
+  _uint64_set_pi16(-F_0_331, -F_0_168, -F_0_331, -F_0_168),
+  _uint64_set_pi16(-F_0_418, -F_0_081, -F_0_418, -F_0_081),
+  _uint64_set_pi32(((1 << (SCALEBITS - 1)) - 1 + (CENTERJSAMPLE << SCALEBITS)),
+                   ((1 << (SCALEBITS - 1)) - 1 + (CENTERJSAMPLE << SCALEBITS)))
+};
+/* Reads one const_value[] entry through an __m64 lvalue. */
+#define get_const_value(index)  (*(__m64 *)&const_value[index])
+
+#define PD_ONEHALF       get_const_value(index_PD_ONEHALF)
+#define PW_F0299_F0337   get_const_value(index_PW_F0299_F0337)
+#define PW_F0114_F0250   get_const_value(index_PW_F0114_F0250)
+#define PW_MF016_MF033   get_const_value(index_PW_MF016_MF033)
+#define PW_MF008_MF041   get_const_value(index_PW_MF008_MF041)
+#define PD_ONEHALFM1_CJ  get_const_value(index_PD_ONEHALFM1_CJ)
+/* jccolext-mmi.c is a template: it is included once per pixel layout below, with
+ * the RGB_* macros and the function name redefined before each later inclusion. */
+#include "jccolext-mmi.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+
+#define RGB_RED  EXT_RGB_RED
+#define RGB_GREEN  EXT_RGB_GREEN
+#define RGB_BLUE  EXT_RGB_BLUE
+#define RGB_PIXELSIZE  EXT_RGB_PIXELSIZE
+#define jsimd_rgb_ycc_convert_mmi  jsimd_extrgb_ycc_convert_mmi
+#include "jccolext-mmi.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef jsimd_rgb_ycc_convert_mmi
+
+#define RGB_RED  EXT_RGBX_RED
+#define RGB_GREEN  EXT_RGBX_GREEN
+#define RGB_BLUE  EXT_RGBX_BLUE
+#define RGB_PIXELSIZE  EXT_RGBX_PIXELSIZE
+#define jsimd_rgb_ycc_convert_mmi  jsimd_extrgbx_ycc_convert_mmi
+#include "jccolext-mmi.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef jsimd_rgb_ycc_convert_mmi
+
+#define RGB_RED  EXT_BGR_RED
+#define RGB_GREEN  EXT_BGR_GREEN
+#define RGB_BLUE  EXT_BGR_BLUE
+#define RGB_PIXELSIZE  EXT_BGR_PIXELSIZE
+#define jsimd_rgb_ycc_convert_mmi  jsimd_extbgr_ycc_convert_mmi
+#include "jccolext-mmi.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef jsimd_rgb_ycc_convert_mmi
+
+#define RGB_RED  EXT_BGRX_RED
+#define RGB_GREEN  EXT_BGRX_GREEN
+#define RGB_BLUE  EXT_BGRX_BLUE
+#define RGB_PIXELSIZE  EXT_BGRX_PIXELSIZE
+#define jsimd_rgb_ycc_convert_mmi  jsimd_extbgrx_ycc_convert_mmi
+#include "jccolext-mmi.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef jsimd_rgb_ycc_convert_mmi
+
+#define RGB_RED  EXT_XBGR_RED
+#define RGB_GREEN  EXT_XBGR_GREEN
+#define RGB_BLUE  EXT_XBGR_BLUE
+#define RGB_PIXELSIZE  EXT_XBGR_PIXELSIZE
+#define jsimd_rgb_ycc_convert_mmi  jsimd_extxbgr_ycc_convert_mmi
+#include "jccolext-mmi.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef jsimd_rgb_ycc_convert_mmi
+
+#define RGB_RED  EXT_XRGB_RED
+#define RGB_GREEN  EXT_XRGB_GREEN
+#define RGB_BLUE  EXT_XRGB_BLUE
+#define RGB_PIXELSIZE  EXT_XRGB_PIXELSIZE
+#define jsimd_rgb_ycc_convert_mmi  jsimd_extxrgb_ycc_convert_mmi
+#include "jccolext-mmi.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef jsimd_rgb_ycc_convert_mmi
diff --git a/simd/loongson/jcsample-mmi.c b/simd/loongson/jcsample-mmi.c
new file mode 100644
index 0000000..2f2d851
--- /dev/null
+++ b/simd/loongson/jcsample-mmi.c
@@ -0,0 +1,100 @@
+/*
+ * Loongson MMI optimizations
for libjpeg-turbo + * + * Copyright (C) 2015, 2018, D. R. Commander. All Rights Reserved. + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * Authors: ZhuChen + * CaiWanwei + * SunZhangzhi + * + * Based on the x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */
+
+/* CHROMA DOWNSAMPLING */
+
+#include "jsimd_mmi.h"
+#include "jcsample.h"
+/* 2:1 horizontal, 2:1 vertical downsampling: every output sample is the biased
+ * sum of a 2x2 group of input samples (two columns in two rows) shifted by 2. */
+void jsimd_h2v2_downsample_mmi(JDIMENSION image_width, int max_v_samp_factor,
+                               JDIMENSION v_samp_factor,
+                               JDIMENSION width_in_blocks,
+                               JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow, outcol, bias;
+  JDIMENSION output_cols = width_in_blocks * DCTSIZE;
+  JSAMPROW inptr0, inptr1, outptr;
+  __m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6 = 0.0, mm7;
+
+  expand_right_edge(input_data, max_v_samp_factor, image_width,
+                    output_cols * 2);
+
+  bias = (1 << 17) + 1;                   /* 0x00020001 (bias pattern) */
+  mm7 = _mm_set1_pi32(bias);              /* mm7={1, 2, 1, 2} */
+  mm6 = _mm_cmpeq_pi16(mm6, mm6);
+  mm6 = _mm_srli_pi16(mm6, BYTE_BIT);     /* mm6={0xFF 0x00 0xFF 0x00 ..} */
+
+  for (inrow = 0, outrow = 0; outrow < v_samp_factor;
+       inrow += 2, outrow++) {            /* two input rows per output row */
+
+    inptr0 = input_data[inrow];
+    inptr1 = input_data[inrow + 1];
+    outptr = output_data[outrow];
+
+    for (outcol = output_cols; outcol > 0;
+         outcol -= 8, inptr0 += 16, inptr1 += 16, outptr += 8) {
+
+      mm0 = _mm_load_si64((__m64 *)&inptr0[0]);
+      mm1 = _mm_load_si64((__m64 *)&inptr1[0]);
+      mm2 = _mm_load_si64((__m64 *)&inptr0[8]);
+      mm3 = _mm_load_si64((__m64 *)&inptr1[8]);
+
+      mm4 = mm0;
+      mm5 = mm1;
+      mm0 = _mm_and_si64(mm0, mm6);
+      mm4 = _mm_srli_pi16(mm4, BYTE_BIT);
+      mm1 = _mm_and_si64(mm1, mm6);
+      mm5 = _mm_srli_pi16(mm5, BYTE_BIT);
+      mm0 = _mm_add_pi16(mm0, mm4);       /* low byte + high byte of each 16-bit lane */
+      mm1 = _mm_add_pi16(mm1, mm5);
+
+      mm4 = mm2;
+      mm5 = mm3;
+      mm2 = _mm_and_si64(mm2, mm6);
+      mm4 = _mm_srli_pi16(mm4, BYTE_BIT);
+      mm3 = _mm_and_si64(mm3, mm6);
+      mm5 = _mm_srli_pi16(mm5, BYTE_BIT);
+      mm2 = _mm_add_pi16(mm2, mm4);
+      mm3 = _mm_add_pi16(mm3, mm5);
+
+      mm0 = _mm_add_pi16(mm0, mm1);       /* add the two input rows */
+      mm2 = _mm_add_pi16(mm2, mm3);
+      mm0 = _mm_add_pi16(mm0, mm7);       /* add the bias pattern */
+      mm2 = _mm_add_pi16(mm2, mm7);
+      mm0 = _mm_srli_pi16(mm0, 2);
+      mm2 = _mm_srli_pi16(mm2, 2);
+
+      mm0 = _mm_packs_pu16(mm0, mm2);     /* narrow both halves to 8 output bytes */
+
+      _mm_store_si64((__m64 *)&outptr[0], mm0);
+    }
+  }
+}
diff --git a/simd/jcsample.h
b/simd/loongson/jcsample.h similarity index 76% rename from simd/jcsample.h rename to simd/loongson/jcsample.h index 2a50544..2ac4816 100644 --- a/simd/jcsample.h +++ b/simd/loongson/jcsample.h @@ -8,14 +8,14 @@ */ LOCAL(void) -expand_right_edge (JSAMPARRAY image_data, int num_rows, - JDIMENSION input_cols, JDIMENSION output_cols) +expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols, + JDIMENSION output_cols) { register JSAMPROW ptr; register JSAMPLE pixval; register int count; int row; - int numcols = (int) (output_cols - input_cols); + int numcols = (int)(output_cols - input_cols); if (numcols > 0) { for (row = 0; row < num_rows; row++) { diff --git a/simd/loongson/jdcolext-mmi.c b/simd/loongson/jdcolext-mmi.c new file mode 100644 index 0000000..560d9b0 --- /dev/null +++ b/simd/loongson/jdcolext-mmi.c @@ -0,0 +1,424 @@ +/* + * Loongson MMI optimizations for libjpeg-turbo + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2015, D. R. Commander. All Rights Reserved. + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * Authors: ZhuChen + * SunZhangzhi + * CaiWanwei + * + * Based on the x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* This file is included by jdcolor-mmi.c */ + +/* Map mmA-mmH onto mm0-mm7 according to the byte position (0-3) of each colour component in the output pixel: at the interleave step (mm0,mm1) hold R, (mm2,mm3) hold G, (mm4,mm5) hold B and (mm6,mm7) hold the filler byte. */ +#if RGB_RED == 0 +#define mmA mm0 +#define mmB mm1 +#elif RGB_GREEN == 0 +#define mmA mm2 +#define mmB mm3 +#elif RGB_BLUE == 0 +#define mmA mm4 +#define mmB mm5 +#else +#define mmA mm6 +#define mmB mm7 +#endif + +#if RGB_RED == 1 +#define mmC mm0 +#define mmD mm1 +#elif RGB_GREEN == 1 +#define mmC mm2 +#define mmD mm3 +#elif RGB_BLUE == 1 +#define mmC mm4 +#define mmD mm5 +#else +#define mmC mm6 +#define mmD mm7 +#endif + +#if RGB_RED == 2 +#define mmE mm0 +#define mmF mm1 +#elif RGB_GREEN == 2 +#define mmE mm2 +#define mmF mm3 +#elif RGB_BLUE == 2 +#define mmE mm4 +#define mmF mm5 +#else +#define mmE mm6 +#define mmF mm7 +#endif + +#if RGB_RED == 3 +#define mmG mm0 +#define mmH mm1 +#elif RGB_GREEN == 3 +#define mmG mm2 +#define mmH mm3 +#elif RGB_BLUE == 3 +#define mmG mm4 +#define mmH mm5 +#else +#define mmG mm6 +#define mmH mm7 +#endif + +/* Convert num_rows rows of Y (input_buf[0]), Cb (input_buf[1]) and Cr (input_buf[2]) samples, starting at input_row, to packed pixels of RGB_PIXELSIZE bytes in output_buf. Eight pixels are converted per inner-loop iteration; when fewer than eight remain, the inline assembly stores only the remaining bytes. */ +void jsimd_ycc_rgb_convert_mmi(JDIMENSION out_width, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ + JSAMPROW outptr, inptr0, inptr1, inptr2; + int num_cols, col; + __m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7; + __m64 mm8, wk[2]; + + while (--num_rows >= 0) { + inptr0 = input_buf[0][input_row]; + inptr1 = input_buf[1][input_row]; + inptr2 = input_buf[2][input_row]; + input_row++; + outptr = *output_buf++; + + for (num_cols = out_width; num_cols > 0; num_cols -= 8, + inptr0 += 8, inptr1 += 8, inptr2 += 8) { + /* Load 8 Cb, 8 Cr and 8 Y samples */ + mm5 = _mm_load_si64((__m64 *)inptr1); + mm1 = _mm_load_si64((__m64 *)inptr2); + mm8 = _mm_load_si64((__m64 *)inptr0); + mm4 = 0; + mm7 = 0; + mm4 = _mm_cmpeq_pi16(mm4, mm4); + mm7 = _mm_cmpeq_pi16(mm7, mm7); + mm4 = _mm_srli_pi16(mm4, BYTE_BIT); + mm7 = _mm_slli_pi16(mm7, 7); /* mm7={0xFF80 0xFF80 0xFF80 0xFF80} */ + mm0
= mm4; /* mm0=mm4={0xFF 0x00 0xFF 0x00 ..} */ + + mm4 = _mm_and_si64(mm4, mm5); /* mm4=Cb(0246)=CbE */ + mm5 = _mm_srli_pi16(mm5, BYTE_BIT); /* mm5=Cb(1357)=CbO */ + mm0 = _mm_and_si64(mm0, mm1); /* mm0=Cr(0246)=CrE */ + mm1 = _mm_srli_pi16(mm1, BYTE_BIT); /* mm1=Cr(1357)=CrO */ + mm4 = _mm_add_pi16(mm4, mm7); /* adding 0xFF80 (-128 as a signed word) centres Cb/Cr on zero */ + mm5 = _mm_add_pi16(mm5, mm7); + mm0 = _mm_add_pi16(mm0, mm7); + mm1 = _mm_add_pi16(mm1, mm7); + + /* (Original) + * R = Y + 1.40200 * Cr + * G = Y - 0.34414 * Cb - 0.71414 * Cr + * B = Y + 1.77200 * Cb + * + * (This implementation) + * R = Y + 0.40200 * Cr + Cr + * G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + * B = Y - 0.22800 * Cb + Cb + Cb + */ + + mm2 = mm4; /* mm2 = CbE */ + mm3 = mm5; /* mm3 = CbO */ + mm4 = _mm_add_pi16(mm4, mm4); /* mm4 = 2*CbE */ + mm5 = _mm_add_pi16(mm5, mm5); /* mm5 = 2*CbO */ + mm6 = mm0; /* mm6 = CrE */ + mm7 = mm1; /* mm7 = CrO */ + mm0 = _mm_add_pi16(mm0, mm0); /* mm0 = 2*CrE */ + mm1 = _mm_add_pi16(mm1, mm1); /* mm1 = 2*CrO */ + + mm4 = _mm_mulhi_pi16(mm4, PW_MF0228); /* mm4=(2*CbE * -FIX(0.22800)) */ + mm5 = _mm_mulhi_pi16(mm5, PW_MF0228); /* mm5=(2*CbO * -FIX(0.22800)) */ + mm0 = _mm_mulhi_pi16(mm0, PW_F0402); /* mm0=(2*CrE * FIX(0.40200)) */ + mm1 = _mm_mulhi_pi16(mm1, PW_F0402); /* mm1=(2*CrO * FIX(0.40200)) */ + + mm4 = _mm_add_pi16(mm4, PW_ONE); + mm5 = _mm_add_pi16(mm5, PW_ONE); + mm4 = _mm_srai_pi16(mm4, 1); /* mm4=(CbE * -FIX(0.22800)) */ + mm5 = _mm_srai_pi16(mm5, 1); /* mm5=(CbO * -FIX(0.22800)) */ + mm0 = _mm_add_pi16(mm0, PW_ONE); + mm1 = _mm_add_pi16(mm1, PW_ONE); + mm0 = _mm_srai_pi16(mm0, 1); /* mm0=(CrE * FIX(0.40200)) */ + mm1 = _mm_srai_pi16(mm1, 1); /* mm1=(CrO * FIX(0.40200)) */ + + mm4 = _mm_add_pi16(mm4, mm2); + mm5 = _mm_add_pi16(mm5, mm3); + mm4 = _mm_add_pi16(mm4, mm2); /* mm4=(CbE * FIX(1.77200))=(B-Y)E */ + mm5 = _mm_add_pi16(mm5, mm3); /* mm5=(CbO * FIX(1.77200))=(B-Y)O */ + mm0 = _mm_add_pi16(mm0, mm6); /* mm0=(CrE * FIX(1.40200))=(R-Y)E */ + mm1 = _mm_add_pi16(mm1, mm7); /* mm1=(CrO *
FIX(1.40200))=(R-Y)O */ + + wk[0] = mm4; /* wk(0)=(B-Y)E */ + wk[1] = mm5; /* wk(1)=(B-Y)O */ + /* (G-Y) = Cb * -FIX(0.344) + Cr * FIX(0.285) - Cr; the multiply-add is carried out in 32 bits, then rounded, descaled and packed back to 16 bits */ + mm4 = mm2; + mm5 = mm3; + mm2 = _mm_unpacklo_pi16(mm2, mm6); + mm4 = _mm_unpackhi_pi16(mm4, mm6); + mm2 = _mm_madd_pi16(mm2, PW_MF0344_F0285); + mm4 = _mm_madd_pi16(mm4, PW_MF0344_F0285); + mm3 = _mm_unpacklo_pi16(mm3, mm7); + mm5 = _mm_unpackhi_pi16(mm5, mm7); + mm3 = _mm_madd_pi16(mm3, PW_MF0344_F0285); + mm5 = _mm_madd_pi16(mm5, PW_MF0344_F0285); + + mm2 = _mm_add_pi32(mm2, PD_ONEHALF); + mm4 = _mm_add_pi32(mm4, PD_ONEHALF); + mm2 = _mm_srai_pi32(mm2, SCALEBITS); + mm4 = _mm_srai_pi32(mm4, SCALEBITS); + mm3 = _mm_add_pi32(mm3, PD_ONEHALF); + mm5 = _mm_add_pi32(mm5, PD_ONEHALF); + mm3 = _mm_srai_pi32(mm3, SCALEBITS); + mm5 = _mm_srai_pi32(mm5, SCALEBITS); + + mm2 = _mm_packs_pi32(mm2, mm4); /* mm2=CbE*-FIX(0.344)+CrE*FIX(0.285) */ + mm3 = _mm_packs_pi32(mm3, mm5); /* mm3=CbO*-FIX(0.344)+CrO*FIX(0.285) */ + mm2 = _mm_sub_pi16(mm2, mm6); /* mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E */ + mm3 = _mm_sub_pi16(mm3, mm7); /* mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O */ + + mm5 = mm8; /* mm5=Y(01234567) */ + + mm4 = _mm_cmpeq_pi16(mm4, mm4); + mm4 = _mm_srli_pi16(mm4, BYTE_BIT); /* mm4={0xFF 0x00 0xFF 0x00 ..} */ + mm4 = _mm_and_si64(mm4, mm5); /* mm4=Y(0246)=YE */ + mm5 = _mm_srli_pi16(mm5, BYTE_BIT); /* mm5=Y(1357)=YO */ + + mm0 = _mm_add_pi16(mm0, mm4); /* mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6) */ + mm1 = _mm_add_pi16(mm1, mm5); /* mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7) */ + mm0 = _mm_packs_pu16(mm0, mm0); /* mm0=(R0 R2 R4 R6 ** ** ** **) */ + mm1 = _mm_packs_pu16(mm1, mm1); /* mm1=(R1 R3 R5 R7 ** ** ** **) */ + + mm2 = _mm_add_pi16(mm2, mm4); /* mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6) */ + mm3 = _mm_add_pi16(mm3, mm5); /* mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7) */ + mm2 = _mm_packs_pu16(mm2, mm2); /* mm2=(G0 G2 G4 G6 ** ** ** **) */ + mm3 = _mm_packs_pu16(mm3, mm3); /* mm3=(G1 G3 G5 G7 ** ** ** **) */ + + mm4 = _mm_add_pi16(mm4, wk[0]); /* mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6) */ + mm5 =
_mm_add_pi16(mm5, wk[1]); /* mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7) */ + mm4 = _mm_packs_pu16(mm4, mm4); /* mm4=(B0 B2 B4 B6 ** ** ** **) */ + mm5 = _mm_packs_pu16(mm5, mm5); /* mm5=(B1 B3 B5 B7 ** ** ** **) */ + +#if RGB_PIXELSIZE == 3 + + /* mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) */ + /* mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) */ + mmA = _mm_unpacklo_pi8(mmA, mmC); /* mmA=(00 10 02 12 04 14 06 16) */ + mmE = _mm_unpacklo_pi8(mmE, mmB); /* mmE=(20 01 22 03 24 05 26 07) */ + mmD = _mm_unpacklo_pi8(mmD, mmF); /* mmD=(11 21 13 23 15 25 17 27) */ + + mmG = mmA; + mmH = mmA; + mmA = _mm_unpacklo_pi16(mmA, mmE); /* mmA=(00 10 20 01 02 12 22 03) */ + mmG = _mm_unpackhi_pi16(mmG, mmE); /* mmG=(04 14 24 05 06 16 26 07) */ + + mmH = _mm_srli_si64(mmH, 2 * BYTE_BIT); /* mmH=(02 12 04 14 06 16 -- --) */ + mmE = _mm_srli_si64(mmE, 2 * BYTE_BIT); /* mmE=(22 03 24 05 26 07 -- --) */ + + mmC = mmD; + mmB = mmD; + mmD = _mm_unpacklo_pi16(mmD, mmH); /* mmD=(11 21 02 12 13 23 04 14) */ + mmC = _mm_unpackhi_pi16(mmC, mmH); /* mmC=(15 25 06 16 17 27 -- --) */ + + mmB = _mm_srli_si64(mmB, 2 * BYTE_BIT); /* mmB=(13 23 15 25 17 27 -- --) */ + + mmF = mmE; + mmE = _mm_unpacklo_pi16(mmE, mmB); /* mmE=(22 03 13 23 24 05 15 25) */ + mmF = _mm_unpackhi_pi16(mmF, mmB); /* mmF=(26 07 17 27 -- -- -- --) */ + + mmA = _mm_unpacklo_pi32(mmA, mmD); /* mmA=(00 10 20 01 11 21 02 12) */ + mmE = _mm_unpacklo_pi32(mmE, mmG); /* mmE=(22 03 13 23 04 14 24 05) */ + mmC = _mm_unpacklo_pi32(mmC, mmF); /* mmC=(15 25 06 16 26 07 17 27) */ + + if (num_cols >= 8) { + _mm_store_si64((__m64 *)outptr, mmA); + _mm_store_si64((__m64 *)(outptr + 8), mmE); + _mm_store_si64((__m64 *)(outptr + 16), mmC); + outptr += RGB_PIXELSIZE * 8; + } else { + col = num_cols * 3; /* number of output bytes left to store; the asm below writes them in 16/8/4/2/1-byte pieces */ + asm(".set noreorder\r\n" + + "li $8, 16\r\n" + "move $9, %4\r\n" + "mov.s $f4, %1\r\n" + "mov.s $f6, %3\r\n" + "move $10, %5\r\n" + "bltu $9, $8, 1f\r\n" + "nop \r\n" + "gssdlc1 $f4, 7($10)\r\n" + "gssdrc1 $f4, 0($10)\r\n" + "gssdlc1 $f6, 7+8($10)\r\n" + "gssdrc1 $f6, 8($10)\r\n" + "mov.s
$f4, %2\r\n" + "subu $9, $9, 16\r\n" + "daddu $10, $10, 16\r\n" + "b 2f\r\n" + "nop \r\n" + + "1: \r\n" + "li $8, 8\r\n" /* st8 */ + "bltu $9, $8, 2f\r\n" + "nop \r\n" + "gssdlc1 $f4, 7($10)\r\n" + "gssdrc1 $f4, ($10)\r\n" + "mov.s $f4, %3\r\n" + "subu $9, $9, 8\r\n" + "daddu $10, $10, 8\r\n" + + "2: \r\n" + "li $8, 4\r\n" /* st4 */ + "mfc1 $11, $f4\r\n" + "bltu $9, $8, 3f\r\n" + "nop \r\n" + "swl $11, 3($10)\r\n" + "swr $11, 0($10)\r\n" + "li $8, 32\r\n" + "mtc1 $8, $f6\r\n" + "dsrl $f4, $f4, $f6\r\n" + "mfc1 $11, $f4\r\n" + "subu $9, $9, 4\r\n" + "daddu $10, $10, 4\r\n" + + "3: \r\n" + "li $8, 2\r\n" /* st2 */ + "bltu $9, $8, 4f\r\n" + "nop \r\n" + "ush $11, 0($10)\r\n" + "srl $11, 16\r\n" + "subu $9, $9, 2\r\n" + "daddu $10, $10, 2\r\n" + + "4: \r\n" + "li $8, 1\r\n" /* st1 */ + "bltu $9, $8, 5f\r\n" + "nop \r\n" + "sb $11, 0($10)\r\n" + + "5: \r\n" + "nop \r\n" /* end */ + : "=m" (*outptr) + : "f" (mmA), "f" (mmC), "f" (mmE), "r" (col), "r" (outptr) + : "$f4", "$f6", "$8", "$9", "$10", "$11", "memory" + ); + } + +#else /* RGB_PIXELSIZE == 4 */ + +#ifdef RGBX_FILLER_0XFF + mm6 = _mm_cmpeq_pi8(mm6, mm6); + mm7 = _mm_cmpeq_pi8(mm7, mm7); +#else + mm6 = _mm_xor_si64(mm6, mm6); + mm7 = _mm_xor_si64(mm7, mm7); +#endif + /* mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **) */ + /* mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **) */ + /* mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **) */ + /* mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **) */ + + mmA = _mm_unpacklo_pi8(mmA, mmC); /* mmA=(00 10 02 12 04 14 06 16) */ + mmE = _mm_unpacklo_pi8(mmE, mmG); /* mmE=(20 30 22 32 24 34 26 36) */ + mmB = _mm_unpacklo_pi8(mmB, mmD); /* mmB=(01 11 03 13 05 15 07 17) */ + mmF = _mm_unpacklo_pi8(mmF, mmH); /* mmF=(21 31 23 33 25 35 27 37) */ + + mmC = mmA; + mmA = _mm_unpacklo_pi16(mmA, mmE); /* mmA=(00 10 20 30 02 12 22 32) */ + mmC = _mm_unpackhi_pi16(mmC, mmE); /* mmC=(04 14 24 34 06 16 26 36) */ + mmG = mmB; + mmB =
_mm_unpacklo_pi16(mmB, mmF); /* mmB=(01 11 21 31 03 13 23 33) */ + mmG = _mm_unpackhi_pi16(mmG, mmF); /* mmG=(05 15 25 35 07 17 27 37) */ + + mmD = mmA; + mmA = _mm_unpacklo_pi32(mmA, mmB); /* mmA=(00 10 20 30 01 11 21 31) */ + mmD = _mm_unpackhi_pi32(mmD, mmB); /* mmD=(02 12 22 32 03 13 23 33) */ + mmH = mmC; + mmC = _mm_unpacklo_pi32(mmC, mmG); /* mmC=(04 14 24 34 05 15 25 35) */ + mmH = _mm_unpackhi_pi32(mmH, mmG); /* mmH=(06 16 26 36 07 17 27 37) */ + + if (num_cols >= 8) { + _mm_store_si64((__m64 *)outptr, mmA); + _mm_store_si64((__m64 *)(outptr + 8), mmD); + _mm_store_si64((__m64 *)(outptr + 16), mmC); + _mm_store_si64((__m64 *)(outptr + 24), mmH); + outptr += RGB_PIXELSIZE * 8; + } else { + col = num_cols; /* number of 4-byte pixels left to store; the asm below writes them in groups of 4, 2 and 1 pixels */ + asm(".set noreorder\r\n" /* st16 */ + + "li $8, 4\r\n" + "move $9, %6\r\n" + "move $10, %7\r\n" + "mov.s $f4, %2\r\n" + "mov.s $f6, %4\r\n" + "bltu $9, $8, 1f\r\n" + "nop \r\n" + "gssdlc1 $f4, 7($10)\r\n" + "gssdrc1 $f4, ($10)\r\n" + "gssdlc1 $f6, 7+8($10)\r\n" + "gssdrc1 $f6, 8($10)\r\n" + "mov.s $f4, %3\r\n" + "mov.s $f6, %5\r\n" + "subu $9, $9, 4\r\n" + "daddu $10, $10, 16\r\n" + + "1: \r\n" + "li $8, 2\r\n" /* st8 */ + "bltu $9, $8, 2f\r\n" + "nop \r\n" + "gssdlc1 $f4, 7($10)\r\n" + "gssdrc1 $f4, 0($10)\r\n" + "mov.s $f4, $f6\r\n" + "subu $9, $9, 2\r\n" + "daddu $10, $10, 8\r\n" + + "2: \r\n" + "li $8, 1\r\n" /* st4 */ + "bltu $9, $8, 3f\r\n" + "nop \r\n" + "gsswlc1 $f4, 3($10)\r\n" + "gsswrc1 $f4, 0($10)\r\n" + + "3: \r\n" + "li %1, 0\r\n" /* end */ + : "=m" (*outptr), "=r" (col) + : "f" (mmA), "f" (mmC), "f" (mmD), "f" (mmH), "r" (col), + "r" (outptr) + : "$f4", "$f6", "$8", "$9", "$10", "memory" + ); + } + +#endif + + } + } +} + +#undef mmA +#undef mmB +#undef mmC +#undef mmD +#undef mmE +#undef mmF +#undef mmG +#undef mmH diff --git a/simd/loongson/jdcolor-mmi.c b/simd/loongson/jdcolor-mmi.c new file mode 100644 index 0000000..2c58263 --- /dev/null +++ b/simd/loongson/jdcolor-mmi.c @@ -0,0 +1,139 @@ +/* + * Loongson MMI optimizations for
libjpeg-turbo + * + * Copyright (C) 2011, 2015, D. R. Commander. All Rights Reserved. + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * Authors: ZhuChen + * CaiWanwei + * SunZhangzhi + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +/* YCC --> RGB CONVERSION */ + +#include "jsimd_mmi.h" + + +#define F_0_344 ((short)22554) /* FIX(0.34414) */ +#define F_0_402 ((short)26345) /* FIX(1.40200) - FIX(1) */ +#define F_0_285 ((short)18734) /* FIX(1) - FIX(0.71414) */ +#define F_0_228 ((short)14942) /* FIX(2) - FIX(1.77200) */ + /* The enumerators below are listed in the same order as the entries of const_value[]; keep the two in step. */ +enum const_index { + index_PW_ONE, + index_PW_F0402, + index_PW_MF0228, + index_PW_MF0344_F0285, + index_PD_ONEHALF +}; + +static uint64_t const_value[] = { + _uint64_set_pi16(1, 1, 1, 1), + _uint64_set_pi16(F_0_402, F_0_402, F_0_402, F_0_402), + _uint64_set_pi16(-F_0_228, -F_0_228, -F_0_228, -F_0_228), + _uint64_set_pi16(F_0_285, -F_0_344, F_0_285, -F_0_344), + _uint64_set_pi32((int)(1 << (SCALEBITS - 1)), (int)(1 << (SCALEBITS - 1))) +}; + +#define PW_ONE get_const_value(index_PW_ONE) +#define PW_F0402 get_const_value(index_PW_F0402) +#define PW_MF0228 get_const_value(index_PW_MF0228) +#define PW_MF0344_F0285 get_const_value(index_PW_MF0344_F0285) +#define PD_ONEHALF get_const_value(index_PD_ONEHALF) + +#define RGBX_FILLER_0XFF 1 + +/* jdcolext-mmi.c is included once per output pixel format. The first inclusion uses the RGB_* values already in effect; each later one redefines RGB_RED, RGB_GREEN, RGB_BLUE, RGB_PIXELSIZE and the function name first. */ +#include "jdcolext-mmi.c" +#undef RGB_RED +#undef RGB_GREEN +#undef RGB_BLUE +#undef RGB_PIXELSIZE + +#define RGB_RED EXT_RGB_RED +#define RGB_GREEN EXT_RGB_GREEN +#define RGB_BLUE EXT_RGB_BLUE +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_ycc_rgb_convert_mmi jsimd_ycc_extrgb_convert_mmi +#include "jdcolext-mmi.c" +#undef RGB_RED +#undef RGB_GREEN +#undef RGB_BLUE +#undef RGB_PIXELSIZE +#undef jsimd_ycc_rgb_convert_mmi + +#define RGB_RED EXT_RGBX_RED +#define RGB_GREEN EXT_RGBX_GREEN +#define RGB_BLUE EXT_RGBX_BLUE +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define jsimd_ycc_rgb_convert_mmi jsimd_ycc_extrgbx_convert_mmi +#include "jdcolext-mmi.c" +#undef RGB_RED +#undef RGB_GREEN +#undef RGB_BLUE +#undef RGB_PIXELSIZE +#undef jsimd_ycc_rgb_convert_mmi + +#define RGB_RED EXT_BGR_RED +#define RGB_GREEN EXT_BGR_GREEN +#define RGB_BLUE EXT_BGR_BLUE +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define jsimd_ycc_rgb_convert_mmi
jsimd_ycc_extbgr_convert_mmi +#include "jdcolext-mmi.c" +#undef RGB_RED +#undef RGB_GREEN +#undef RGB_BLUE +#undef RGB_PIXELSIZE +#undef jsimd_ycc_rgb_convert_mmi + +#define RGB_RED EXT_BGRX_RED +#define RGB_GREEN EXT_BGRX_GREEN +#define RGB_BLUE EXT_BGRX_BLUE +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define jsimd_ycc_rgb_convert_mmi jsimd_ycc_extbgrx_convert_mmi +#include "jdcolext-mmi.c" +#undef RGB_RED +#undef RGB_GREEN +#undef RGB_BLUE +#undef RGB_PIXELSIZE +#undef jsimd_ycc_rgb_convert_mmi + +#define RGB_RED EXT_XBGR_RED +#define RGB_GREEN EXT_XBGR_GREEN +#define RGB_BLUE EXT_XBGR_BLUE +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define jsimd_ycc_rgb_convert_mmi jsimd_ycc_extxbgr_convert_mmi +#include "jdcolext-mmi.c" +#undef RGB_RED +#undef RGB_GREEN +#undef RGB_BLUE +#undef RGB_PIXELSIZE +#undef jsimd_ycc_rgb_convert_mmi + +#define RGB_RED EXT_XRGB_RED +#define RGB_GREEN EXT_XRGB_GREEN +#define RGB_BLUE EXT_XRGB_BLUE +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define jsimd_ycc_rgb_convert_mmi jsimd_ycc_extxrgb_convert_mmi +#include "jdcolext-mmi.c" +#undef RGB_RED +#undef RGB_GREEN +#undef RGB_BLUE +#undef RGB_PIXELSIZE +#undef jsimd_ycc_rgb_convert_mmi diff --git a/simd/loongson/jdsample-mmi.c b/simd/loongson/jdsample-mmi.c new file mode 100644 index 0000000..00a6265 --- /dev/null +++ b/simd/loongson/jdsample-mmi.c @@ -0,0 +1,245 @@ +/* + * Loongson MMI optimizations for libjpeg-turbo + * + * Copyright (C) 2015, 2018, D. R. Commander. All Rights Reserved. + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * Authors: ZhuChen + * CaiWanwei + * SunZhangzhi + * + * Based on the x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software.
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* CHROMA UPSAMPLING */ + +#include "jsimd_mmi.h" + + +enum const_index { + index_PW_THREE, + index_PW_SEVEN, + index_PW_EIGHT, +}; + +static uint64_t const_value[] = { + _uint64_set_pi16(3, 3, 3, 3), + _uint64_set_pi16(7, 7, 7, 7), + _uint64_set_pi16(8, 8, 8, 8), +}; + +#define PW_THREE get_const_value(index_PW_THREE) +#define PW_SEVEN get_const_value(index_PW_SEVEN) +#define PW_EIGHT get_const_value(index_PW_EIGHT) + +/* Horizontal pass for output row r. Reads the eight 16-bit intermediate values previously saved at outptr##r, computes (3 * cur + left + 8) / 16 for even output columns and (3 * cur + right + 7) / 16 for odd output columns, and writes the 16 resulting bytes back to outptr##r. wk[r] supplies the left neighbour of element 0 and wk[r + 2] the right neighbour of element 7; wk[r] is updated with element 7 for the next column block. */ +#define PROCESS_ROW(r) { \ + mm7 = _mm_load_si64((__m64 *)outptr##r); /* mm7=IntrL=( 0 1 2 3) */ \ + mm3 = _mm_load_si64((__m64 *)outptr##r + 1); /* mm3=IntrH=( 4 5 6 7) */ \ + \ + mm0 = mm7; \ + mm4 = mm3; \ + mm0 = _mm_srli_si64(mm0, 2 * BYTE_BIT); /* mm0=( 1 2 3 -) */ \ + mm4 = _mm_slli_si64(mm4, (SIZEOF_MMWORD - 2) * BYTE_BIT); /* mm4=( - - - 4) */ \ + mm5 = mm7; \ + mm6 = mm3; \ + mm5 = _mm_srli_si64(mm5, (SIZEOF_MMWORD - 2) * BYTE_BIT); /* mm5=( 3 - - -) */ \ + mm6 = _mm_slli_si64(mm6, 2 * BYTE_BIT); /* mm6=( - 4 5 6) */ \ + \ + mm0 = _mm_or_si64(mm0, mm4); /* mm0=( 1 2 3 4) */ \ + mm5 = _mm_or_si64(mm5, mm6); /* mm5=( 3 4 5 6) */ \ + \ + mm1 = mm7; \ + mm2 = mm3; \ + mm1 = _mm_slli_si64(mm1, 2 * BYTE_BIT); /* mm1=( - 0 1 2) */ \ + mm2 = _mm_srli_si64(mm2, 2 * BYTE_BIT); /* mm2=( 5 6 7 -) */ \ + mm4 = mm3; \ + mm4 = _mm_srli_si64(mm4, (SIZEOF_MMWORD - 2) * BYTE_BIT); /*
mm4=( 7 - - -) */ \ + \ + mm1 = _mm_or_si64(mm1, wk[r]); /* mm1=(-1 0 1 2) */ \ + mm2 = _mm_or_si64(mm2, wk[r + 2]); /* mm2=( 5 6 7 8) */ \ + \ + wk[r] = mm4; \ + \ + mm7 = _mm_mullo_pi16(mm7, PW_THREE); \ + mm3 = _mm_mullo_pi16(mm3, PW_THREE); \ + mm1 = _mm_add_pi16(mm1, PW_EIGHT); \ + mm5 = _mm_add_pi16(mm5, PW_EIGHT); \ + mm0 = _mm_add_pi16(mm0, PW_SEVEN); \ + mm2 = _mm_add_pi16(mm2, PW_SEVEN); \ + \ + mm1 = _mm_add_pi16(mm1, mm7); \ + mm5 = _mm_add_pi16(mm5, mm3); \ + mm1 = _mm_srli_pi16(mm1, 4); /* mm1=OutrLE=( 0 2 4 6) */ \ + mm5 = _mm_srli_pi16(mm5, 4); /* mm5=OutrHE=( 8 10 12 14) */ \ + mm0 = _mm_add_pi16(mm0, mm7); \ + mm2 = _mm_add_pi16(mm2, mm3); \ + mm0 = _mm_srli_pi16(mm0, 4); /* mm0=OutrLO=( 1 3 5 7) */ \ + mm2 = _mm_srli_pi16(mm2, 4); /* mm2=OutrHO=( 9 11 13 15) */ \ + \ + mm0 = _mm_slli_pi16(mm0, BYTE_BIT); \ + mm2 = _mm_slli_pi16(mm2, BYTE_BIT); \ + mm1 = _mm_or_si64(mm1, mm0); /* mm1=OutrL=( 0 1 2 3 4 5 6 7) */ \ + mm5 = _mm_or_si64(mm5, mm2); /* mm5=OutrH=( 8 9 10 11 12 13 14 15) */ \ + \ + _mm_store_si64((__m64 *)outptr##r, mm1); \ + _mm_store_si64((__m64 *)outptr##r + 1, mm5); \ +} +/* Upsample one component 2:1 horizontally and 2:1 vertically. Vertical pass: Int0 = 3 * row[0] + row[-1] and Int1 = 3 * row[0] + row[+1], kept as 16-bit values and parked temporarily in the two output rows. Horizontal pass: PROCESS_ROW. Each output sample is therefore weighted 9/16, 3/16, 3/16, 1/16 over its four nearest input samples. */ +void jsimd_h2v2_fancy_upsample_mmi(int max_v_samp_factor, + JDIMENSION downsampled_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr_1, inptr0, inptr1, outptr0, outptr1; + int inrow, outrow, incol, tmp, tmp1; + __m64 mm0, mm1, mm2, mm3 = 0.0, mm4, mm5, mm6, mm7 = 0.0; + __m64 wk[4], mm_tmp; + + for (inrow = 0, outrow = 0; outrow < max_v_samp_factor; inrow++) { + + inptr_1 = input_data[inrow - 1]; + inptr0 = input_data[inrow]; + inptr1 = input_data[inrow + 1]; + outptr0 = output_data[outrow++]; + outptr1 = output_data[outrow++]; + /* If the width is not a multiple of 8, copy the last sample of each of the three input rows one position to the right (index downsampled_width - 1 to index downsampled_width) */ + if (downsampled_width & 7) { + tmp = (downsampled_width - 1) * sizeof(JSAMPLE); + tmp1 = downsampled_width * sizeof(JSAMPLE); + asm("daddu $8, %3, %6\r\n" + "lb $9, ($8)\r\n" + "daddu $8, %3, %7\r\n" + "sb $9, ($8)\r\n" + "daddu $8, %4, %6\r\n" + "lb $9,
($8)\r\n" + "daddu $8, %4, %7\r\n" + "sb $9, ($8)\r\n" + "daddu $8, %5, %6\r\n" + "lb $9, ($8)\r\n" + "daddu $8, %5, %7\r\n" + "sb $9, ($8)\r\n" + : "=m" (*inptr_1), "=m" (*inptr0), "=m" (*inptr1) + : "r" (inptr_1), "r" (inptr0), "r" (inptr1), "r" (tmp), "r" (tmp1) + : "$8", "$9" + ); + } + + /* process the first column block */ + mm0 = _mm_load_si64((__m64 *)inptr0); /* mm0 = row[ 0][0] */ + mm1 = _mm_load_si64((__m64 *)inptr_1); /* mm1 = row[-1][0] */ + mm2 = _mm_load_si64((__m64 *)inptr1); /* mm2 = row[ 1][0] */ + + mm3 = _mm_xor_si64(mm3, mm3); /* mm3 = (all 0's) */ + mm4 = mm0; + mm0 = _mm_unpacklo_pi8(mm0, mm3); /* mm0 = row[ 0][0]( 0 1 2 3) */ + mm4 = _mm_unpackhi_pi8(mm4, mm3); /* mm4 = row[ 0][0]( 4 5 6 7) */ + mm5 = mm1; + mm1 = _mm_unpacklo_pi8(mm1, mm3); /* mm1 = row[-1][0]( 0 1 2 3) */ + mm5 = _mm_unpackhi_pi8(mm5, mm3); /* mm5 = row[-1][0]( 4 5 6 7) */ + mm6 = mm2; + mm2 = _mm_unpacklo_pi8(mm2, mm3); /* mm2 = row[+1][0]( 0 1 2 3) */ + mm6 = _mm_unpackhi_pi8(mm6, mm3); /* mm6 = row[+1][0]( 4 5 6 7) */ + + mm0 = _mm_mullo_pi16(mm0, PW_THREE); + mm4 = _mm_mullo_pi16(mm4, PW_THREE); + /* mm7 = mask that keeps only the lowest 16-bit element; used below to make element 0 its own left neighbour at the left edge */ + mm7 = _mm_cmpeq_pi8(mm7, mm7); + mm7 = _mm_srli_si64(mm7, (SIZEOF_MMWORD - 2) * BYTE_BIT); + + mm1 = _mm_add_pi16(mm1, mm0); /* mm1=Int0L=( 0 1 2 3) */ + mm5 = _mm_add_pi16(mm5, mm4); /* mm5=Int0H=( 4 5 6 7) */ + mm2 = _mm_add_pi16(mm2, mm0); /* mm2=Int1L=( 0 1 2 3) */ + mm6 = _mm_add_pi16(mm6, mm4); /* mm6=Int1H=( 4 5 6 7) */ + + _mm_store_si64((__m64 *)outptr0, mm1); /* temporarily save */ + _mm_store_si64((__m64 *)outptr0 + 1, mm5); /* the intermediate data */ + _mm_store_si64((__m64 *)outptr1, mm2); + _mm_store_si64((__m64 *)outptr1 + 1, mm6); + + mm1 = _mm_and_si64(mm1, mm7); /* mm1=( 0 - - -) */ + mm2 = _mm_and_si64(mm2, mm7); /* mm2=( 0 - - -) */ + + wk[0] = mm1; + wk[1] = mm2; + + for (incol = downsampled_width; incol > 0; + incol -= 8, inptr_1 += 8, inptr0 += 8, inptr1 += 8, + outptr0 += 16, outptr1 += 16) { + + if (incol > 8) { + /* process the next column block
*/ + mm0 = _mm_load_si64((__m64 *)inptr0 + 1); /* mm0 = row[ 0][1] */ + mm1 = _mm_load_si64((__m64 *)inptr_1 + 1); /* mm1 = row[-1][1] */ + mm2 = _mm_load_si64((__m64 *)inptr1 + 1); /* mm2 = row[+1][1] */ + + mm3 = _mm_setzero_si64(); /* mm3 = (all 0's) */ + mm4 = mm0; + mm0 = _mm_unpacklo_pi8(mm0, mm3); /* mm0 = row[ 0][1]( 0 1 2 3) */ + mm4 = _mm_unpackhi_pi8(mm4, mm3); /* mm4 = row[ 0][1]( 4 5 6 7) */ + mm5 = mm1; + mm1 = _mm_unpacklo_pi8(mm1, mm3); /* mm1 = row[-1][1]( 0 1 2 3) */ + mm5 = _mm_unpackhi_pi8(mm5, mm3); /* mm5 = row[-1][1]( 4 5 6 7) */ + mm6 = mm2; + mm2 = _mm_unpacklo_pi8(mm2, mm3); /* mm2 = row[+1][1]( 0 1 2 3) */ + mm6 = _mm_unpackhi_pi8(mm6, mm3); /* mm6 = row[+1][1]( 4 5 6 7) */ + + mm0 = _mm_mullo_pi16(mm0, PW_THREE); + mm4 = _mm_mullo_pi16(mm4, PW_THREE); + + mm1 = _mm_add_pi16(mm1, mm0); /* mm1 = Int0L = ( 0 1 2 3) */ + mm5 = _mm_add_pi16(mm5, mm4); /* mm5 = Int0H = ( 4 5 6 7) */ + mm2 = _mm_add_pi16(mm2, mm0); /* mm2 = Int1L = ( 0 1 2 3) */ + mm6 = _mm_add_pi16(mm6, mm4); /* mm6 = Int1H = ( 4 5 6 7) */ + + _mm_store_si64((__m64 *)outptr0 + 2, mm1); /* temporarily save */ + _mm_store_si64((__m64 *)outptr0 + 3, mm5); /* the intermediate data */ + _mm_store_si64((__m64 *)outptr1 + 2, mm2); + _mm_store_si64((__m64 *)outptr1 + 3, mm6); + + mm1 = _mm_slli_si64(mm1, (SIZEOF_MMWORD - 2) * BYTE_BIT); /* mm1=( - - - 0) */ + mm2 = _mm_slli_si64(mm2, (SIZEOF_MMWORD - 2) * BYTE_BIT); /* mm2=( - - - 0) */ + + wk[2] = mm1; + wk[3] = mm2; + } else { + /* process the last column block */ + mm1 = _mm_cmpeq_pi8(mm1, mm1); + mm1 = _mm_slli_si64(mm1, (SIZEOF_MMWORD - 2) * BYTE_BIT); + mm2 = mm1; + + mm_tmp = _mm_load_si64((__m64 *)outptr0 + 1); + mm1 = _mm_and_si64(mm1, mm_tmp); /* mm1=( - - - 7) */ + mm_tmp = _mm_load_si64((__m64 *)outptr1 + 1); + mm2 = _mm_and_si64(mm2, mm_tmp); /* mm2=( - - - 7) */ + + wk[2] = mm1; + wk[3] = mm2; + } + + /* process the upper row */ + PROCESS_ROW(0) + + /* process the lower row */ + PROCESS_ROW(1) + } + } +} diff --git
a/simd/loongson/jfdctint-mmi.c b/simd/loongson/jfdctint-mmi.c new file mode 100644 index 0000000..a0ea692 --- /dev/null +++ b/simd/loongson/jfdctint-mmi.c @@ -0,0 +1,398 @@ +/* + * Loongson MMI optimizations for libjpeg-turbo + * + * Copyright (C) 2014, 2018, D. R. Commander. All Rights Reserved. + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * Authors: ZhuChen + * CaiWanwei + * SunZhangzhi + * + * Based on the x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +/* SLOW INTEGER FORWARD DCT */ + +#include "jsimd_mmi.h" + + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define DESCALE_P1 (CONST_BITS - PASS1_BITS) +#define DESCALE_P2 (CONST_BITS + PASS1_BITS) + +#define FIX_0_298 ((short)2446) /* FIX(0.298631336) */ +#define FIX_0_390 ((short)3196) /* FIX(0.390180644) */ +#define FIX_0_541 ((short)4433) /* FIX(0.541196100) */ +#define FIX_0_765 ((short)6270) /* FIX(0.765366865) */ +#define FIX_0_899 ((short)7373) /* FIX(0.899976223) */ +#define FIX_1_175 ((short)9633) /* FIX(1.175875602) */ +#define FIX_1_501 ((short)12299) /* FIX(1.501321110) */ +#define FIX_1_847 ((short)15137) /* FIX(1.847759065) */ +#define FIX_1_961 ((short)16069) /* FIX(1.961570560) */ +#define FIX_2_053 ((short)16819) /* FIX(2.053119869) */ +#define FIX_2_562 ((short)20995) /* FIX(2.562915447) */ +#define FIX_3_072 ((short)25172) /* FIX(3.072711026) */ + /* The enumerators below are listed in the same order as the entries of const_value[]; keep the two in step. */ +enum const_index { + index_PW_F130_F054, + index_PW_F054_MF130, + index_PW_MF078_F117, + index_PW_F117_F078, + index_PW_MF060_MF089, + index_PW_MF089_F060, + index_PW_MF050_MF256, + index_PW_MF256_F050, + index_PD_DESCALE_P1, + index_PD_DESCALE_P2, + index_PW_DESCALE_P2X +}; + +static uint64_t const_value[] = { + _uint64_set_pi16(FIX_0_541, (FIX_0_541 + FIX_0_765), + FIX_0_541, (FIX_0_541 + FIX_0_765)), + _uint64_set_pi16((FIX_0_541 - FIX_1_847), FIX_0_541, + (FIX_0_541 - FIX_1_847), FIX_0_541), + _uint64_set_pi16(FIX_1_175, (FIX_1_175 - FIX_1_961), + FIX_1_175, (FIX_1_175 - FIX_1_961)), + _uint64_set_pi16((FIX_1_175 - FIX_0_390), FIX_1_175, + (FIX_1_175 - FIX_0_390), FIX_1_175), + _uint64_set_pi16(-FIX_0_899, (FIX_0_298 - FIX_0_899), + -FIX_0_899, (FIX_0_298 - FIX_0_899)), + _uint64_set_pi16((FIX_1_501 - FIX_0_899), -FIX_0_899, + (FIX_1_501 - FIX_0_899), -FIX_0_899), + _uint64_set_pi16(-FIX_2_562, (FIX_2_053 - FIX_2_562), + -FIX_2_562, (FIX_2_053 - FIX_2_562)), + _uint64_set_pi16((FIX_3_072 - FIX_2_562), -FIX_2_562, + (FIX_3_072 - FIX_2_562), -FIX_2_562), + _uint64_set_pi32((1 << (DESCALE_P1
- 1)), (1 << (DESCALE_P1 - 1))), + _uint64_set_pi32((1 << (DESCALE_P2 - 1)), (1 << (DESCALE_P2 - 1))), + _uint64_set_pi16((1 << (PASS1_BITS - 1)), (1 << (PASS1_BITS - 1)), + (1 << (PASS1_BITS - 1)), (1 << (PASS1_BITS - 1))) +}; + +#define PW_F130_F054 get_const_value(index_PW_F130_F054) +#define PW_F054_MF130 get_const_value(index_PW_F054_MF130) +#define PW_MF078_F117 get_const_value(index_PW_MF078_F117) +#define PW_F117_F078 get_const_value(index_PW_F117_F078) +#define PW_MF060_MF089 get_const_value(index_PW_MF060_MF089) +#define PW_MF089_F060 get_const_value(index_PW_MF089_F060) +#define PW_MF050_MF256 get_const_value(index_PW_MF050_MF256) +#define PW_MF256_F050 get_const_value(index_PW_MF256_F050) +#define PD_DESCALE_P1 get_const_value(index_PD_DESCALE_P1) +#define PD_DESCALE_P2 get_const_value(index_PD_DESCALE_P2) +#define PW_DESCALE_P2X get_const_value(index_PW_DESCALE_P2X) + +/* Part of the FDCT parameterised by PASS (1 or 2). Expects tmp4-tmp7, tmp12 and tmp13 to be set at the expansion site, which must also declare out1, out2, out3, out5, out6 and out7. Computes out2 and out6 from tmp12/tmp13 and the odd outputs out1, out3, out5 and out7 from tmp4-tmp7; every result is rounded with PD_DESCALE_P(PASS), shifted right by DESCALE_P(PASS) and packed to 16 bits with signed saturation. */ +#define DO_FDCT_COMMON(PASS) { \ + __m64 tmp1312l, tmp1312h, tmp47l, tmp47h, tmp4l, tmp4h, tmp7l, tmp7h; \ + __m64 tmp56l, tmp56h, tmp5l, tmp5h, tmp6l, tmp6h; \ + __m64 out1l, out1h, out2l, out2h, out3l, out3h; \ + __m64 out5l, out5h, out6l, out6h, out7l, out7h; \ + __m64 z34l, z34h, z3l, z3h, z4l, z4h, z3, z4; \ + \ + /* (Original) \ + * z1 = (tmp12 + tmp13) * 0.541196100; \ + * out2 = z1 + tmp13 * 0.765366865; \ + * out6 = z1 + tmp12 * -1.847759065; \ + * \ + * (This implementation) \ + * out2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; \ + * out6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); \ + */ \ + \ + tmp1312l = _mm_unpacklo_pi16(tmp13, tmp12); \ + tmp1312h = _mm_unpackhi_pi16(tmp13, tmp12); \ + \ + out2l = _mm_madd_pi16(tmp1312l, PW_F130_F054); \ + out2h = _mm_madd_pi16(tmp1312h, PW_F130_F054); \ + out6l = _mm_madd_pi16(tmp1312l, PW_F054_MF130); \ + out6h = _mm_madd_pi16(tmp1312h, PW_F054_MF130); \ + \ + out2l = _mm_add_pi32(out2l, PD_DESCALE_P##PASS); \ + out2h = _mm_add_pi32(out2h, PD_DESCALE_P##PASS); \ + out2l = _mm_srai_pi32(out2l,
DESCALE_P##PASS); \ + out2h = _mm_srai_pi32(out2h, DESCALE_P##PASS); \ + \ + out6l = _mm_add_pi32(out6l, PD_DESCALE_P##PASS); \ + out6h = _mm_add_pi32(out6h, PD_DESCALE_P##PASS); \ + out6l = _mm_srai_pi32(out6l, DESCALE_P##PASS); \ + out6h = _mm_srai_pi32(out6h, DESCALE_P##PASS); \ + \ + out2 = _mm_packs_pi32(out2l, out2h); \ + out6 = _mm_packs_pi32(out6l, out6h); \ + \ + /* Odd part */ \ + \ + z3 = _mm_add_pi16(tmp4, tmp6); \ + z4 = _mm_add_pi16(tmp5, tmp7); \ + \ + /* (Original) \ + * z5 = (z3 + z4) * 1.175875602; \ + * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ + * z3 += z5; z4 += z5; \ + * \ + * (This implementation) \ + * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ + * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ + */ \ + \ + z34l = _mm_unpacklo_pi16(z3, z4); \ + z34h = _mm_unpackhi_pi16(z3, z4); \ + z3l = _mm_madd_pi16(z34l, PW_MF078_F117); \ + z3h = _mm_madd_pi16(z34h, PW_MF078_F117); \ + z4l = _mm_madd_pi16(z34l, PW_F117_F078); \ + z4h = _mm_madd_pi16(z34h, PW_F117_F078); \ + \ + /* (Original) \ + * z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; \ + * tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; \ + * tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; \ + * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ + * out7 = tmp4 + z1 + z3; out5 = tmp5 + z2 + z4; \ + * out3 = tmp6 + z2 + z3; out1 = tmp7 + z1 + z4; \ + * \ + * (This implementation) \ + * tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; \ + * tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; \ + * tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); \ + * tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); \ + * out7 = tmp4 + z3; out5 = tmp5 + z4; \ + * out3 = tmp6 + z3; out1 = tmp7 + z4; \ + */ \ + \ + tmp47l = _mm_unpacklo_pi16(tmp4, tmp7); \ + tmp47h = _mm_unpackhi_pi16(tmp4, tmp7); \ + \ + tmp4l = _mm_madd_pi16(tmp47l, PW_MF060_MF089); \ + tmp4h = _mm_madd_pi16(tmp47h, PW_MF060_MF089); \ + tmp7l =
_mm_madd_pi16(tmp47l, PW_MF089_F060); \ + tmp7h = _mm_madd_pi16(tmp47h, PW_MF089_F060); \ + \ + out7l = _mm_add_pi32(tmp4l, z3l); \ + out7h = _mm_add_pi32(tmp4h, z3h); \ + out1l = _mm_add_pi32(tmp7l, z4l); \ + out1h = _mm_add_pi32(tmp7h, z4h); \ + \ + out7l = _mm_add_pi32(out7l, PD_DESCALE_P##PASS); \ + out7h = _mm_add_pi32(out7h, PD_DESCALE_P##PASS); \ + out7l = _mm_srai_pi32(out7l, DESCALE_P##PASS); \ + out7h = _mm_srai_pi32(out7h, DESCALE_P##PASS); \ + \ + out1l = _mm_add_pi32(out1l, PD_DESCALE_P##PASS); \ + out1h = _mm_add_pi32(out1h, PD_DESCALE_P##PASS); \ + out1l = _mm_srai_pi32(out1l, DESCALE_P##PASS); \ + out1h = _mm_srai_pi32(out1h, DESCALE_P##PASS); \ + \ + out7 = _mm_packs_pi32(out7l, out7h); \ + out1 = _mm_packs_pi32(out1l, out1h); \ + \ + tmp56l = _mm_unpacklo_pi16(tmp5, tmp6); \ + tmp56h = _mm_unpackhi_pi16(tmp5, tmp6); \ + \ + tmp5l = _mm_madd_pi16(tmp56l, PW_MF050_MF256); \ + tmp5h = _mm_madd_pi16(tmp56h, PW_MF050_MF256); \ + tmp6l = _mm_madd_pi16(tmp56l, PW_MF256_F050); \ + tmp6h = _mm_madd_pi16(tmp56h, PW_MF256_F050); \ + \ + out5l = _mm_add_pi32(tmp5l, z4l); \ + out5h = _mm_add_pi32(tmp5h, z4h); \ + out3l = _mm_add_pi32(tmp6l, z3l); \ + out3h = _mm_add_pi32(tmp6h, z3h); \ + \ + out5l = _mm_add_pi32(out5l, PD_DESCALE_P##PASS); \ + out5h = _mm_add_pi32(out5h, PD_DESCALE_P##PASS); \ + out5l = _mm_srai_pi32(out5l, DESCALE_P##PASS); \ + out5h = _mm_srai_pi32(out5h, DESCALE_P##PASS); \ + \ + out3l = _mm_add_pi32(out3l, PD_DESCALE_P##PASS); \ + out3h = _mm_add_pi32(out3h, PD_DESCALE_P##PASS); \ + out3l = _mm_srai_pi32(out3l, DESCALE_P##PASS); \ + out3h = _mm_srai_pi32(out3h, DESCALE_P##PASS); \ + \ + out5 = _mm_packs_pi32(out5l, out5h); \ + out3 = _mm_packs_pi32(out3l, out3h); \ +} + +#define DO_FDCT_PASS1() { \ + __m64 row0l, row0h, row1l, row1h, row2l, row2h, row3l, row3h; \ + __m64 row01a, row01b, row01c, row01d, row23a, row23b, row23c, row23d; \ + __m64 col0, col1, col2, col3, col4, col5, col6, col7; \ + __m64 tmp10, tmp11; \ + \ + row0l =
_mm_load_si64((__m64 *)&dataptr[DCTSIZE * 0]); /* (00 01 02 03) */ \ + row0h = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 0 + 4]); /* (04 05 06 07) */ \ + row1l = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 1]); /* (10 11 12 13) */ \ + row1h = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 1 + 4]); /* (14 15 16 17) */ \ + row2l = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 2]); /* (20 21 22 23) */ \ + row2h = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 2 + 4]); /* (24 25 26 27) */ \ + row3l = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 3]); /* (30 31 32 33) */ \ + row3h = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 3 + 4]); /* (34 35 36 37) */ \ + \ + /* Transpose coefficients */ \ + \ + row23a = _mm_unpacklo_pi16(row2l, row3l); /* row23a=(20 30 21 31) */ \ + row23b = _mm_unpackhi_pi16(row2l, row3l); /* row23b=(22 32 23 33) */ \ + row23c = _mm_unpacklo_pi16(row2h, row3h); /* row23c=(24 34 25 35) */ \ + row23d = _mm_unpackhi_pi16(row2h, row3h); /* row23d=(26 36 27 37) */ \ + \ + row01a = _mm_unpacklo_pi16(row0l, row1l); /* row01a=(00 10 01 11) */ \ + row01b = _mm_unpackhi_pi16(row0l, row1l); /* row01b=(02 12 03 13) */ \ + row01c = _mm_unpacklo_pi16(row0h, row1h); /* row01c=(04 14 05 15) */ \ + row01d = _mm_unpackhi_pi16(row0h, row1h); /* row01d=(06 16 07 17) */ \ + \ + col0 = _mm_unpacklo_pi32(row01a, row23a); /* col0=(00 10 20 30) */ \ + col1 = _mm_unpackhi_pi32(row01a, row23a); /* col1=(01 11 21 31) */ \ + col6 = _mm_unpacklo_pi32(row01d, row23d); /* col6=(06 16 26 36) */ \ + col7 = _mm_unpackhi_pi32(row01d, row23d); /* col7=(07 17 27 37) */ \ + \ + tmp6 = _mm_sub_pi16(col1, col6); /* tmp6=col1-col6 */ \ + tmp7 = _mm_sub_pi16(col0, col7); /* tmp7=col0-col7 */ \ + tmp1 = _mm_add_pi16(col1, col6); /* tmp1=col1+col6 */ \ + tmp0 = _mm_add_pi16(col0, col7); /* tmp0=col0+col7 */ \ + \ + col2 = _mm_unpacklo_pi32(row01b, row23b); /* col2=(02 12 22 32) */ \ + col3 = _mm_unpackhi_pi32(row01b, row23b); /* col3=(03 13 23 33) */ \ + col4 = _mm_unpacklo_pi32(row01c, row23c); /* col4=(04 14 24 
34) */ \ + col5 = _mm_unpackhi_pi32(row01c, row23c); /* col5=(05 15 25 35) */ \ + \ + tmp3 = _mm_add_pi16(col3, col4); /* tmp3=col3+col4 */ \ + tmp2 = _mm_add_pi16(col2, col5); /* tmp2=col2+col5 */ \ + tmp4 = _mm_sub_pi16(col3, col4); /* tmp4=col3-col4 */ \ + tmp5 = _mm_sub_pi16(col2, col5); /* tmp5=col2-col5 */ \ + \ + /* Even part */ \ + \ + tmp10 = _mm_add_pi16(tmp0, tmp3); /* tmp10=tmp0+tmp3 */ \ + tmp13 = _mm_sub_pi16(tmp0, tmp3); /* tmp13=tmp0-tmp3 */ \ + tmp11 = _mm_add_pi16(tmp1, tmp2); /* tmp11=tmp1+tmp2 */ \ + tmp12 = _mm_sub_pi16(tmp1, tmp2); /* tmp12=tmp1-tmp2 */ \ + \ + out0 = _mm_add_pi16(tmp10, tmp11); /* out0=tmp10+tmp11 */ \ + out4 = _mm_sub_pi16(tmp10, tmp11); /* out4=tmp10-tmp11 */ \ + out0 = _mm_slli_pi16(out0, PASS1_BITS); \ + out4 = _mm_slli_pi16(out4, PASS1_BITS); \ + \ + DO_FDCT_COMMON(1) \ + \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 0], out0); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 0 + 4], out4); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 1], out1); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 1 + 4], out5); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 2], out2); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 2 + 4], out6); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 3], out3); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 3 + 4], out7); \ +} + +#define DO_FDCT_PASS2() { \ + __m64 col0l, col0h, col1l, col1h, col2l, col2h, col3l, col3h; \ + __m64 col01a, col01b, col01c, col01d, col23a, col23b, col23c, col23d; \ + __m64 row0, row1, row2, row3, row4, row5, row6, row7; \ + __m64 tmp10, tmp11; \ + \ + col0l = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 0]); /* (00 10 20 30) */ \ + col1l = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 1]); /* (01 11 21 31) */ \ + col2l = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 2]); /* (02 12 22 32) */ \ + col3l = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 3]); /* (03 13 23 33) */ \ + col0h = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 4]); /* (40 50 60 70) */ \ + col1h = _mm_load_si64((__m64 
*)&dataptr[DCTSIZE * 5]); /* (41 51 61 71) */ \ + col2h = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 6]); /* (42 52 62 72) */ \ + col3h = _mm_load_si64((__m64 *)&dataptr[DCTSIZE * 7]); /* (43 53 63 73) */ \ + \ + /* Transpose coefficients */ \ + \ + col23a = _mm_unpacklo_pi16(col2l, col3l); /* col23a=(02 03 12 13) */ \ + col23b = _mm_unpackhi_pi16(col2l, col3l); /* col23b=(22 23 32 33) */ \ + col23c = _mm_unpacklo_pi16(col2h, col3h); /* col23c=(42 43 52 53) */ \ + col23d = _mm_unpackhi_pi16(col2h, col3h); /* col23d=(62 63 72 73) */ \ + \ + col01a = _mm_unpacklo_pi16(col0l, col1l); /* col01a=(00 01 10 11) */ \ + col01b = _mm_unpackhi_pi16(col0l, col1l); /* col01b=(20 21 30 31) */ \ + col01c = _mm_unpacklo_pi16(col0h, col1h); /* col01c=(40 41 50 51) */ \ + col01d = _mm_unpackhi_pi16(col0h, col1h); /* col01d=(60 61 70 71) */ \ + \ + row0 = _mm_unpacklo_pi32(col01a, col23a); /* row0=(00 01 02 03) */ \ + row1 = _mm_unpackhi_pi32(col01a, col23a); /* row1=(10 11 12 13) */ \ + row6 = _mm_unpacklo_pi32(col01d, col23d); /* row6=(60 61 62 63) */ \ + row7 = _mm_unpackhi_pi32(col01d, col23d); /* row7=(70 71 72 73) */ \ + \ + tmp6 = _mm_sub_pi16(row1, row6); /* tmp6=row1-row6 */ \ + tmp7 = _mm_sub_pi16(row0, row7); /* tmp7=row0-row7 */ \ + tmp1 = _mm_add_pi16(row1, row6); /* tmp1=row1+row6 */ \ + tmp0 = _mm_add_pi16(row0, row7); /* tmp0=row0+row7 */ \ + \ + row2 = _mm_unpacklo_pi32(col01b, col23b); /* row2=(20 21 22 23) */ \ + row3 = _mm_unpackhi_pi32(col01b, col23b); /* row3=(30 31 32 33) */ \ + row4 = _mm_unpacklo_pi32(col01c, col23c); /* row4=(40 41 42 43) */ \ + row5 = _mm_unpackhi_pi32(col01c, col23c); /* row5=(50 51 52 53) */ \ + \ + tmp3 = _mm_add_pi16(row3, row4); /* tmp3=row3+row4 */ \ + tmp2 = _mm_add_pi16(row2, row5); /* tmp2=row2+row5 */ \ + tmp4 = _mm_sub_pi16(row3, row4); /* tmp4=row3-row4 */ \ + tmp5 = _mm_sub_pi16(row2, row5); /* tmp5=row2-row5 */ \ + \ + /* Even part */ \ + \ + tmp10 = _mm_add_pi16(tmp0, tmp3); /* tmp10=tmp0+tmp3 */ \ + tmp13 = _mm_sub_pi16(tmp0, 
tmp3); /* tmp13=tmp0-tmp3 */ \ + tmp11 = _mm_add_pi16(tmp1, tmp2); /* tmp11=tmp1+tmp2 */ \ + tmp12 = _mm_sub_pi16(tmp1, tmp2); /* tmp12=tmp1-tmp2 */ \ + \ + out0 = _mm_add_pi16(tmp10, tmp11); /* out0=tmp10+tmp11 */ \ + out4 = _mm_sub_pi16(tmp10, tmp11); /* out4=tmp10-tmp11 */ \ + \ + out0 = _mm_add_pi16(out0, PW_DESCALE_P2X); \ + out4 = _mm_add_pi16(out4, PW_DESCALE_P2X); \ + out0 = _mm_srai_pi16(out0, PASS1_BITS); \ + out4 = _mm_srai_pi16(out4, PASS1_BITS); \ + \ + DO_FDCT_COMMON(2) \ + \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 0], out0); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 1], out1); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 2], out2); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 3], out3); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 4], out4); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 5], out5); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 6], out6); \ + _mm_store_si64((__m64 *)&dataptr[DCTSIZE * 7], out7); \ +} +/* Forward DCT (islow variant), computed in place: both passes load from and store back into data. */ +void jsimd_fdct_islow_mmi(DCTELEM *data) +{ + __m64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + __m64 out0, out1, out2, out3, out4, out5, out6, out7; + __m64 tmp12, tmp13; + DCTELEM *dataptr = data; + + /* Pass 1: process rows, four at a time (dataptr advances by DCTSIZE * 4 between the two batches). */ + + DO_FDCT_PASS1() + dataptr += DCTSIZE * 4; + DO_FDCT_PASS1() + + /* Pass 2: process columns, four at a time (dataptr restarts at data and advances by 4 between the two batches). */ + + dataptr = data; + DO_FDCT_PASS2() + dataptr += 4; + DO_FDCT_PASS2() +} diff --git a/simd/loongson/jidctint-mmi.c b/simd/loongson/jidctint-mmi.c new file mode 100644 index 0000000..419c638 --- /dev/null +++ b/simd/loongson/jidctint-mmi.c @@ -0,0 +1,571 @@ +/* + * Loongson MMI optimizations for libjpeg-turbo + * + * Copyright (C) 2014-2015, 2018, D. R. Commander. All Rights Reserved. + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * Authors: ZhuChen + * CaiWanwei + * SunZhangzhi + * + * Based on the x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* SLOW INTEGER INVERSE DCT */ + +#include "jsimd_mmi.h" + + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define DESCALE_P1 (CONST_BITS - PASS1_BITS) +#define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) +#define CENTERJSAMPLE 128 + +#define FIX_0_298 ((short)2446) /* FIX(0.298631336) */ +#define FIX_0_390 ((short)3196) /* FIX(0.390180644) */ +#define FIX_0_899 ((short)7373) /* FIX(0.899976223) */ +#define FIX_0_541 ((short)4433) /* FIX(0.541196100) */ +#define FIX_0_765 ((short)6270) /* FIX(0.765366865) */ +#define FIX_1_175 ((short)9633) /* FIX(1.175875602) */ +#define FIX_1_501 ((short)12299) /* FIX(1.501321110) */ +#define FIX_1_847 ((short)15137) /* FIX(1.847759065) */ +#define FIX_1_961 ((short)16069) /* FIX(1.961570560) */ +#define FIX_2_053 ((short)16819) /* FIX(2.053119869) */ +#define FIX_2_562 ((short)20995) /* FIX(2.562915447) */ +#define FIX_3_072 ((short)25172) /* FIX(3.072711026) */ + +enum const_index { + index_PW_F130_F054, + index_PW_F054_MF130, + index_PW_MF078_F117, + index_PW_F117_F078, + index_PW_MF060_MF089, + index_PW_MF089_F060, + index_PW_MF050_MF256, + index_PW_MF256_F050, + 
index_PD_DESCALE_P1, + index_PD_DESCALE_P2, + index_PB_CENTERJSAMP +}; + +static uint64_t const_value[] = { + _uint64_set_pi16(FIX_0_541, (FIX_0_541 + FIX_0_765), + FIX_0_541, (FIX_0_541 + FIX_0_765)), + _uint64_set_pi16((FIX_0_541 - FIX_1_847), FIX_0_541, + (FIX_0_541 - FIX_1_847), FIX_0_541), + _uint64_set_pi16(FIX_1_175, (FIX_1_175 - FIX_1_961), + FIX_1_175, (FIX_1_175 - FIX_1_961)), + _uint64_set_pi16((FIX_1_175 - FIX_0_390), FIX_1_175, + (FIX_1_175 - FIX_0_390), FIX_1_175), + _uint64_set_pi16(-FIX_0_899, (FIX_0_298 - FIX_0_899), + -FIX_0_899, (FIX_0_298 - FIX_0_899)), + _uint64_set_pi16((FIX_1_501 - FIX_0_899), -FIX_0_899, + (FIX_1_501 - FIX_0_899), -FIX_0_899), + _uint64_set_pi16(-FIX_2_562, (FIX_2_053 - FIX_2_562), + -FIX_2_562, (FIX_2_053 - FIX_2_562)), + _uint64_set_pi16((FIX_3_072 - FIX_2_562), -FIX_2_562, + (FIX_3_072 - FIX_2_562), -FIX_2_562), + _uint64_set_pi32((1 << (DESCALE_P1 - 1)), (1 << (DESCALE_P1 - 1))), + _uint64_set_pi32((1 << (DESCALE_P2 - 1)), (1 << (DESCALE_P2 - 1))), + _uint64_set_pi8(CENTERJSAMPLE, CENTERJSAMPLE, CENTERJSAMPLE, CENTERJSAMPLE, + CENTERJSAMPLE, CENTERJSAMPLE, CENTERJSAMPLE, CENTERJSAMPLE) +}; + +#define PW_F130_F054 get_const_value(index_PW_F130_F054) +#define PW_F054_MF130 get_const_value(index_PW_F054_MF130) +#define PW_MF078_F117 get_const_value(index_PW_MF078_F117) +#define PW_F117_F078 get_const_value(index_PW_F117_F078) +#define PW_MF060_MF089 get_const_value(index_PW_MF060_MF089) +#define PW_MF089_F060 get_const_value(index_PW_MF089_F060) +#define PW_MF050_MF256 get_const_value(index_PW_MF050_MF256) +#define PW_MF256_F050 get_const_value(index_PW_MF256_F050) +#define PD_DESCALE_P1 get_const_value(index_PD_DESCALE_P1) +#define PD_DESCALE_P2 get_const_value(index_PD_DESCALE_P2) +#define PB_CENTERJSAMP get_const_value(index_PB_CENTERJSAMP) + + +#define test_m32_zero(mm32) (!(*(uint32_t *)&mm32)) +#define test_m64_zero(mm64) (!(*(uint64_t *)&mm64)) + + +#define DO_IDCT_COMMON(PASS) { \ + __m64 tmp0_3l, tmp0_3h, tmp1_2l, 
tmp1_2h; \ + __m64 tmp0l, tmp0h, tmp1l, tmp1h, tmp2l, tmp2h, tmp3l, tmp3h; \ + __m64 z34l, z34h, z3l, z3h, z4l, z4h, z3, z4; \ + __m64 out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h; \ + __m64 out4l, out4h, out5l, out5h, out6l, out6h, out7l, out7h; \ + \ + z3 = _mm_add_pi16(tmp0, tmp2); \ + z4 = _mm_add_pi16(tmp1, tmp3); \ + \ + /* (Original) \ + * z5 = (z3 + z4) * 1.175875602; \ + * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ + * z3 += z5; z4 += z5; \ + * \ + * (This implementation) \ + * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ + * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ + */ \ + \ + z34l = _mm_unpacklo_pi16(z3, z4); \ + z34h = _mm_unpackhi_pi16(z3, z4); \ + z3l = _mm_madd_pi16(z34l, PW_MF078_F117); \ + z3h = _mm_madd_pi16(z34h, PW_MF078_F117); \ + z4l = _mm_madd_pi16(z34l, PW_F117_F078); \ + z4h = _mm_madd_pi16(z34h, PW_F117_F078); \ + \ + /* (Original) \ + * z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \ + * tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; \ + * tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; \ + * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ + * tmp0 += z1 + z3; tmp1 += z2 + z4; \ + * tmp2 += z2 + z3; tmp3 += z1 + z4; \ + * \ + * (This implementation) \ + * tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; \ + * tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; \ + * tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); \ + * tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); \ + * tmp0 += z3; tmp1 += z4; \ + * tmp2 += z3; tmp3 += z4; \ + */ \ + \ + tmp0_3l = _mm_unpacklo_pi16(tmp0, tmp3); \ + tmp0_3h = _mm_unpackhi_pi16(tmp0, tmp3); \ + \ + tmp0l = _mm_madd_pi16(tmp0_3l, PW_MF060_MF089); \ + tmp0h = _mm_madd_pi16(tmp0_3h, PW_MF060_MF089); \ + tmp3l = _mm_madd_pi16(tmp0_3l, PW_MF089_F060); \ + tmp3h = _mm_madd_pi16(tmp0_3h, PW_MF089_F060); \ + \ + tmp0l = _mm_add_pi32(tmp0l, z3l); \ + tmp0h = _mm_add_pi32(tmp0h, z3h); \ + tmp3l = 
_mm_add_pi32(tmp3l, z4l); \ + tmp3h = _mm_add_pi32(tmp3h, z4h); \ + \ + tmp1_2l = _mm_unpacklo_pi16(tmp1, tmp2); \ + tmp1_2h = _mm_unpackhi_pi16(tmp1, tmp2); \ + \ + tmp1l = _mm_madd_pi16(tmp1_2l, PW_MF050_MF256); \ + tmp1h = _mm_madd_pi16(tmp1_2h, PW_MF050_MF256); \ + tmp2l = _mm_madd_pi16(tmp1_2l, PW_MF256_F050); \ + tmp2h = _mm_madd_pi16(tmp1_2h, PW_MF256_F050); \ + \ + tmp1l = _mm_add_pi32(tmp1l, z4l); \ + tmp1h = _mm_add_pi32(tmp1h, z4h); \ + tmp2l = _mm_add_pi32(tmp2l, z3l); \ + tmp2h = _mm_add_pi32(tmp2h, z3h); \ + \ + /* Final output stage */ \ + \ + out0l = _mm_add_pi32(tmp10l, tmp3l); \ + out0h = _mm_add_pi32(tmp10h, tmp3h); \ + out7l = _mm_sub_pi32(tmp10l, tmp3l); \ + out7h = _mm_sub_pi32(tmp10h, tmp3h); \ + \ + out0l = _mm_add_pi32(out0l, PD_DESCALE_P##PASS); \ + out0h = _mm_add_pi32(out0h, PD_DESCALE_P##PASS); \ + out0l = _mm_srai_pi32(out0l, DESCALE_P##PASS); \ + out0h = _mm_srai_pi32(out0h, DESCALE_P##PASS); \ + \ + out7l = _mm_add_pi32(out7l, PD_DESCALE_P##PASS); \ + out7h = _mm_add_pi32(out7h, PD_DESCALE_P##PASS); \ + out7l = _mm_srai_pi32(out7l, DESCALE_P##PASS); \ + out7h = _mm_srai_pi32(out7h, DESCALE_P##PASS); \ + \ + out0 = _mm_packs_pi32(out0l, out0h); \ + out7 = _mm_packs_pi32(out7l, out7h); \ + \ + out1l = _mm_add_pi32(tmp11l, tmp2l); \ + out1h = _mm_add_pi32(tmp11h, tmp2h); \ + out6l = _mm_sub_pi32(tmp11l, tmp2l); \ + out6h = _mm_sub_pi32(tmp11h, tmp2h); \ + \ + out1l = _mm_add_pi32(out1l, PD_DESCALE_P##PASS); \ + out1h = _mm_add_pi32(out1h, PD_DESCALE_P##PASS); \ + out1l = _mm_srai_pi32(out1l, DESCALE_P##PASS); \ + out1h = _mm_srai_pi32(out1h, DESCALE_P##PASS); \ + \ + out6l = _mm_add_pi32(out6l, PD_DESCALE_P##PASS); \ + out6h = _mm_add_pi32(out6h, PD_DESCALE_P##PASS); \ + out6l = _mm_srai_pi32(out6l, DESCALE_P##PASS); \ + out6h = _mm_srai_pi32(out6h, DESCALE_P##PASS); \ + \ + out1 = _mm_packs_pi32(out1l, out1h); \ + out6 = _mm_packs_pi32(out6l, out6h); \ + \ + out2l = _mm_add_pi32(tmp12l, tmp1l); \ + out2h = _mm_add_pi32(tmp12h, tmp1h); 
\ + out5l = _mm_sub_pi32(tmp12l, tmp1l); \ + out5h = _mm_sub_pi32(tmp12h, tmp1h); \ + \ + out2l = _mm_add_pi32(out2l, PD_DESCALE_P##PASS); \ + out2h = _mm_add_pi32(out2h, PD_DESCALE_P##PASS); \ + out2l = _mm_srai_pi32(out2l, DESCALE_P##PASS); \ + out2h = _mm_srai_pi32(out2h, DESCALE_P##PASS); \ + \ + out5l = _mm_add_pi32(out5l, PD_DESCALE_P##PASS); \ + out5h = _mm_add_pi32(out5h, PD_DESCALE_P##PASS); \ + out5l = _mm_srai_pi32(out5l, DESCALE_P##PASS); \ + out5h = _mm_srai_pi32(out5h, DESCALE_P##PASS); \ + \ + out2 = _mm_packs_pi32(out2l, out2h); \ + out5 = _mm_packs_pi32(out5l, out5h); \ + \ + out3l = _mm_add_pi32(tmp13l, tmp0l); \ + out3h = _mm_add_pi32(tmp13h, tmp0h); \ + \ + out4l = _mm_sub_pi32(tmp13l, tmp0l); \ + out4h = _mm_sub_pi32(tmp13h, tmp0h); \ + \ + out3l = _mm_add_pi32(out3l, PD_DESCALE_P##PASS); \ + out3h = _mm_add_pi32(out3h, PD_DESCALE_P##PASS); \ + out3l = _mm_srai_pi32(out3l, DESCALE_P##PASS); \ + out3h = _mm_srai_pi32(out3h, DESCALE_P##PASS); \ + \ + out4l = _mm_add_pi32(out4l, PD_DESCALE_P##PASS); \ + out4h = _mm_add_pi32(out4h, PD_DESCALE_P##PASS); \ + out4l = _mm_srai_pi32(out4l, DESCALE_P##PASS); \ + out4h = _mm_srai_pi32(out4h, DESCALE_P##PASS); \ + \ + out3 = _mm_packs_pi32(out3l, out3h); \ + out4 = _mm_packs_pi32(out4l, out4h); \ +} + +#define DO_IDCT_PASS1(iter) { \ + __m64 col0l, col1l, col2l, col3l, col4l, col5l, col6l, col7l; \ + __m64 quant0l, quant1l, quant2l, quant3l; \ + __m64 quant4l, quant5l, quant6l, quant7l; \ + __m64 z23, z2, z3, z23l, z23h; \ + __m64 row01a, row01b, row01c, row01d, row23a, row23b, row23c, row23d; \ + __m64 row0l, row0h, row1l, row1h, row2l, row2h, row3l, row3h; \ + __m64 tmp0l, tmp0h, tmp1l, tmp1h, tmp2l, tmp2h, tmp3l, tmp3h; \ + __m64 tmp10l, tmp10h, tmp11l, tmp11h, tmp12l, tmp12h, tmp13l, tmp13h; \ + __m32 col0a, col1a, mm0; \ + \ + col0a = _mm_load_si32((__m32 *)&inptr[DCTSIZE * 1]); \ + col1a = _mm_load_si32((__m32 *)&inptr[DCTSIZE * 2]); \ + mm0 = _mm_or_si32(col0a, col1a); \ + \ + if 
(test_m32_zero(mm0)) { \ + __m64 mm1, mm2; \ + \ + col0l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 0]); \ + col1l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 1]); \ + col2l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 2]); \ + col3l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 3]); \ + col4l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 4]); \ + col5l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 5]); \ + col6l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 6]); \ + col7l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 7]); \ + \ + mm1 = _mm_or_si64(col1l, col3l); \ + mm2 = _mm_or_si64(col2l, col4l); \ + mm1 = _mm_or_si64(mm1, col5l); \ + mm2 = _mm_or_si64(mm2, col6l); \ + mm1 = _mm_or_si64(mm1, col7l); \ + mm1 = _mm_or_si64(mm1, mm2); \ + \ + if (test_m64_zero(mm1)) { \ + __m64 dcval, dcvall, dcvalh, row0, row1, row2, row3; \ + \ + /* AC terms all zero */ \ + \ + quant0l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 0]); \ + \ + dcval = _mm_mullo_pi16(col0l, quant0l); \ + dcval = _mm_slli_pi16(dcval, PASS1_BITS); /* dcval=(00 10 20 30) */ \ + \ + dcvall = _mm_unpacklo_pi16(dcval, dcval); /* dcvall=(00 00 10 10) */ \ + dcvalh = _mm_unpackhi_pi16(dcval, dcval); /* dcvalh=(20 20 30 30) */ \ + \ + row0 = _mm_unpacklo_pi32(dcvall, dcvall); /* row0=(00 00 00 00) */ \ + row1 = _mm_unpackhi_pi32(dcvall, dcvall); /* row1=(10 10 10 10) */ \ + row2 = _mm_unpacklo_pi32(dcvalh, dcvalh); /* row2=(20 20 20 20) */ \ + row3 = _mm_unpackhi_pi32(dcvalh, dcvalh); /* row3=(30 30 30 30) */ \ + \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 0], row0); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 0 + 4], row0); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 1], row1); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 1 + 4], row1); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 2], row2); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 2 + 4], row2); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 3], row3); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 3 + 4], row3); \ + \ + goto nextcolumn##iter; \ + } \ + } \ + \ + /* Even part \ 
+ * \ + * (Original) \ + * z1 = (z2 + z3) * 0.541196100; \ + * tmp2 = z1 + z3 * -1.847759065; \ + * tmp3 = z1 + z2 * 0.765366865; \ + * \ + * (This implementation) \ + * tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); \ + * tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; \ + */ \ + \ + col0l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 0]); /* (00 10 20 30) */ \ + col2l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 2]); /* (02 12 22 32) */ \ + col4l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 4]); /* (04 14 24 34) */ \ + col6l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 6]); /* (06 16 26 36) */ \ + \ + quant0l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 0]); \ + quant2l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 2]); \ + quant4l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 4]); \ + quant6l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 6]); \ + \ + z2 = _mm_mullo_pi16(col2l, quant2l); \ + z3 = _mm_mullo_pi16(col6l, quant6l); \ + \ + z23l = _mm_unpacklo_pi16(z2, z3); \ + z23h = _mm_unpackhi_pi16(z2, z3); \ + tmp3l = _mm_madd_pi16(z23l, PW_F130_F054); \ + tmp3h = _mm_madd_pi16(z23h, PW_F130_F054); \ + tmp2l = _mm_madd_pi16(z23l, PW_F054_MF130); \ + tmp2h = _mm_madd_pi16(z23h, PW_F054_MF130); \ + \ + z2 = _mm_mullo_pi16(col0l, quant0l); \ + z3 = _mm_mullo_pi16(col4l, quant4l); \ + \ + z23 = _mm_add_pi16(z2, z3); \ + tmp0l = _mm_loadlo_pi16_f(z23); \ + tmp0h = _mm_loadhi_pi16_f(z23); \ + tmp0l = _mm_srai_pi32(tmp0l, (16 - CONST_BITS)); \ + tmp0h = _mm_srai_pi32(tmp0h, (16 - CONST_BITS)); \ + \ + tmp10l = _mm_add_pi32(tmp0l, tmp3l); \ + tmp10h = _mm_add_pi32(tmp0h, tmp3h); \ + tmp13l = _mm_sub_pi32(tmp0l, tmp3l); \ + tmp13h = _mm_sub_pi32(tmp0h, tmp3h); \ + \ + z23 = _mm_sub_pi16(z2, z3); \ + tmp1l = _mm_loadlo_pi16_f(z23); \ + tmp1h = _mm_loadhi_pi16_f(z23); \ + tmp1l = _mm_srai_pi32(tmp1l, (16 - CONST_BITS)); \ + tmp1h = _mm_srai_pi32(tmp1h, (16 - CONST_BITS)); \ + \ + tmp11l = _mm_add_pi32(tmp1l, tmp2l); \ + tmp11h = _mm_add_pi32(tmp1h, tmp2h); \ + 
tmp12l = _mm_sub_pi32(tmp1l, tmp2l); \ + tmp12h = _mm_sub_pi32(tmp1h, tmp2h); \ + \ + /* Odd part */ \ + \ + col1l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 1]); /* (01 11 21 31) */ \ + col3l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 3]); /* (03 13 23 33) */ \ + col5l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 5]); /* (05 15 25 35) */ \ + col7l = _mm_load_si64((__m64 *)&inptr[DCTSIZE * 7]); /* (07 17 27 37) */ \ + \ + quant1l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 1]); \ + quant3l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 3]); \ + quant5l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 5]); \ + quant7l = _mm_load_si64((__m64 *)&quantptr[DCTSIZE * 7]); \ + \ + tmp0 = _mm_mullo_pi16(col7l, quant7l); \ + tmp1 = _mm_mullo_pi16(col5l, quant5l); \ + tmp2 = _mm_mullo_pi16(col3l, quant3l); \ + tmp3 = _mm_mullo_pi16(col1l, quant1l); \ + \ + DO_IDCT_COMMON(1) \ + \ + /* out0=(00 10 20 30), out1=(01 11 21 31) */ \ + /* out2=(02 12 22 32), out3=(03 13 23 33) */ \ + /* out4=(04 14 24 34), out5=(05 15 25 35) */ \ + /* out6=(06 16 26 36), out7=(07 17 27 37) */ \ + \ + /* Transpose coefficients */ \ + \ + row01a = _mm_unpacklo_pi16(out0, out1); /* row01a=(00 01 10 11) */ \ + row23a = _mm_unpackhi_pi16(out0, out1); /* row23a=(20 21 30 31) */ \ + row01d = _mm_unpacklo_pi16(out6, out7); /* row01d=(06 07 16 17) */ \ + row23d = _mm_unpackhi_pi16(out6, out7); /* row23d=(26 27 36 37) */ \ + \ + row01b = _mm_unpacklo_pi16(out2, out3); /* row01b=(02 03 12 13) */ \ + row23b = _mm_unpackhi_pi16(out2, out3); /* row23b=(22 23 32 33) */ \ + row01c = _mm_unpacklo_pi16(out4, out5); /* row01c=(04 05 14 15) */ \ + row23c = _mm_unpackhi_pi16(out4, out5); /* row23c=(24 25 34 35) */ \ + \ + row0l = _mm_unpacklo_pi32(row01a, row01b); /* row0l=(00 01 02 03) */ \ + row1l = _mm_unpackhi_pi32(row01a, row01b); /* row1l=(10 11 12 13) */ \ + row2l = _mm_unpacklo_pi32(row23a, row23b); /* row2l=(20 21 22 23) */ \ + row3l = _mm_unpackhi_pi32(row23a, row23b); /* row3l=(30 31 32 33) */ \ + \ + row0h = 
_mm_unpacklo_pi32(row01c, row01d); /* row0h=(04 05 06 07) */ \ + row1h = _mm_unpackhi_pi32(row01c, row01d); /* row1h=(14 15 16 17) */ \ + row2h = _mm_unpacklo_pi32(row23c, row23d); /* row2h=(24 25 26 27) */ \ + row3h = _mm_unpackhi_pi32(row23c, row23d); /* row3h=(34 35 36 37) */ \ + \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 0], row0l); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 0 + 4], row0h); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 1], row1l); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 1 + 4], row1h); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 2], row2l); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 2 + 4], row2h); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 3], row3l); \ + _mm_store_si64((__m64 *)&wsptr[DCTSIZE * 3 + 4], row3h); \ +} + +#define DO_IDCT_PASS2(ctr) { \ + __m64 row0l, row1l, row2l, row3l, row4l, row5l, row6l, row7l; \ + __m64 z23, z23l, z23h; \ + __m64 col0123a, col0123b, col0123c, col0123d; \ + __m64 col01l, col01h, col23l, col23h, row06, row17, row24, row35; \ + __m64 col0, col1, col2, col3; \ + __m64 tmp0l, tmp0h, tmp1l, tmp1h, tmp2l, tmp2h, tmp3l, tmp3h; \ + __m64 tmp10l, tmp10h, tmp11l, tmp11h, tmp12l, tmp12h, tmp13l, tmp13h; \ + \ + row0l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 0]); /* (00 01 02 03) */ \ + row1l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 1]); /* (10 11 12 13) */ \ + row2l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 2]); /* (20 21 22 23) */ \ + row3l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 3]); /* (30 31 32 33) */ \ + row4l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 4]); /* (40 41 42 43) */ \ + row5l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 5]); /* (50 51 52 53) */ \ + row6l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 6]); /* (60 61 62 63) */ \ + row7l = _mm_load_si64((__m64 *)&wsptr[DCTSIZE * 7]); /* (70 71 72 73) */ \ + \ + /* Even part \ + * \ + * (Original) \ + * z1 = (z2 + z3) * 0.541196100; \ + * tmp2 = z1 + z3 * -1.847759065; \ + * tmp3 = z1 + z2 * 0.765366865; \ + * \ + * (This implementation) \ + * tmp2 = 
z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); \ + * tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; \ + */ \ + \ + z23l = _mm_unpacklo_pi16(row2l, row6l); \ + z23h = _mm_unpackhi_pi16(row2l, row6l); \ + \ + tmp3l = _mm_madd_pi16(z23l, PW_F130_F054); \ + tmp3h = _mm_madd_pi16(z23h, PW_F130_F054); \ + tmp2l = _mm_madd_pi16(z23l, PW_F054_MF130); \ + tmp2h = _mm_madd_pi16(z23h, PW_F054_MF130); \ + \ + z23 = _mm_add_pi16(row0l, row4l); \ + tmp0l = _mm_loadlo_pi16_f(z23); \ + tmp0h = _mm_loadhi_pi16_f(z23); \ + tmp0l = _mm_srai_pi32(tmp0l, (16 - CONST_BITS)); \ + tmp0h = _mm_srai_pi32(tmp0h, (16 - CONST_BITS)); \ + \ + tmp10l = _mm_add_pi32(tmp0l, tmp3l); \ + tmp10h = _mm_add_pi32(tmp0h, tmp3h); \ + tmp13l = _mm_sub_pi32(tmp0l, tmp3l); \ + tmp13h = _mm_sub_pi32(tmp0h, tmp3h); \ + \ + z23 = _mm_sub_pi16(row0l, row4l); \ + tmp1l = _mm_loadlo_pi16_f(z23); \ + tmp1h = _mm_loadhi_pi16_f(z23); \ + tmp1l = _mm_srai_pi32(tmp1l, (16 - CONST_BITS)); \ + tmp1h = _mm_srai_pi32(tmp1h, (16 - CONST_BITS)); \ + \ + tmp11l = _mm_add_pi32(tmp1l, tmp2l); \ + tmp11h = _mm_add_pi32(tmp1h, tmp2h); \ + tmp12l = _mm_sub_pi32(tmp1l, tmp2l); \ + tmp12h = _mm_sub_pi32(tmp1h, tmp2h); \ + \ + /* Odd part */ \ + \ + tmp0 = row7l; \ + tmp1 = row5l; \ + tmp2 = row3l; \ + tmp3 = row1l; \ + \ + DO_IDCT_COMMON(2) \ + \ + /* out0=(00 01 02 03), out1=(10 11 12 13) */ \ + /* out2=(20 21 22 23), out3=(30 31 32 33) */ \ + /* out4=(40 41 42 43), out5=(50 51 52 53) */ \ + /* out6=(60 61 62 63), out7=(70 71 72 73) */ \ + \ + row06 = _mm_packs_pi16(out0, out6); /* row06=(00 01 02 03 60 61 62 63) */ \ + row17 = _mm_packs_pi16(out1, out7); /* row17=(10 11 12 13 70 71 72 73) */ \ + row24 = _mm_packs_pi16(out2, out4); /* row24=(20 21 22 23 40 41 42 43) */ \ + row35 = _mm_packs_pi16(out3, out5); /* row35=(30 31 32 33 50 51 52 53) */ \ + \ + row06 = _mm_add_pi8(row06, PB_CENTERJSAMP); \ + row17 = _mm_add_pi8(row17, PB_CENTERJSAMP); \ + row24 = _mm_add_pi8(row24, PB_CENTERJSAMP); \ + row35 = 
_mm_add_pi8(row35, PB_CENTERJSAMP); \ + \ + /* Transpose coefficients */ \ + \ + col0123a = _mm_unpacklo_pi8(row06, row17); /* col0123a=(00 10 01 11 02 12 03 13) */ \ + col0123d = _mm_unpackhi_pi8(row06, row17); /* col0123d=(60 70 61 71 62 72 63 73) */ \ + col0123b = _mm_unpacklo_pi8(row24, row35); /* col0123b=(20 30 21 31 22 32 23 33) */ \ + col0123c = _mm_unpackhi_pi8(row24, row35); /* col0123c=(40 50 41 51 42 52 43 53) */ \ + \ + col01l = _mm_unpacklo_pi16(col0123a, col0123b); /* col01l=(00 10 20 30 01 11 21 31) */ \ + col23l = _mm_unpackhi_pi16(col0123a, col0123b); /* col23l=(02 12 22 32 03 13 23 33) */ \ + col01h = _mm_unpacklo_pi16(col0123c, col0123d); /* col01h=(40 50 60 70 41 51 61 71) */ \ + col23h = _mm_unpackhi_pi16(col0123c, col0123d); /* col23h=(42 52 62 72 43 53 63 73) */ \ + \ + col0 = _mm_unpacklo_pi32(col01l, col01h); /* col0=(00 10 20 30 40 50 60 70) */ \ + col1 = _mm_unpackhi_pi32(col01l, col01h); /* col1=(01 11 21 31 41 51 61 71) */ \ + col2 = _mm_unpacklo_pi32(col23l, col23h); /* col2=(02 12 22 32 42 52 62 72) */ \ + col3 = _mm_unpackhi_pi32(col23l, col23h); /* col3=(03 13 23 33 43 53 63 73) */ \ + \ + _mm_store_si64((__m64 *)(output_buf[ctr + 0] + output_col), col0); \ + _mm_store_si64((__m64 *)(output_buf[ctr + 1] + output_col), col1); \ + _mm_store_si64((__m64 *)(output_buf[ctr + 2] + output_col), col2); \ + _mm_store_si64((__m64 *)(output_buf[ctr + 3] + output_col), col3); \ +} +/* Inverse DCT (islow variant): pass 1 multiplies coef_block by the ISLOW_MULT_TYPE table in dct_table and fills workspace; pass 2 writes the result to output_buf[0..7] starting at output_col. */ +void jsimd_idct_islow_mmi(void *dct_table, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + __m64 tmp0, tmp1, tmp2, tmp3; + __m64 out0, out1, out2, out3, out4, out5, out6, out7; + JCOEFPTR inptr; + ISLOW_MULT_TYPE *quantptr; + JCOEF *wsptr; + JCOEF workspace[DCTSIZE2]; /* buffers data between passes */ + + /* Pass 1: process columns, four at a time. 
nextcolumn1 and nextcolumn2 are the jump targets of the all-AC-zero shortcut inside DO_IDCT_PASS1. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *)dct_table; + wsptr = workspace; + + DO_IDCT_PASS1(1) +nextcolumn1: + inptr += 4; + quantptr += 4; + wsptr += DCTSIZE * 4; + DO_IDCT_PASS1(2) +nextcolumn2: + + /* Pass 2: process rows; the macro argument is the index of the first of the four output_buf rows written. */ + + wsptr = workspace; + + DO_IDCT_PASS2(0) + wsptr += 4; + DO_IDCT_PASS2(4) +} diff --git a/simd/loongson/jquanti-mmi.c b/simd/loongson/jquanti-mmi.c new file mode 100644 index 0000000..f9a3f81 --- /dev/null +++ b/simd/loongson/jquanti-mmi.c @@ -0,0 +1,130 @@ +/* + * Loongson MMI optimizations for libjpeg-turbo + * + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * Authors: ZhuChen + * CaiWanwei + * SunZhangzhi + * Copyright (C) 2018, D. R. Commander. All Rights Reserved. + * + * Based on the x86 SIMD extension for IJG JPEG library + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution.
+ */ + +/* INTEGER QUANTIZATION AND SAMPLE CONVERSION */ + +#include "jsimd_mmi.h" + + +#define DO_QUANT() { \ + mm2 = _mm_load_si64((__m64 *)&workspace[0]); \ + mm3 = _mm_load_si64((__m64 *)&workspace[4]); \ + \ + mm0 = mm2; \ + mm1 = mm3; \ + \ + mm2 = _mm_srai_pi16(mm2, (WORD_BIT - 1)); /* -1 if value < 0, */ \ + /* 0 otherwise */ \ + mm3 = _mm_srai_pi16(mm3, (WORD_BIT - 1)); \ + \ + mm0 = _mm_xor_si64(mm0, mm2); /* val = -val */ \ + mm1 = _mm_xor_si64(mm1, mm3); \ + mm0 = _mm_sub_pi16(mm0, mm2); \ + mm1 = _mm_sub_pi16(mm1, mm3); \ + \ + corr0 = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 1]); /* correction */ \ + corr1 = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 1 + 4]); \ + \ + mm0 = _mm_add_pi16(mm0, corr0); /* correction + roundfactor */ \ + mm1 = _mm_add_pi16(mm1, corr1); \ + \ + mm4 = mm0; \ + mm5 = mm1; \ + \ + recip0 = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 0]); /* reciprocal */ \ + recip1 = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 0 + 4]); \ + \ + mm0 = _mm_mulhi_pi16(mm0, recip0); \ + mm1 = _mm_mulhi_pi16(mm1, recip1); \ + \ + mm0 = _mm_add_pi16(mm0, mm4); /* reciprocal is always negative */ \ + mm1 = _mm_add_pi16(mm1, mm5); /* (MSB=1), so we always need to add the */ \ + /* initial value (input value is never */ \ + /* negative as we inverted it at the */ \ + /* start of this routine) */ \ + \ + scale0 = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 2]); /* scale */ \ + scale1 = _mm_load_si64((__m64 *)&divisors[DCTSIZE2 * 2 + 4]); \ + \ + mm6 = scale0; \ + mm7 = scale1; \ + mm4 = mm0; \ + mm5 = mm1; \ + \ + mm0 = _mm_mulhi_pi16(mm0, mm6); \ + mm1 = _mm_mulhi_pi16(mm1, mm7); \ + \ + mm6 = _mm_srai_pi16(mm6, (WORD_BIT - 1)); /* determine if scale... 
*/ \ + /* is negative */ \ + mm7 = _mm_srai_pi16(mm7, (WORD_BIT - 1)); \ + \ + mm6 = _mm_and_si64(mm6, mm4); /* and add input if it is */ \ + mm7 = _mm_and_si64(mm7, mm5); \ + mm0 = _mm_add_pi16(mm0, mm6); \ + mm1 = _mm_add_pi16(mm1, mm7); \ + \ + mm4 = _mm_srai_pi16(mm4, (WORD_BIT - 1)); /* then check if... */ \ + mm5 = _mm_srai_pi16(mm5, (WORD_BIT - 1)); /* negative input */ \ + \ + mm4 = _mm_and_si64(mm4, scale0); /* and add scale if it is */ \ + mm5 = _mm_and_si64(mm5, scale1); \ + mm0 = _mm_add_pi16(mm0, mm4); \ + mm1 = _mm_add_pi16(mm1, mm5); \ + \ + mm0 = _mm_xor_si64(mm0, mm2); /* val = -val */ \ + mm1 = _mm_xor_si64(mm1, mm3); \ + mm0 = _mm_sub_pi16(mm0, mm2); \ + mm1 = _mm_sub_pi16(mm1, mm3); \ + \ + _mm_store_si64((__m64 *)&output_ptr[0], mm0); \ + _mm_store_si64((__m64 *)&output_ptr[4], mm1); \ + \ + workspace += DCTSIZE; \ + divisors += DCTSIZE; \ + output_ptr += DCTSIZE; \ +} + + +void jsimd_quantize_mmi(JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) +{ + JCOEFPTR output_ptr = coef_block; + __m64 mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7; + __m64 corr0, corr1, recip0, recip1, scale0, scale1; + + DO_QUANT() + DO_QUANT() + DO_QUANT() + DO_QUANT() + DO_QUANT() + DO_QUANT() + DO_QUANT() + DO_QUANT() +} diff --git a/simd/loongson/jsimd.c b/simd/loongson/jsimd.c new file mode 100644 index 0000000..e8b1832 --- /dev/null +++ b/simd/loongson/jsimd.c @@ -0,0 +1,610 @@ +/* + * jsimd_loongson.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander. + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. + * Copyright (C) 2015, 2018, Matthieu Darbois. + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. 
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * Loongson architecture. + */ + +#define JPEG_INTERNALS +#include "../../jinclude.h" +#include "../../jpeglib.h" +#include "../../jsimd.h" +#include "../../jdct.h" +#include "../../jsimddct.h" +#include "../jsimd.h" + +static unsigned int simd_support = ~0; + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ +LOCAL(void) +init_simd(void) +{ +#ifndef NO_GETENV + char *env = NULL; +#endif + + if (simd_support != ~0U) + return; + + simd_support = JSIMD_MMI; /* replace the ~0 "not probed yet" sentinel */ + +#ifndef NO_GETENV + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; +#endif +} + +GLOBAL(int) +jsimd_can_rgb_ycc(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_MMI) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_MMI) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_c_can_null_convert(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + void (*mmifct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION,
int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + mmifct = jsimd_extrgb_ycc_convert_mmi; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mmifct = jsimd_extrgbx_ycc_convert_mmi; + break; + case JCS_EXT_BGR: + mmifct = jsimd_extbgr_ycc_convert_mmi; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mmifct = jsimd_extbgrx_ycc_convert_mmi; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mmifct = jsimd_extxbgr_ycc_convert_mmi; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mmifct = jsimd_extxrgb_ycc_convert_mmi; + break; + default: + mmifct = jsimd_rgb_ycc_convert_mmi; + break; + } + + mmifct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ +} + +GLOBAL(void) +jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ + void (*mmifct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + mmifct = jsimd_ycc_extrgb_convert_mmi; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + mmifct = jsimd_ycc_extrgbx_convert_mmi; + break; + case JCS_EXT_BGR: + mmifct = jsimd_ycc_extbgr_convert_mmi; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + mmifct = jsimd_ycc_extbgrx_convert_mmi; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + mmifct = jsimd_ycc_extxbgr_convert_mmi; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + mmifct = jsimd_ycc_extxrgb_convert_mmi; + break; + default: + mmifct = jsimd_ycc_rgb_convert_mmi; + break; + } + + mmifct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ +} + +GLOBAL(void) +jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY 
input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MMI) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v2_smooth_downsample(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_downsample_mmi(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ +} + +GLOBAL(void) +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_upsample(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_int_upsample(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(void) +jsimd_int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_MMI) + return 1; + + return 0; +} 
+ +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v2_fancy_upsample_mmi(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_merged_upsample(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) +{ +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) +{ +} + +GLOBAL(int) +jsimd_can_convsamp(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ +} + +GLOBAL(void) +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_fdct_islow(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MMI) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow(DCTELEM *data) +{ + jsimd_fdct_islow_mmi(data); +} + +GLOBAL(void) +jsimd_fdct_ifast(DCTELEM *data) +{ +} + +GLOBAL(void) +jsimd_fdct_float(FAST_FLOAT *data) +{ +} + +GLOBAL(int) +jsimd_can_quantize(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + 
return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MMI) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) +{ + jsimd_quantize_mmi(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ +} + +GLOBAL(int) +jsimd_can_idct_2x2(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_6x6(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_12x12(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_idct_islow(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_MMI) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY 
output_buf, + JDIMENSION output_col) +{ + jsimd_idct_islow_mmi(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(void) +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(void) +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block(void) +{ + return 0; +} + +GLOBAL(JOCTET *) +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return NULL; +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_first_prepare(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *zerobits) +{ +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_refine_prepare(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits) +{ + return 0; +} diff --git a/simd/loongson/jsimd_mmi.h b/simd/loongson/jsimd_mmi.h new file mode 100644 index 0000000..2506aa8 --- /dev/null +++ b/simd/loongson/jsimd_mmi.h @@ -0,0 +1,57 @@ +/* + * Loongson MMI optimizations for libjpeg-turbo + * + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * Authors: ZhuChen + * CaiWanwei + * SunZhangzhi + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#define JPEG_INTERNALS +#include "../../jinclude.h" +#include "../../jpeglib.h" +#include "../../jdct.h" +#include "loongson-mmintrin.h" + + +/* Common code */ + +#define SIZEOF_MMWORD 8 +#define BYTE_BIT 8 +#define WORD_BIT 16 +#define SCALEBITS 16 + +#define _uint64_set_pi8(a, b, c, d, e, f, g, h) \ + (((uint64_t)(uint8_t)a << 56) | \ + ((uint64_t)(uint8_t)b << 48) | \ + ((uint64_t)(uint8_t)c << 40) | \ + ((uint64_t)(uint8_t)d << 32) | \ + ((uint64_t)(uint8_t)e << 24) | \ + ((uint64_t)(uint8_t)f << 16) | \ + ((uint64_t)(uint8_t)g << 8) | \ + ((uint64_t)(uint8_t)h)) +#define _uint64_set_pi16(a, b, c, d) (((uint64_t)(uint16_t)a << 48) | \ + ((uint64_t)(uint16_t)b << 32) | \ + ((uint64_t)(uint16_t)c << 16) | \ + ((uint64_t)(uint16_t)d)) +#define _uint64_set_pi32(a, b) (((uint64_t)(uint32_t)a << 32) | \ + ((uint64_t)(uint32_t)b)) + +#define get_const_value(index) (*(__m64 *)&const_value[index]) diff --git a/simd/loongson/loongson-mmintrin.h b/simd/loongson/loongson-mmintrin.h new file mode 100644 index 0000000..4aea763 --- /dev/null +++ b/simd/loongson/loongson-mmintrin.h @@ -0,0 +1,1307 @@ +/* + * Loongson MMI optimizations for libjpeg-turbo + * + * Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing. + * All Rights Reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#ifndef __LOONGSON_MMINTRIN_H__ +#define __LOONGSON_MMINTRIN_H__ + +#include <stdint.h> + + +#define FUNCTION_ATTRIBS \ + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + + +/* Vectors are stored in 64-bit floating-point registers. */ +typedef double __m64; + +/* Having a 32-bit datatype allows us to use 32-bit loads in places like + load8888.
*/ +typedef float __m32; + + +/********** Set Operations **********/ + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_setzero_si64(void) +{ + return 0.0; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_set_pi8(uint8_t __b7, uint8_t __b6, uint8_t __b5, uint8_t __b4, + uint8_t __b3, uint8_t __b2, uint8_t __b1, uint8_t __b0) +{ + __m64 ret; + uint32_t lo = ((uint32_t)__b6 << 24) | + ((uint32_t)__b4 << 16) | + ((uint32_t)__b2 << 8) | + (uint32_t)__b0; + uint32_t hi = ((uint32_t)__b7 << 24) | + ((uint32_t)__b5 << 16) | + ((uint32_t)__b3 << 8) | + (uint32_t)__b1; + + asm("mtc1 %1, %0\n\t" + "mtc1 %2, $f0\n\t" + "punpcklbh %0, %0, $f0\n\t" + : "=f" (ret) + : "r" (lo), "r" (hi) + : "$f0" + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_set_pi16(uint16_t __h3, uint16_t __h2, uint16_t __h1, uint16_t __h0) +{ + __m64 ret; + uint32_t lo = ((uint32_t)__h2 << 16) | (uint32_t)__h0; + uint32_t hi = ((uint32_t)__h3 << 16) | (uint32_t)__h1; + + asm("mtc1 %1, %0\n\t" + "mtc1 %2, $f0\n\t" + "punpcklhw %0, %0, $f0\n\t" + : "=f" (ret) + : "r" (lo), "r" (hi) + : "$f0" + ); + + return ret; +} + +#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \ + (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_set_pi32(uint32_t __i1, uint32_t __i0) +{ + if (__builtin_constant_p(__i1) && __builtin_constant_p(__i0)) { + uint64_t val = ((uint64_t)__i1 << 32) | + ((uint64_t)__i0 << 0); + + return *(__m64 *)&val; + } else if (__i1 == __i0) { + uint64_t imm = _MM_SHUFFLE(1, 0, 1, 0); + __m64 ret; + + asm("pshufh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (*(__m32 *)&__i1), "f" (*(__m64 *)&imm) + ); + + return ret; + } else { + uint64_t val = ((uint64_t)__i1 << 32) | + ((uint64_t)__i0 << 0); + + return *(__m64 *)&val; + } +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_set1_pi8(uint8_t __b0) +{ + __m64 ret; + + asm("sll $8, %1, 8\n\t" + "or $8, %1, $8\n\t" /* build in scratch $8; input %1 stays intact */ + "mtc1 $8, %0\n\t" + "mtc1 $0, $f0\n\t" + "pshufh %0, %0, $f0\n\t" + : "=f" (ret) + : "r" (__b0) + : "$8",
"$f0" + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_set1_pi16(uint16_t __h0) +{ + __m64 ret; + + asm("mtc1 %1, %0\n\t" + "mtc1 $0, $f0\n\t" + "pshufh %0, %0, $f0\n\t" + : "=f" (ret) + : "r" (__h0) + : "$8", "$f0" + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_set1_pi32(unsigned __i0) +{ + return _mm_set_pi32(__i0, __i0); +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_setr_pi8(uint8_t __h0, uint8_t __h1, uint8_t __h2, uint8_t __h3, + uint8_t __h4, uint8_t __h5, uint8_t __h6, uint8_t __h7) +{ + return _mm_set_pi8(__h7, __h6, __h5, __h4, + __h3, __h2, __h1, __h0); +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_setr_pi16(uint16_t __w0, uint16_t __w1, uint16_t __w2, uint16_t __w3) +{ + return _mm_set_pi16(__w3, __w2, __w1, __w0); +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_setr_pi32(uint32_t __i0, uint32_t __i1) +{ + return _mm_set_pi32(__i1, __i0); +} + + +/********** Arithmetic Operations **********/ + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_add_pi8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("paddb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_add_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("paddh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_add_pi32(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("paddw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_add_si64(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("paddd %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_adds_pi8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("paddsb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_adds_pi16(__m64 
__m1, __m64 __m2) +{ + __m64 ret; + + asm("paddsh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_adds_pu8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("paddusb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_adds_pu16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("paddush %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_avg_pu8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pavgb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_avg_pu16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pavgh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_madd_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pmaddhw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_max_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pmaxsh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_max_pu8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pmaxub %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_min_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pminsh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_min_pu8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pminub %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline int FUNCTION_ATTRIBS +_mm_movemask_pi8(__m64 __m1) +{ + int ret; + + asm("pmovmskb %0, %1\n\t" + : 
"=r" (ret) + : "y" (__m1) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_mulhi_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pmulhh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_mulhi_pu16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pmulhuh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_mullo_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pmullh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_mul_pu32(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pmuluw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_sad_pu8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("psadbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_asub_pu8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pasubub %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_biadd_pu8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("biadd %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_sub_pi8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("psubb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_sub_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("psubh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_sub_pi32(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("psubw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + 
+extern __inline __m64 FUNCTION_ATTRIBS +_mm_sub_si64(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("psubd %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_subs_pi8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("psubsb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_subs_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("psubsh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_subs_pu8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("psubusb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_subs_pu16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("psubush %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + + +/********** Logical Operations **********/ + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_and_si64(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("and %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_andnot_si64(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("andn %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_or_si32(__m32 __m1, __m32 __m2) +{ + __m32 ret; + + asm("or %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_or_si64(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("or %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_xor_si64(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("xor %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + + +/********** 
Shift Operations **********/ + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_slli_pi16(__m64 __m, int64_t __count) +{ + __m64 ret; + + asm("psllh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_slli_pi32(__m64 __m, int64_t __count) +{ + __m64 ret; + + asm("psllw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_slli_si64(__m64 __m, int64_t __count) +{ + __m64 ret; + + asm("dsll %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_srli_pi16(__m64 __m, int64_t __count) +{ + __m64 ret; + + asm("psrlh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_srli_pi32(__m64 __m, int64_t __count) +{ + __m64 ret; + + asm("psrlw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_srli_si64(__m64 __m, int64_t __count) +{ + __m64 ret; + + asm("dsrl %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_srai_pi16(__m64 __m, int64_t __count) +{ + __m64 ret; + + asm("psrah %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_srai_pi32(__m64 __m, int64_t __count) +{ + __m64 ret; + + asm("psraw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_srai_si64(__m64 __m, int64_t __count) +{ + __m64 ret; + + asm("dsra %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__count) + ); + + return ret; +} + + +/********** Conversion Intrinsics **********/ + +extern __inline 
__m64 FUNCTION_ATTRIBS +to_m64(uint64_t x) +{ + return *(__m64 *)&x; +} + +extern __inline uint64_t FUNCTION_ATTRIBS +to_uint64(__m64 x) +{ + return *(uint64_t *)&x; +} + + +/********** Comparison Intrinsics **********/ + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pcmpeqb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pcmpeqh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pcmpeqw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pcmpgtb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pcmpgth %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pcmpgtw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_cmplt_pi8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pcmpltb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_cmplt_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pcmplth %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_cmplt_pi32(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("pcmpltw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + + 
+/********** Miscellaneous Operations **********/ + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_packs_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("packsshb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_packs_pi32(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("packsswh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_packs_pi32_f(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("packsswh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_packs_pu16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("packushb %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_extract_pi16(__m64 __m, int64_t __pos) +{ + __m64 ret; + + asm("pextrh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__pos) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_insert_pi16(__m64 __m1, __m64 __m2, int64_t __pos) +{ + __m64 ret; + + switch (__pos) { + case 0: + + asm("pinsrh_0 %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2), "i" (__pos) + ); + + break; + + case 1: + + asm("pinsrh_1 %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2), "i" (__pos) + ); + + break; + case 2: + + asm("pinsrh_2 %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2), "i" (__pos) + ); + + break; + + case 3: + + asm("pinsrh_3 %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2), "i" (__pos) + ); + + break; + } + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_shuffle_pi16(__m64 __m, int64_t __n) +{ + __m64 ret; + + asm("pshufh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m), "f" (*(__m64 *)&__n) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + 
asm("punpckhbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_unpackhi_pi8_f(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("punpckhbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("punpckhhw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_unpackhi_pi16_f(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("punpckhhw %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("punpckhwd %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("punpcklbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +/* Since punpcklbh cares about the high 32-bits, we use the __m64 datatype, + which preserves the data. */ + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_unpacklo_pi8_f64(__m64 __m1, __m64 __m2) +{ + __m64 ret; + + asm("punpcklbh %0, %1, %2\n\t" + : "=f" (ret) + : "f" (__m1), "f" (__m2) + ); + + return ret; +} + +/* Since punpcklbh doesn't care about the high 32-bits, we use the __m32, + datatype, which allows load8888 to use 32-bit loads. 
/*
 * Load one 32-bit word from *src into a floating-point register with the
 * lwc1 instruction and return it.
 *
 * NOTE(review): the local that receives the word is an __m32, yet the
 * function is declared to return __m64, so the return statement performs an
 * implicit __m32 -> __m64 conversion.  The typedefs are not visible from
 * here; if they are distinct arithmetic types (e.g. float and double) that
 * conversion changes the bit pattern instead of preserving it -- confirm the
 * intended return type.
 */
extern __inline __m64 FUNCTION_ATTRIBS
_mm_load_si32(const __m32 *src)
{
  __m32 ret;

  asm("lwc1 %0, %1\n\t"
      : "=f" (ret)
      : "m" (*src)
     );

  return ret;
}
_mm_unpacklo_pi8_f64(src, _mm_setzero_si64()); +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_loadhi_pi8_f(__m64 src) +{ + return _mm_unpackhi_pi8_f(src, _mm_setzero_si64()); +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_loadlo_pi16(__m64 src) +{ + return _mm_unpacklo_pi16(src, _mm_setzero_si64()); +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_loadlo_pi16_f(__m64 src) +{ + return _mm_unpacklo_pi16_f(_mm_setzero_si64(), src); +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_loadhi_pi16(__m64 src) +{ + return _mm_unpackhi_pi16(src, _mm_setzero_si64()); +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_loadhi_pi16_f(__m64 src) +{ + return _mm_unpackhi_pi16_f(_mm_setzero_si64(), src); +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_expand_alpha(__m64 pixel) +{ + return _mm_shuffle_pi16(pixel, _MM_SHUFFLE(3, 3, 3, 3)); +} + +extern __inline __m64 FUNCTION_ATTRIBS +_mm_expand_alpha_rev(__m64 pixel) +{ + return _mm_shuffle_pi16(pixel, _MM_SHUFFLE(0, 0, 0, 0)); +} + +#endif /* __LOONGSON_MMINTRIN_H__ */ diff --git a/simd/mips/jsimd.c b/simd/mips/jsimd.c new file mode 100644 index 0000000..454cc99 --- /dev/null +++ b/simd/mips/jsimd.c @@ -0,0 +1,1123 @@ +/* + * jsimd_mips.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander. + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. + * Copyright (C) 2015-2016, 2018, Matthieu Darbois. + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * MIPS architecture. 
+ */ + +#define JPEG_INTERNALS +#include "../../jinclude.h" +#include "../../jpeglib.h" +#include "../../jsimd.h" +#include "../../jdct.h" +#include "../../jsimddct.h" +#include "../jsimd.h" + +#include +#include +#include + +static unsigned int simd_support = ~0; + +#if defined(__linux__) + +LOCAL(int) +parse_proc_cpuinfo(const char *search_string) +{ + const char *file_name = "/proc/cpuinfo"; + char cpuinfo_line[256]; + FILE *f = NULL; + + simd_support = 0; + + if ((f = fopen(file_name, "r")) != NULL) { + while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f) != NULL) { + if (strstr(cpuinfo_line, search_string) != NULL) { + fclose(f); + simd_support |= JSIMD_DSPR2; + return 1; + } + } + fclose(f); + } + /* Did not find string in the proc file, or not Linux ELF. */ + return 0; +} + +#endif + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ +LOCAL(void) +init_simd(void) +{ +#ifndef NO_GETENV + char *env = NULL; +#endif + + if (simd_support != ~0U) + return; + + simd_support = 0; + +#if defined(__MIPSEL__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) + simd_support |= JSIMD_DSPR2; +#elif defined(__linux__) + /* We still have a chance to use MIPS DSPR2 regardless of globally used + * -mdspr2 options passed to gcc by performing runtime detection via + * /proc/cpuinfo parsing on linux */ + if (!parse_proc_cpuinfo("MIPS 74K")) + return; +#endif + +#ifndef NO_GETENV + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCEDSPR2"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = JSIMD_DSPR2; + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; +#endif +} + +static const int mips_idct_ifast_coefs[4] = { + 0x45404540, /* FIX( 1.082392200 / 2) = 17734 = 0x4546 */ + 0x5A805A80, /* FIX( 1.414213562 / 2) = 23170 = 0x5A82 */ + 0x76407640, /* FIX( 1.847759065 / 2) = 30274 = 0x7642 */ + 0xAC60AC60 /* 
FIX(-2.613125930 / 4) = -21407 = 0xAC61 */ +}; + +/* The following struct is borrowed from jdsample.c */ +typedef void (*upsample1_ptr) (j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); +typedef struct { + struct jpeg_upsampler pub; + JSAMPARRAY color_buf[MAX_COMPONENTS]; + upsample1_ptr methods[MAX_COMPONENTS]; + int next_row_out; + JDIMENSION rows_to_go; + int rowgroup_height[MAX_COMPONENTS]; + UINT8 h_expand[MAX_COMPONENTS]; + UINT8 v_expand[MAX_COMPONENTS]; +} my_upsampler; + +typedef my_upsampler *my_upsample_ptr; + +GLOBAL(int) +jsimd_can_rgb_ycc(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_c_can_null_convert(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, 
JDIMENSION output_row, + int num_rows) +{ + void (*dspr2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + dspr2fct = jsimd_extrgb_ycc_convert_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + dspr2fct = jsimd_extrgbx_ycc_convert_dspr2; + break; + case JCS_EXT_BGR: + dspr2fct = jsimd_extbgr_ycc_convert_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + dspr2fct = jsimd_extbgrx_ycc_convert_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + dspr2fct = jsimd_extxbgr_ycc_convert_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + dspr2fct = jsimd_extxrgb_ycc_convert_dspr2; + break; + default: + dspr2fct = jsimd_extrgb_ycc_convert_dspr2; + break; + } + + dspr2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + void (*dspr2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + dspr2fct = jsimd_extrgb_gray_convert_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + dspr2fct = jsimd_extrgbx_gray_convert_dspr2; + break; + case JCS_EXT_BGR: + dspr2fct = jsimd_extbgr_gray_convert_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + dspr2fct = jsimd_extbgrx_gray_convert_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + dspr2fct = jsimd_extxbgr_gray_convert_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + dspr2fct = jsimd_extxrgb_gray_convert_dspr2; + break; + default: + dspr2fct = jsimd_extrgb_gray_convert_dspr2; + break; + } + + dspr2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ + void (*dspr2fct) (JDIMENSION, JSAMPIMAGE, 
JDIMENSION, JSAMPARRAY, int); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + dspr2fct = jsimd_ycc_extrgb_convert_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + dspr2fct = jsimd_ycc_extrgbx_convert_dspr2; + break; + case JCS_EXT_BGR: + dspr2fct = jsimd_ycc_extbgr_convert_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + dspr2fct = jsimd_ycc_extbgrx_convert_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + dspr2fct = jsimd_ycc_extxbgr_convert_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + dspr2fct = jsimd_ycc_extxrgb_convert_dspr2; + break; + default: + dspr2fct = jsimd_ycc_extrgb_convert_dspr2; + break; + } + + dspr2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ +} + +GLOBAL(void) +jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + jsimd_c_null_convert_dspr2(cinfo->image_width, input_buf, output_buf, + output_row, num_rows, cinfo->num_components); +} + +GLOBAL(int) +jsimd_can_h2v2_downsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v2_smooth_downsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (DCTSIZE != 8) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_DSPR2) + 
return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_downsample_dspr2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(void) +jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v2_smooth_downsample_dspr2(input_data, output_data, + compptr->v_samp_factor, + cinfo->max_v_samp_factor, + cinfo->smoothing_factor, + compptr->width_in_blocks, + cinfo->image_width); +} + +GLOBAL(void) +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + jsimd_h2v1_downsample_dspr2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, compptr->width_in_blocks, + input_data, output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_int_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + jsimd_h2v2_upsample_dspr2(cinfo->max_v_samp_factor, cinfo->output_width, 
/*
 * Forward an integer-factor upsampling request to jsimd_int_upsample_dspr2().
 *
 * The horizontal and vertical expansion factors for this component are read
 * from cinfo->upsample, viewed through the my_upsampler layout declared
 * earlier in this file (which that declaration describes as borrowed from
 * jdsample.c).  The remaining arguments are input_data, output_data_ptr,
 * cinfo->output_width and cinfo->max_v_samp_factor.
 *
 * NOTE(review): the cast is only valid while the local my_upsampler struct
 * matches the one the upsampler was actually allocated with -- confirm the
 * two definitions are kept in sync.
 */
GLOBAL(void)
jsimd_int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                   JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
{
  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;

  jsimd_int_upsample_dspr2(upsample->h_expand[compptr->component_index],
                           upsample->v_expand[compptr->component_index],
                           input_data, output_data_ptr, cinfo->output_width,
                           cinfo->max_v_samp_factor);
}
(sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) +{ + void (*dspr2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, JSAMPLE *); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + dspr2fct = jsimd_h2v2_extrgb_merged_upsample_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + dspr2fct = jsimd_h2v2_extrgbx_merged_upsample_dspr2; + break; + case JCS_EXT_BGR: + dspr2fct = jsimd_h2v2_extbgr_merged_upsample_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + dspr2fct = jsimd_h2v2_extbgrx_merged_upsample_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + dspr2fct = jsimd_h2v2_extxbgr_merged_upsample_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + dspr2fct = jsimd_h2v2_extxrgb_merged_upsample_dspr2; + break; + default: + dspr2fct = jsimd_h2v2_extrgb_merged_upsample_dspr2; + break; + } + + dspr2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf, + cinfo->sample_range_limit); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) +{ + void (*dspr2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, JSAMPLE *); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + dspr2fct = jsimd_h2v1_extrgb_merged_upsample_dspr2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + dspr2fct = jsimd_h2v1_extrgbx_merged_upsample_dspr2; + break; + case JCS_EXT_BGR: + dspr2fct = jsimd_h2v1_extbgr_merged_upsample_dspr2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + 
dspr2fct = jsimd_h2v1_extbgrx_merged_upsample_dspr2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + dspr2fct = jsimd_h2v1_extxbgr_merged_upsample_dspr2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + dspr2fct = jsimd_h2v1_extxrgb_merged_upsample_dspr2; + break; + default: + dspr2fct = jsimd_h2v1_extrgb_merged_upsample_dspr2; + break; + } + + dspr2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf, + cinfo->sample_range_limit); +} + +GLOBAL(int) +jsimd_can_convsamp(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + +#ifndef __mips_soft_float + if (simd_support & JSIMD_DSPR2) + return 1; +#endif + + return 0; +} + +GLOBAL(void) +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + jsimd_convsamp_dspr2(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ +#ifndef __mips_soft_float + jsimd_convsamp_float_dspr2(sample_data, start_col, workspace); +#endif +} + +GLOBAL(int) +jsimd_can_fdct_islow(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if 
/*
 * Forward a floating-point quantization request to
 * jsimd_quantize_float_dspr2(), passing coef_block, divisors and workspace
 * through unchanged.
 *
 * When __mips_soft_float is defined the body compiles to nothing.  In that
 * configuration jsimd_can_quantize_float() in this file never returns 1, so
 * the empty body only matters if a caller skips that check.
 */
GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
                     FAST_FLOAT *workspace)
{
#ifndef __mips_soft_float
  /* Only available when the build uses hardware floating point. */
  jsimd_quantize_float_dspr2(coef_block, divisors, workspace);
#endif
}
(sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_6x6(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_12x12(void) +{ + init_simd(); + + if (BITS_IN_JSAMPLE != 8) + return 0; + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_2x2_dspr2(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(void) +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + int workspace[DCTSIZE * 4]; /* buffers data between passes */ + + jsimd_idct_4x4_dspr2(compptr->dct_table, coef_block, output_buf, output_col, + workspace); +} + +GLOBAL(void) +jsimd_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_6x6_dspr2(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(void) +jsimd_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + int workspace[96]; + int output[12] = { + (int)(output_buf[0] + output_col), + 
(int)(output_buf[1] + output_col), + (int)(output_buf[2] + output_col), + (int)(output_buf[3] + output_col), + (int)(output_buf[4] + output_col), + (int)(output_buf[5] + output_col), + (int)(output_buf[6] + output_col), + (int)(output_buf[7] + output_col), + (int)(output_buf[8] + output_col), + (int)(output_buf[9] + output_col), + (int)(output_buf[10] + output_col), + (int)(output_buf[11] + output_col) + }; + + jsimd_idct_12x12_pass1_dspr2(coef_block, compptr->dct_table, workspace); + jsimd_idct_12x12_pass2_dspr2(workspace, output); +} + +GLOBAL(int) +jsimd_can_idct_islow(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if (simd_support & JSIMD_DSPR2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + int output[8] = { + (int)(output_buf[0] + output_col), + (int)(output_buf[1] + output_col), + (int)(output_buf[2] + output_col), + (int)(output_buf[3] + output_col), + (int)(output_buf[4] + output_col), + (int)(output_buf[5] + output_col), + (int)(output_buf[6] + output_col), + (int)(output_buf[7] + output_col) + }; + + jsimd_idct_islow_dspr2(coef_block, compptr->dct_table, output, + IDCT_range_limit(cinfo)); +} + 
+GLOBAL(void) +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + JCOEFPTR inptr; + IFAST_MULT_TYPE *quantptr; + DCTELEM workspace[DCTSIZE2]; /* buffers data between passes */ + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (IFAST_MULT_TYPE *)compptr->dct_table; + + jsimd_idct_ifast_cols_dspr2(inptr, quantptr, workspace, + mips_idct_ifast_coefs); + + /* Pass 2: process rows from work array, store into output array. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + jsimd_idct_ifast_rows_dspr2(workspace, output_buf, output_col, + mips_idct_ifast_coefs); +} + +GLOBAL(void) +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block(void) +{ + return 0; +} + +GLOBAL(JOCTET *) +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return NULL; +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_first_prepare(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *zerobits) +{ +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_refine_prepare(void) +{ + return 0; +} + +GLOBAL(int) +jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits) +{ + return 0; +} diff --git a/simd/mips/jsimd_dspr2.S b/simd/mips/jsimd_dspr2.S new file mode 100644 index 0000000..a28c116 --- /dev/null +++ b/simd/mips/jsimd_dspr2.S @@ -0,0 +1,4479 @@ +/* + * MIPS DSPr2 optimizations for libjpeg-turbo + * + * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. 
/*****************************************************************************/
LEAF_DSPR2(jsimd_c_null_convert_dspr2)
/*
 * a0     = cinfo->image_width
 * a1     = input_buf
 * a2     = output_buf
 * a3     = output_row
 * 16(sp) = num_rows
 * 20(sp) = cinfo->num_components
 *
 * Null conversion for compression
 *
 * For every row and every component ci, samples are read from the row
 * pointed to by *input_buf starting at byte offset ci and stepping by
 * num_components, and are stored contiguously into
 * output_buf[ci][output_row].
 *
 * Two code paths exist: labels 0-3 are used when image_width is not a
 * multiple of 4 (loop 2 copies the image_width & 3 leftover samples one at a
 * time, loop 3 copies the rest four at a time); labels 4-6 are used when it
 * is a multiple of 4 (four at a time only).
 *
 * Instructions indented by one extra space sit in a branch delay slot.
 *
 * NOTE(review): loops 3 and 6 test their end condition only after the body
 * has run once.  When image_width is 1..3, t5 already equals s1 on entry to
 * loop 3, so it looks like the loop would store past the end of the row --
 * confirm whether callers can pass a width below 4.
 */
    SAVE_REGS_ON_STACK 8, s0, s1

    // The stack arguments documented at 16(sp)/20(sp) are read 8 bytes
    // higher here, presumably because SAVE_REGS_ON_STACK moved sp by 8.
    lw          t9, 24(sp)      // t9 = num_rows
    lw          s0, 28(sp)      // s0 = cinfo->num_components
    andi        t0, a0, 3       // t0 = cinfo->image_width & 3
    beqz        t0, 4f          // no residual
     nop
0:                              // per-row loop (width not a multiple of 4)
    addiu       t9, t9, -1
    bltz        t9, 7f          // all rows done
     li         t1, 0           // t1 = ci = 0
1:                              // per-component loop
    sll         t3, t1, 2
    lwx         t5, t3(a2)      // t5 = outptr = output_buf[ci]
    lw          t2, 0(a1)       // t2 = inptr = *input_buf
    sll         t4, a3, 2
    lwx         t5, t4(t5)      // t5 = outptr = output_buf[ci][output_row]
    addu        t2, t2, t1      // inptr += ci
    addu        s1, t5, a0      // s1 = outptr + image_width (end of row)
    addu        t6, t5, t0      // t6 = outptr + residual count
2:                              // copy the residual samples one at a time
    lbu         t3, 0(t2)
    addiu       t5, t5, 1
    sb          t3, -1(t5)
    bne         t6, t5, 2b
     addu       t2, t2, s0      // inptr += num_components
3:                              // copy the remaining samples four at a time
    lbu         t3, 0(t2)
    addu        t4, t2, s0
    addu        t7, t4, s0
    addu        t8, t7, s0
    addu        t2, t8, s0      // inptr += 4 * num_components
    lbu         t4, 0(t4)
    lbu         t7, 0(t7)
    lbu         t8, 0(t8)
    addiu       t5, t5, 4
    sb          t3, -4(t5)
    sb          t4, -3(t5)
    sb          t7, -2(t5)
    bne         s1, t5, 3b
     sb         t8, -1(t5)
    addiu       t1, t1, 1       // ci++
    bne         t1, s0, 1b
     nop
    addiu       a1, a1, 4       // input_buf++
    bgez        t9, 0b
     addiu      a3, a3, 1       // output_row++
    b           7f
     nop
4:                              // per-row loop (width is a multiple of 4)
    addiu       t9, t9, -1
    bltz        t9, 7f          // all rows done
     li         t1, 0           // t1 = ci = 0
5:                              // per-component loop
    sll         t3, t1, 2
    lwx         t5, t3(a2)      // t5 = outptr = output_buf[ci]
    lw          t2, 0(a1)       // t2 = inptr = *input_buf
    sll         t4, a3, 2
    lwx         t5, t4(t5)      // t5 = outptr = output_buf[ci][output_row]
    addu        t2, t2, t1      // inptr += ci
    addu        s1, t5, a0      // s1 = outptr + image_width (end of row)
    addu        t6, t5, t0      // t6 is not read on this path
6:                              // copy samples four at a time
    lbu         t3, 0(t2)
    addu        t4, t2, s0
    addu        t7, t4, s0
    addu        t8, t7, s0
    addu        t2, t8, s0      // inptr += 4 * num_components
    lbu         t4, 0(t4)
    lbu         t7, 0(t7)
    lbu         t8, 0(t8)
    addiu       t5, t5, 4
    sb          t3, -4(t5)
    sb          t4, -3(t5)
    sb          t7, -2(t5)
    bne         s1, t5, 6b
     sb         t8, -1(t5)
    addiu       t1, t1, 1       // ci++
    bne         t1, s0, 5b
     nop
    addiu       a1, a1, 4       // input_buf++
    bgez        t9, 4b
     addiu      a3, a3, 1       // output_row++
7:
    RESTORE_REGS_FROM_STACK 8, s0, s1

    j           ra
     nop

END(jsimd_c_null_convert_dspr2)
-FIX(0.33126) + li s5, 0x8000 // FIX(0.50000) + li s6, 0xffff94d1 // -FIX(0.41869) + li s7, 0xffffeb2f // -FIX(0.08131) + li t8, 0x807fff // CBCR_OFFSET + ONE_HALF-1 + +0: + addiu t7, -1 // --num_rows + lw t6, 0(a1) // t6 = input_buf[0] + lw t0, 0(a2) + lw t1, 4(a2) + lw t2, 8(a2) + sll t3, a3, 2 + lwx t0, t3(t0) // t0 = output_buf[0][output_row] + lwx t1, t3(t1) // t1 = output_buf[1][output_row] + lwx t2, t3(t2) // t2 = output_buf[2][output_row] + + addu t9, t2, a0 // t9 = end address + addiu a3, 1 + +1: + DO_RGB_TO_YCC t3, t4, t5, t6 + + mtlo s5, $ac0 + mtlo t8, $ac1 + mtlo t8, $ac2 + maddu $ac0, s2, t5 + maddu $ac1, s5, t5 + maddu $ac2, s5, t3 + maddu $ac0, s0, t3 + maddu $ac1, s3, t3 + maddu $ac2, s6, t4 + maddu $ac0, s1, t4 + maddu $ac1, s4, t4 + maddu $ac2, s7, t5 + extr.w t3, $ac0, 16 + extr.w t4, $ac1, 16 + extr.w t5, $ac2, 16 + sb t3, 0(t0) + sb t4, 0(t1) + sb t5, 0(t2) + addiu t0, 1 + addiu t2, 1 + bne t2, t9, 1b + addiu t1, 1 + bgtz t7, 0b + addiu a1, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_\colorid\()_ycc_convert_dspr2) + +.purgem DO_RGB_TO_YCC + +.endm + +/*-------------------------------------id -- pix R G B */ +GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extrgb, 3, 0, 1, 2 +GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extbgr, 3, 2, 1, 0 +GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extrgbx, 4, 0, 1, 2 +GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extbgrx, 4, 2, 1, 0 +GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extxbgr, 4, 3, 2, 1 +GENERATE_JSIMD_RGB_YCC_CONVERT_DSPR2 extxrgb, 4, 1, 2, 3 + + +/*****************************************************************************/ +/* + * jsimd_ycc_extrgb_convert_dspr2 + * jsimd_ycc_extbgr_convert_dspr2 + * jsimd_ycc_extrgbx_convert_dspr2 + * jsimd_ycc_extbgrx_convert_dspr2 + * jsimd_ycc_extxbgr_convert_dspr2 + * jsimd_ycc_extxrgb_convert_dspr2 + * + * Colorspace conversion YCbCr -> RGB + */ + +.macro GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 colorid, pixel_size, \ + r_offs, g_offs, b_offs, a_offs + 
+.macro STORE_YCC_TO_RGB scratch0 scratch1 scratch2 outptr + sb \scratch0, \r_offs(\outptr) + sb \scratch1, \g_offs(\outptr) + sb \scratch2, \b_offs(\outptr) +.if (\pixel_size == 4) + li t0, 0xFF + sb t0, \a_offs(\outptr) +.endif + addiu \outptr, \pixel_size +.endm + +LEAF_DSPR2(jsimd_ycc_\colorid\()_convert_dspr2) +/* + * a0 = cinfo->image_width + * a1 = input_buf + * a2 = input_row + * a3 = output_buf + * 16(sp) = num_rows + */ + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s1, 48(sp) + li t3, 0x8000 + li t4, 0x166e9 // FIX(1.40200) + li t5, 0x1c5a2 // FIX(1.77200) + li t6, 0xffff492e // -FIX(0.71414) + li t7, 0xffffa7e6 // -FIX(0.34414) + repl.ph t8, 128 + +0: + lw s0, 0(a3) + lw t0, 0(a1) + lw t1, 4(a1) + lw t2, 8(a1) + sll s5, a2, 2 + addiu s1, -1 + lwx s2, s5(t0) + lwx s3, s5(t1) + lwx s4, s5(t2) + addu t9, s2, a0 + addiu a2, 1 + +1: + lbu s7, 0(s4) // cr + lbu s6, 0(s3) // cb + lbu s5, 0(s2) // y + addiu s2, 1 + addiu s4, 1 + addiu s7, -128 + addiu s6, -128 + mul t2, t7, s6 + mul t0, t6, s7 // Crgtab[cr] + sll s7, 15 + mulq_rs.w t1, t4, s7 // Crrtab[cr] + sll s6, 15 + addu t2, t3 // Cbgtab[cb] + addu t2, t0 + + mulq_rs.w t0, t5, s6 // Cbbtab[cb] + sra t2, 16 + addu t1, s5 + addu t2, s5 // add y + ins t2, t1, 16, 16 + subu.ph t2, t2, t8 + addu t0, s5 + shll_s.ph t2, t2, 8 + subu t0, 128 + shra.ph t2, t2, 8 + shll_s.w t0, t0, 24 + addu.ph t2, t2, t8 // clip & store + sra t0, t0, 24 + sra t1, t2, 16 + addiu t0, 128 + + STORE_YCC_TO_RGB t1, t2, t0, s0 + + bne s2, t9, 1b + addiu s3, 1 + bgtz s1, 0b + addiu a3, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_ycc_\colorid\()_convert_dspr2) + +.purgem STORE_YCC_TO_RGB + +.endm + +/*-------------------------------------id -- pix R G B A */ +GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extrgb, 3, 0, 1, 2, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extbgr, 3, 2, 1, 0, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extrgbx, 4, 0, 1, 2, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 
extbgrx, 4, 2, 1, 0, 3 +GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extxbgr, 4, 3, 2, 1, 0 +GENERATE_JSIMD_YCC_RGB_CONVERT_DSPR2 extxrgb, 4, 1, 2, 3, 0 + + +/*****************************************************************************/ +/* + * jsimd_extrgb_gray_convert_dspr2 + * jsimd_extbgr_gray_convert_dspr2 + * jsimd_extrgbx_gray_convert_dspr2 + * jsimd_extbgrx_gray_convert_dspr2 + * jsimd_extxbgr_gray_convert_dspr2 + * jsimd_extxrgb_gray_convert_dspr2 + * + * Colorspace conversion RGB -> GRAY + */ + +.macro GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 colorid, pixel_size, \ + r_offs, g_offs, b_offs + +.macro DO_RGB_TO_GRAY r, g, b, inptr + lbu \r, \r_offs(\inptr) + lbu \g, \g_offs(\inptr) + lbu \b, \b_offs(\inptr) + addiu \inptr, \pixel_size +.endm + +LEAF_DSPR2(jsimd_\colorid\()_gray_convert_dspr2) +/* + * a0 = cinfo->image_width + * a1 = input_buf + * a2 = output_buf + * a3 = output_row + * 16(sp) = num_rows + */ + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + li s0, 0x4c8b // s0 = FIX(0.29900) + li s1, 0x9646 // s1 = FIX(0.58700) + li s2, 0x1d2f // s2 = FIX(0.11400) + li s7, 0x8000 // s7 = FIX(0.50000) + lw s6, 48(sp) + andi t7, a0, 3 + +0: + addiu s6, -1 // s6 = num_rows + lw t0, 0(a1) + lw t1, 0(a2) + sll t3, a3, 2 + lwx t1, t3(t1) + addiu a3, 1 + addu t9, t1, a0 + subu t8, t9, t7 + beq t1, t8, 2f + nop + +1: + DO_RGB_TO_GRAY t3, t4, t5, t0 + DO_RGB_TO_GRAY s3, s4, s5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + maddu $ac0, s0, t3 + mtlo s7, $ac1 + maddu $ac1, s2, s5 + maddu $ac1, s1, s4 + maddu $ac1, s0, s3 + extr.w t6, $ac0, 16 + + DO_RGB_TO_GRAY t3, t4, t5, t0 + DO_RGB_TO_GRAY s3, s4, s5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + extr.w t2, $ac1, 16 + maddu $ac0, s0, t3 + mtlo s7, $ac1 + maddu $ac1, s2, s5 + maddu $ac1, s1, s4 + maddu $ac1, s0, s3 + extr.w t5, $ac0, 16 + sb t6, 0(t1) + sb t2, 1(t1) + extr.w t3, $ac1, 16 + addiu t1, 4 + sb t5, -2(t1) + sb t3, -1(t1) + bne t1, t8, 1b + nop + +2: + beqz t7, 4f + 
nop + +3: + DO_RGB_TO_GRAY t3, t4, t5, t0 + + mtlo s7, $ac0 + maddu $ac0, s2, t5 + maddu $ac0, s1, t4 + maddu $ac0, s0, t3 + extr.w t6, $ac0, 16 + sb t6, 0(t1) + addiu t1, 1 + bne t1, t9, 3b + nop + +4: + bgtz s6, 0b + addiu a1, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_\colorid\()_gray_convert_dspr2) + +.purgem DO_RGB_TO_GRAY + +.endm + +/*-------------------------------------id -- pix R G B */ +GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extrgb, 3, 0, 1, 2 +GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extbgr, 3, 2, 1, 0 +GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extrgbx, 4, 0, 1, 2 +GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extbgrx, 4, 2, 1, 0 +GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extxbgr, 4, 3, 2, 1 +GENERATE_JSIMD_RGB_GRAY_CONVERT_DSPR2 extxrgb, 4, 1, 2, 3 + + +/*****************************************************************************/ +/* + * jsimd_h2v2_merged_upsample_dspr2 + * jsimd_h2v2_extrgb_merged_upsample_dspr2 + * jsimd_h2v2_extrgbx_merged_upsample_dspr2 + * jsimd_h2v2_extbgr_merged_upsample_dspr2 + * jsimd_h2v2_extbgrx_merged_upsample_dspr2 + * jsimd_h2v2_extxbgr_merged_upsample_dspr2 + * jsimd_h2v2_extxrgb_merged_upsample_dspr2 + * + * Merged h2v2 upsample routines + */ +.macro GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 colorid, pixel_size, \ + r1_offs, g1_offs, \ + b1_offs, a1_offs, \ + r2_offs, g2_offs, \ + b2_offs, a2_offs + +.macro STORE_H2V2_2_PIXELS scratch0 scratch1 scratch2 scratch3 scratch4 \ + scratch5 outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) + sb \scratch3, \r2_offs(\outptr) + sb \scratch4, \g2_offs(\outptr) + sb \scratch5, \b2_offs(\outptr) +.if (\pixel_size == 8) + li \scratch0, 0xFF + sb \scratch0, \a1_offs(\outptr) + sb \scratch0, \a2_offs(\outptr) +.endif + addiu \outptr, \pixel_size +.endm + +.macro STORE_H2V2_1_PIXEL scratch0 scratch1 scratch2 outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, 
\b1_offs(\outptr) + +.if (\pixel_size == 8) + li t0, 0xFF + sb t0, \a1_offs(\outptr) +.endif +.endm + +LEAF_DSPR2(jsimd_h2v2_\colorid\()_merged_upsample_dspr2) +/* + * a0 = cinfo->output_width + * a1 = input_buf + * a2 = in_row_group_ctr + * a3 = output_buf + * 16(sp) = cinfo->sample_range_limit + */ + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + lw t9, 56(sp) // cinfo->sample_range_limit + lw v0, 0(a1) + lw v1, 4(a1) + lw t0, 8(a1) + sll t1, a2, 3 + addiu t2, t1, 4 + sll t3, a2, 2 + lw t4, 0(a3) // t4 = output_buf[0] + lwx t1, t1(v0) // t1 = input_buf[0][in_row_group_ctr*2] + lwx t2, t2(v0) // t2 = input_buf[0][in_row_group_ctr*2 + 1] + lwx t5, t3(v1) // t5 = input_buf[1][in_row_group_ctr] + lwx t6, t3(t0) // t6 = input_buf[2][in_row_group_ctr] + lw t7, 4(a3) // t7 = output_buf[1] + li s1, 0xe6ea + addiu t8, s1, 0x7fff // t8 = 0x166e9 [FIX(1.40200)] + addiu s0, t8, 0x5eb9 // s0 = 0x1c5a2 [FIX(1.77200)] + addiu s1, zero, 0xa7e6 // s1 = 0xffffa7e6 [-FIX(0.34414)] + xori s2, s1, 0xeec8 // s2 = 0xffff492e [-FIX(0.71414)] + srl t3, a0, 1 + blez t3, 2f + addu t0, t5, t3 // t0 = end address + 1: + lbu t3, 0(t5) + lbu s3, 0(t6) + addiu t5, t5, 1 + addiu t3, t3, -128 // (cb - 128) + addiu s3, s3, -128 // (cr - 128) + mult $ac1, s1, t3 + madd $ac1, s2, s3 + sll s3, s3, 15 + sll t3, t3, 15 + mulq_rs.w s4, t8, s3 // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS + extr_r.w s5, $ac1, 16 + mulq_rs.w s6, s0, t3 // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS + lbu v0, 0(t1) + addiu t6, t6, 1 + addiu t1, t1, 2 + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu AT, 0(t3) + lbu s7, 0(s3) + lbu ra, 0(v1) + lbu v0, -1(t1) + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + lbu v0, 0(t2)
+ + STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t4 + + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu AT, 0(t3) + lbu s7, 0(s3) + lbu ra, 0(v1) + lbu v0, 1(t2) + addiu t2, t2, 2 + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + + STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t7 + + bne t0, t5, 1b + nop +2: + andi t0, a0, 1 + beqz t0, 4f + lbu t3, 0(t5) + lbu s3, 0(t6) + addiu t3, t3, -128 // (cb - 128) + addiu s3, s3, -128 // (cr - 128) + mult $ac1, s1, t3 + madd $ac1, s2, s3 + sll s3, s3, 15 + sll t3, t3, 15 + lbu v0, 0(t1) + extr_r.w s5, $ac1, 16 + mulq_rs.w s4, t8, s3 // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS + mulq_rs.w s6, s0, t3 // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + lbu v0, 0(t2) + + STORE_H2V2_1_PIXEL t3, s3, v1, t4 + + addu t3, v0, s4 // y+cred + addu s3, v0, s5 // y+cgreen + addu v1, v0, s6 // y+cblue + addu t3, t9, t3 // y+cred + addu s3, t9, s3 // y+cgreen + addu v1, t9, v1 // y+cblue + lbu t3, 0(t3) + lbu s3, 0(s3) + lbu v1, 0(v1) + + STORE_H2V2_1_PIXEL t3, s3, v1, t7 +4: + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + j ra + nop + +END(jsimd_h2v2_\colorid\()_merged_upsample_dspr2) + +.purgem STORE_H2V2_1_PIXEL +.purgem STORE_H2V2_2_PIXELS +.endm + +/*------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */ +GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 +GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6 +GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extrgbx, 8, 0, 
1, 2, 3, 4, 5, 6, 7 +GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7 +GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4 +GENERATE_H2V2_MERGED_UPSAMPLE_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4 + + +/*****************************************************************************/ +/* + * jsimd_h2v1_merged_upsample_dspr2 + * jsimd_h2v1_extrgb_merged_upsample_dspr2 + * jsimd_h2v1_extrgbx_merged_upsample_dspr2 + * jsimd_h2v1_extbgr_merged_upsample_dspr2 + * jsimd_h2v1_extbgrx_merged_upsample_dspr2 + * jsimd_h2v1_extxbgr_merged_upsample_dspr2 + * jsimd_h2v1_extxrgb_merged_upsample_dspr2 + * + * Merged h2v1 upsample routines + */ + +.macro GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 colorid, pixel_size, \ + r1_offs, g1_offs, \ + b1_offs, a1_offs, \ + r2_offs, g2_offs, \ + b2_offs, a2_offs + +.macro STORE_H2V1_2_PIXELS scratch0 scratch1 scratch2 scratch3 scratch4 \ + scratch5 outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) + sb \scratch3, \r2_offs(\outptr) + sb \scratch4, \g2_offs(\outptr) + sb \scratch5, \b2_offs(\outptr) +.if (\pixel_size == 8) + li t0, 0xFF + sb t0, \a1_offs(\outptr) + sb t0, \a2_offs(\outptr) +.endif + addiu \outptr, \pixel_size +.endm + +.macro STORE_H2V1_1_PIXEL scratch0 scratch1 scratch2 outptr + sb \scratch0, \r1_offs(\outptr) + sb \scratch1, \g1_offs(\outptr) + sb \scratch2, \b1_offs(\outptr) +.if (\pixel_size == 8) + li t0, 0xFF + sb t0, \a1_offs(\outptr) +.endif +.endm + +LEAF_DSPR2(jsimd_h2v1_\colorid\()_merged_upsample_dspr2) +/* + * a0 = cinfo->output_width + * a1 = input_buf + * a2 = in_row_group_ctr + * a3 = output_buf + * 16(sp) = range_limit + */ + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra + + li t0, 0xe6ea + lw t1, 0(a1) // t1 = input_buf[0] + lw t2, 4(a1) // t2 = input_buf[1] + lw t3, 8(a1) // t3 = input_buf[2] + lw t8, 56(sp) // t8 = range_limit + addiu s1, t0, 0x7fff // s1 = 0x166e9 [FIX(1.40200)] + addiu s2, s1, 0x5eb9 // s2 
= 0x1c5a2 [FIX(1.77200)] + addiu s0, t0, 0x9916 // s0 = 0x8000 + addiu s4, zero, 0xa7e6 // s4 = 0xffffa7e6 [-FIX(0.34414)] + xori s3, s4, 0xeec8 // s3 = 0xffff492e [-FIX(0.71414)] + srl t0, a0, 1 + sll t4, a2, 2 + lwx s5, t4(t1) // s5 = inptr0 + lwx s6, t4(t2) // s6 = inptr1 + lwx s7, t4(t3) // s7 = inptr2 + lw t7, 0(a3) // t7 = outptr + blez t0, 2f + addu t9, s6, t0 // t9 = end address +1: + lbu t2, 0(s6) // t2 = cb + lbu t0, 0(s7) // t0 = cr + lbu t1, 0(s5) // t1 = y + addiu t2, t2, -128 // t2 = cb - 128 + addiu t0, t0, -128 // t0 = cr - 128 + mult $ac1, s4, t2 + madd $ac1, s3, t0 + sll t0, t0, 15 + sll t2, t2, 15 + mulq_rs.w t0, s1, t0 // t0 = (C1*cr + ONE_HALF)>> SCALEBITS + extr_r.w t5, $ac1, 16 + mulq_rs.w t6, s2, t2 // t6 = (C2*cb + ONE_HALF)>> SCALEBITS + addiu s7, s7, 1 + addiu s6, s6, 1 + addu t2, t1, t0 // t2 = y + cred + addu t3, t1, t5 // t3 = y + cgreen + addu t4, t1, t6 // t4 = y + cblue + addu t2, t8, t2 + addu t3, t8, t3 + addu t4, t8, t4 + lbu t1, 1(s5) + lbu v0, 0(t2) + lbu v1, 0(t3) + lbu ra, 0(t4) + addu t2, t1, t0 + addu t3, t1, t5 + addu t4, t1, t6 + addu t2, t8, t2 + addu t3, t8, t3 + addu t4, t8, t4 + lbu t2, 0(t2) + lbu t3, 0(t3) + lbu t4, 0(t4) + + STORE_H2V1_2_PIXELS v0, v1, ra, t2, t3, t4, t7 + + bne t9, s6, 1b + addiu s5, s5, 2 +2: + andi t0, a0, 1 + beqz t0, 4f + nop +3: + lbu t2, 0(s6) + lbu t0, 0(s7) + lbu t1, 0(s5) + addiu t2, t2, -128 // (cb - 128) + addiu t0, t0, -128 // (cr - 128) + mul t3, s4, t2 + mul t4, s3, t0 + sll t0, t0, 15 + sll t2, t2, 15 + mulq_rs.w t0, s1, t0 // (C1*cr + ONE_HALF)>> SCALEBITS + mulq_rs.w t6, s2, t2 // (C2*cb + ONE_HALF)>> SCALEBITS + addu t3, t3, s0 + addu t3, t4, t3 + sra t5, t3, 16 // (C4*cb + ONE_HALF + C3*cr)>> SCALEBITS + addu t2, t1, t0 // y + cred + addu t3, t1, t5 // y + cgreen + addu t4, t1, t6 // y + cblue + addu t2, t8, t2 + addu t3, t8, t3 + addu t4, t8, t4 + lbu t2, 0(t2) + lbu t3, 0(t3) + lbu t4, 0(t4) + + STORE_H2V1_1_PIXEL t2, t3, t4, t7 +4: + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, 
s3, s4, s5, s6, s7, ra + + j ra + nop + +END(jsimd_h2v1_\colorid\()_merged_upsample_dspr2) + +.purgem STORE_H2V1_1_PIXEL +.purgem STORE_H2V1_2_PIXELS +.endm + +/*------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */ +GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6 +GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6 +GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7 +GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7 +GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4 +GENERATE_H2V1_MERGED_UPSAMPLE_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4 + + +/*****************************************************************************/ +/* + * jsimd_h2v2_fancy_upsample_dspr2 + * + * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. + */ +LEAF_DSPR2(jsimd_h2v2_fancy_upsample_dspr2) +/* + * a0 = cinfo->max_v_samp_factor + * a1 = downsampled_width + * a2 = input_data + * a3 = output_data_ptr + */ + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5 + + li s4, 0 + lw s2, 0(a3) // s2 = *output_data_ptr +0: + li t9, 2 + lw s1, -4(a2) // s1 = inptr1 + +1: + lw s0, 0(a2) // s0 = inptr0 + lwx s3, s4(s2) + addiu s5, a1, -2 // s5 = downsampled_width - 2 + srl t4, s5, 1 + sll t4, t4, 1 + lbu t0, 0(s0) + lbu t1, 1(s0) + lbu t2, 0(s1) + lbu t3, 1(s1) + addiu s0, 2 + addiu s1, 2 + addu t8, s0, t4 // t8 = end address + andi s5, s5, 1 // s5 = residual + sll t4, t0, 1 + sll t6, t1, 1 + addu t0, t0, t4 // t0 = (*inptr0++) * 3 + addu t1, t1, t6 // t1 = (*inptr0++) * 3 + addu t7, t0, t2 // t7 = thiscolsum + addu t6, t1, t3 // t6 = nextcolsum + sll t0, t7, 2 // t0 = thiscolsum * 4 + subu t1, t0, t7 // t1 = thiscolsum * 3 + shra_r.w t0, t0, 4 + addiu t1, 7 + addu t1, t1, t6 + srl t1, t1, 4 + sb t0, 0(s3) + sb t1, 1(s3) + beq t8, s0, 22f // skip to final iteration if width == 3 + addiu s3, 2 +2: + lh t0, 0(s0) // t0 = A3|A2 + lh t2, 0(s1) // t2 = B3|B2 + addiu s0, 2
+ addiu s1, 2 + preceu.ph.qbr t0, t0 // t0 = 0|A3|0|A2 + preceu.ph.qbr t2, t2 // t2 = 0|B3|0|B2 + shll.ph t1, t0, 1 + sll t3, t6, 1 + addu.ph t0, t1, t0 // t0 = A3*3|A2*3 + addu t3, t3, t6 // t3 = this * 3 + addu.ph t0, t0, t2 // t0 = next2|next1 + addu t1, t3, t7 + andi t7, t0, 0xFFFF // t7 = next1 + sll t2, t7, 1 + addu t2, t7, t2 // t2 = next1*3 + addu t4, t2, t6 + srl t6, t0, 16 // t6 = next2 + shra_r.w t1, t1, 4 // t1 = (this*3 + last + 8) >> 4 + addu t0, t3, t7 + addiu t0, 7 + srl t0, t0, 4 // t0 = (this*3 + next1 + 7) >> 4 + shra_r.w t4, t4, 4 // t4 = (next1*3 + this + 8) >> 4 + addu t2, t2, t6 + addiu t2, 7 + srl t2, t2, 4 // t2 = (next1*3 + next2 + 7) >> 4 + sb t1, 0(s3) + sb t0, 1(s3) + sb t4, 2(s3) + sb t2, 3(s3) + bne t8, s0, 2b + addiu s3, 4 +22: + beqz s5, 4f + addu t8, s0, s5 +3: + lbu t0, 0(s0) + lbu t2, 0(s1) + addiu s0, 1 + addiu s1, 1 + sll t3, t6, 1 + sll t1, t0, 1 + addu t1, t0, t1 // t1 = inptr0 * 3 + addu t3, t3, t6 // t3 = thiscolsum * 3 + addu t5, t1, t2 + addu t1, t3, t7 + shra_r.w t1, t1, 4 + addu t0, t3, t5 + addiu t0, 7 + srl t0, t0, 4 + sb t1, 0(s3) + sb t0, 1(s3) + addiu s3, 2 + move t7, t6 + bne t8, s0, 3b + move t6, t5 +4: + sll t0, t6, 2 // t0 = thiscolsum * 4 + subu t1, t0, t6 // t1 = thiscolsum * 3 + addu t1, t1, t7 + addiu s4, 4 + shra_r.w t1, t1, 4 + addiu t0, 7 + srl t0, t0, 4 + sb t1, 0(s3) + sb t0, 1(s3) + addiu t9, -1 + addiu s3, 2 + bnez t9, 1b + lw s1, 4(a2) + srl t0, s4, 2 + subu t0, a0, t0 + bgtz t0, 0b + addiu a2, 4 + + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5 + + j ra + nop +END(jsimd_h2v2_fancy_upsample_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_h2v1_fancy_upsample_dspr2) +/* + * a0 = cinfo->max_v_samp_factor + * a1 = downsampled_width + * a2 = input_data + * a3 = output_data_ptr + */ + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + .set at + + beqz a0, 3f + sll t0, a0, 2 + lw s1, 0(a3) + li s3, 0x10001 + addu s0, s1, t0 +0: + addiu t8, a1, -2 +
srl t9, t8, 2 + lw t7, 0(a2) + lw s2, 0(s1) + lbu t0, 0(t7) + lbu t1, 1(t7) // t1 = inptr[1] + sll t2, t0, 1 + addu t2, t2, t0 // t2 = invalue*3 + addu t2, t2, t1 + shra_r.w t2, t2, 2 + sb t0, 0(s2) + sb t2, 1(s2) + beqz t9, 11f + addiu s2, 2 +1: + ulw t0, 0(t7) // t0 = |P3|P2|P1|P0| + ulw t1, 1(t7) + ulh t2, 4(t7) // t2 = |0|0|P5|P4| + preceu.ph.qbl t3, t0 // t3 = |0|P3|0|P2| + preceu.ph.qbr t0, t0 // t0 = |0|P1|0|P0| + preceu.ph.qbr t2, t2 // t2 = |0|P5|0|P4| + preceu.ph.qbl t4, t1 // t4 = |0|P4|0|P3| + preceu.ph.qbr t1, t1 // t1 = |0|P2|0|P1| + shll.ph t5, t4, 1 + shll.ph t6, t1, 1 + addu.ph t5, t5, t4 // t5 = |P4*3|P3*3| + addu.ph t6, t6, t1 // t6 = |P2*3|P1*3| + addu.ph t4, t3, s3 + addu.ph t0, t0, s3 + addu.ph t4, t4, t5 + addu.ph t0, t0, t6 + shrl.ph t4, t4, 2 // t4 = |0|P3|0|P2| + shrl.ph t0, t0, 2 // t0 = |0|P1|0|P0| + addu.ph t2, t2, t5 + addu.ph t3, t3, t6 + shra_r.ph t2, t2, 2 // t2 = |0|P5|0|P4| + shra_r.ph t3, t3, 2 // t3 = |0|P3|0|P2| + shll.ph t2, t2, 8 + shll.ph t3, t3, 8 + or t2, t4, t2 + or t3, t3, t0 + addiu t9, -1 + usw t3, 0(s2) + usw t2, 4(s2) + addiu s2, 8 + bgtz t9, 1b + addiu t7, 4 +11: + andi t8, 3 + beqz t8, 22f + addiu t7, 1 + +2: + lbu t0, 0(t7) + addiu t7, 1 + sll t1, t0, 1 + addu t2, t0, t1 // t2 = invalue*3 + lbu t3, -2(t7) + lbu t4, 0(t7) + addiu t3, 1 + addiu t4, 2 + addu t3, t3, t2 + addu t4, t4, t2 + srl t3, 2 + srl t4, 2 + sb t3, 0(s2) + sb t4, 1(s2) + addiu t8, -1 + bgtz t8, 2b + addiu s2, 2 + +22: + lbu t0, 0(t7) + lbu t2, -1(t7) + sll t1, t0, 1 + addu t1, t1, t0 // t1 = invalue * 3 + addu t1, t1, t2 + addiu t1, 1 + srl t1, t1, 2 + sb t1, 0(s2) + sb t0, 1(s2) + addiu s1, 4 + bne s1, s0, 0b + addiu a2, 4 +3: + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + j ra + nop +END(jsimd_h2v1_fancy_upsample_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_h2v1_downsample_dspr2) +/* + * a0 = cinfo->image_width + * a1 = cinfo->max_v_samp_factor + * a2 = compptr->v_samp_factor + *
a3 = compptr->width_in_blocks + * 16(sp) = input_data + * 20(sp) = output_data + */ + .set at + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4 + + beqz a2, 7f + lw s1, 44(sp) // s1 = output_data + lw s0, 40(sp) // s0 = input_data + srl s2, a0, 2 + andi t9, a0, 2 + srl t7, t9, 1 + addu s2, t7, s2 + sll t0, a3, 3 // t0 = width_in_blocks*DCT + srl t7, t0, 1 + subu s2, t7, s2 +0: + andi t6, a0, 1 // t6 = temp_index + addiu t6, -1 + lw t4, 0(s1) // t4 = outptr + lw t5, 0(s0) // t5 = inptr0 + li s3, 0 // s3 = bias + srl t7, a0, 1 // t7 = image_width1 + srl s4, t7, 2 + andi t8, t7, 3 +1: + ulhu t0, 0(t5) + ulhu t1, 2(t5) + ulhu t2, 4(t5) + ulhu t3, 6(t5) + raddu.w.qb t0, t0 + raddu.w.qb t1, t1 + raddu.w.qb t2, t2 + raddu.w.qb t3, t3 + shra.ph t0, t0, 1 + shra_r.ph t1, t1, 1 + shra.ph t2, t2, 1 + shra_r.ph t3, t3, 1 + sb t0, 0(t4) + sb t1, 1(t4) + sb t2, 2(t4) + sb t3, 3(t4) + addiu s4, -1 + addiu t4, 4 + bgtz s4, 1b + addiu t5, 8 + beqz t8, 3f + addu s4, t4, t8 +2: + ulhu t0, 0(t5) + raddu.w.qb t0, t0 + addqh.w t0, t0, s3 + xori s3, s3, 1 + sb t0, 0(t4) + addiu t4, 1 + bne t4, s4, 2b + addiu t5, 2 +3: + lbux t1, t6(t5) + sll t1, 1 + addqh.w t2, t1, s3 // t2 = pixval1 + xori s3, s3, 1 + addqh.w t3, t1, s3 // t3 = pixval2 + blez s2, 5f + append t3, t2, 8 + addu t5, t4, s2 // t5 = loop_end2 +4: + ush t3, 0(t4) + addiu s2, -1 + bgtz s2, 4b + addiu t4, 2 +5: + beqz t9, 6f + nop + sb t2, 0(t4) +6: + addiu s1, 4 + addiu a2, -1 + bnez a2, 0b + addiu s0, 4 +7: + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4 + + j ra + nop +END(jsimd_h2v1_downsample_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_h2v2_downsample_dspr2) +/* + * a0 = cinfo->image_width + * a1 = cinfo->max_v_samp_factor + * a2 = compptr->v_samp_factor + * a3 = compptr->width_in_blocks + * 16(sp) = input_data + * 20(sp) = output_data + */ + .set at + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + beqz a2, 8f + lw s1, 52(sp) // s1 = output_data + 
lw s0, 48(sp) // s0 = input_data + + andi t6, a0, 1 // t6 = temp_index + addiu t6, -1 + srl t7, a0, 1 // t7 = image_width1 + srl s4, t7, 2 + andi t8, t7, 3 + andi t9, a0, 2 + srl s2, a0, 2 + srl t7, t9, 1 + addu s2, t7, s2 + sll t0, a3, 3 // t0 = width_in_blocks*DCT + srl t7, t0, 1 + subu s2, t7, s2 +0: + lw t4, 0(s1) // t4 = outptr + lw t5, 0(s0) // t5 = inptr0 + lw s7, 4(s0) // s7 = inptr1 + li s6, 1 // s6 = bias +2: + ulw t0, 0(t5) // t0 = |P3|P2|P1|P0| + ulw t1, 0(s7) // t1 = |Q3|Q2|Q1|Q0| + ulw t2, 4(t5) + ulw t3, 4(s7) + precrq.ph.w t7, t0, t1 // t7 = |P3|P2|Q3|Q2| + ins t0, t1, 16, 16 // t0 = |Q1|Q0|P1|P0| + raddu.w.qb t1, t7 + raddu.w.qb t0, t0 + shra_r.w t1, t1, 2 + addiu t0, 1 + srl t0, 2 + precrq.ph.w t7, t2, t3 + ins t2, t3, 16, 16 + raddu.w.qb t7, t7 + raddu.w.qb t2, t2 + shra_r.w t7, t7, 2 + addiu t2, 1 + srl t2, 2 + sb t0, 0(t4) + sb t1, 1(t4) + sb t2, 2(t4) + sb t7, 3(t4) + addiu t4, 4 + addiu t5, 8 + addiu s4, s4, -1 + bgtz s4, 2b + addiu s7, 8 + beqz t8, 4f + addu t8, t4, t8 +3: + ulhu t0, 0(t5) + ulhu t1, 0(s7) + ins t0, t1, 16, 16 + raddu.w.qb t0, t0 + addu t0, t0, s6 + srl t0, 2 + xori s6, s6, 3 + sb t0, 0(t4) + addiu t5, 2 + addiu t4, 1 + bne t8, t4, 3b + addiu s7, 2 +4: + lbux t1, t6(t5) + sll t1, 1 + lbux t0, t6(s7) + sll t0, 1 + addu t1, t1, t0 + addu t3, t1, s6 + srl t0, t3, 2 // t0 = pixval1 + xori s6, s6, 3 + addu t2, t1, s6 + srl t1, t2, 2 // t1 = pixval2 + blez s2, 6f + append t1, t0, 8 +5: + ush t1, 0(t4) + addiu s2, -1 + bgtz s2, 5b + addiu t4, 2 +6: + beqz t9, 7f + nop + sb t0, 0(t4) +7: + addiu s1, 4 + addiu a2, -1 + bnez a2, 0b + addiu s0, 8 +8: + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_h2v2_downsample_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_h2v2_smooth_downsample_dspr2) +/* + * a0 = input_data + * a1 = output_data + * a2 = compptr->v_samp_factor + * a3 = cinfo->max_v_samp_factor + * 16(sp) = cinfo->smoothing_factor +
* 20(sp) = compptr->width_in_blocks + * 24(sp) = cinfo->image_width + */ + .set at + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s7, 52(sp) // compptr->width_in_blocks + lw s0, 56(sp) // cinfo->image_width + lw s6, 48(sp) // cinfo->smoothing_factor + sll s7, 3 // output_cols = width_in_blocks * DCTSIZE + sll v0, s7, 1 + subu v0, v0, s0 + blez v0, 2f + move v1, zero + addiu t0, a3, 2 // t0 = cinfo->max_v_samp_factor + 2 +0: + addiu t1, a0, -4 + sll t2, v1, 2 + lwx t1, t2(t1) + move t3, v0 + addu t1, t1, s0 + lbu t2, -1(t1) +1: + addiu t3, t3, -1 + sb t2, 0(t1) + bgtz t3, 1b + addiu t1, t1, 1 + addiu v1, v1, 1 + bne v1, t0, 0b + nop +2: + li v0, 80 + mul v0, s6, v0 + li v1, 16384 + move t4, zero + move t5, zero + subu t6, v1, v0 // t6 = 16384 - tmp_smoot_f * 80 + sll t7, s6, 4 // t7 = tmp_smoot_f * 16 +3: +/* Special case for first column: pretend column -1 is same as column 0 */ + sll v0, t4, 2 + lwx t8, v0(a1) // outptr = output_data[outrow] + sll v1, t5, 2 + addiu t9, v1, 4 + addiu s0, v1, -4 + addiu s1, v1, 8 + lwx s2, v1(a0) // inptr0 = input_data[inrow] + lwx t9, t9(a0) // inptr1 = input_data[inrow+1] + lwx s0, s0(a0) // above_ptr = input_data[inrow-1] + lwx s1, s1(a0) // below_ptr = input_data[inrow+2] + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, 0(s2) + lbu v1, 2(s2) + lbu t0, 0(t9) + lbu t1, 2(t9) + addu v0, v0, v1 + mult $ac1, t2, t6 + addu t0, t0, t1 + lbu t2, 2(s0) + addu t0, t0, v0 + lbu t3, 2(s1) + addu s3, t0, s3 + lbu v0, 0(s0) + lbu t0, 0(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + addu t0, t0, v0 + addu s3, t0, s3 + madd $ac1, s3, t7 + extr_r.w v0, $ac1, 16 + addiu t8, t8, 1 + addiu s2, s2, 2 + addiu t9, t9, 2 + addiu s0, s0, 2 + addiu s1, s1, 2 + sb v0, -1(t8) + addiu s4, s7, -2 + and s4, s4, 3 + addu s5, s4, t8 // end address +4: + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 
+ ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, -1(s2) + lbu v1, 2(s2) + lbu t0, -1(t9) + lbu t1, 2(t9) + addu v0, v0, v1 + mult $ac1, t2, t6 + addu t0, t0, t1 + lbu t2, 2(s0) + addu t0, t0, v0 + lbu t3, 2(s1) + addu s3, t0, s3 + lbu v0, -1(s0) + lbu t0, -1(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + addu t0, t0, v0 + addu s3, t0, s3 + madd $ac1, s3, t7 + extr_r.w t2, $ac1, 16 + addiu t8, t8, 1 + addiu s2, s2, 2 + addiu t9, t9, 2 + addiu s0, s0, 2 + sb t2, -1(t8) + bne s5, t8, 4b + addiu s1, s1, 2 + addiu s5, s7, -2 + subu s5, s5, s4 + addu s5, s5, t8 // end address +5: + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, -1(s2) + lbu v1, 2(s2) + lbu t0, -1(t9) + lbu t1, 2(t9) + addu v0, v0, v1 + mult $ac1, t2, t6 + addu t0, t0, t1 + lbu t2, 2(s0) + addu t0, t0, v0 + lbu t3, 2(s1) + addu s3, t0, s3 + lbu v0, -1(s0) + lbu t0, -1(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + lh v1, 2(t9) + addu t0, t0, v0 + lh v0, 2(s2) + addu s3, t0, s3 + lh t0, 2(s0) + lh t1, 2(s1) + madd $ac1, s3, t7 + extr_r.w t2, $ac1, 16 + ins t0, t1, 16, 16 + ins v0, v1, 16, 16 + raddu.w.qb s3, t0 + lbu v1, 4(s2) + lbu t0, 1(t9) + lbu t1, 4(t9) + sb t2, 0(t8) + raddu.w.qb t3, v0 + lbu v0, 1(s2) + addu t0, t0, t1 + mult $ac1, t3, t6 + addu v0, v0, v1 + lbu t2, 4(s0) + addu t0, t0, v0 + lbu v0, 1(s0) + addu s3, t0, s3 + lbu t0, 1(s1) + lbu t3, 4(s1) + addu v0, v0, t2 + sll s3, s3, 1 + addu t0, t0, t3 + lh v1, 4(t9) + addu t0, t0, v0 + lh v0, 4(s2) + addu s3, t0, s3 + lh t0, 4(s0) + lh t1, 4(s1) + madd $ac1, s3, t7 + extr_r.w t2, $ac1, 16 + ins t0, t1, 16, 16 + ins v0, v1, 16, 16 + raddu.w.qb s3, t0 + lbu v1, 6(s2) + lbu t0, 3(t9) + lbu t1, 6(t9) + sb t2, 1(t8) + raddu.w.qb t3, v0 + lbu v0, 3(s2) + addu t0, t0, t1 + mult $ac1, t3, t6 + addu v0, v0, v1 + lbu t2, 6(s0) + addu t0, t0, v0 + lbu v0, 3(s0) + addu s3, t0, s3 + lbu t0, 3(s1) + lbu t3, 6(s1) + 
addu v0, v0, t2 + sll s3, s3, 1 + addu t0, t0, t3 + lh v1, 6(t9) + addu t0, t0, v0 + lh v0, 6(s2) + addu s3, t0, s3 + lh t0, 6(s0) + lh t1, 6(s1) + madd $ac1, s3, t7 + extr_r.w t3, $ac1, 16 + ins t0, t1, 16, 16 + ins v0, v1, 16, 16 + raddu.w.qb s3, t0 + lbu v1, 8(s2) + lbu t0, 5(t9) + lbu t1, 8(t9) + sb t3, 2(t8) + raddu.w.qb t2, v0 + lbu v0, 5(s2) + addu t0, t0, t1 + mult $ac1, t2, t6 + addu v0, v0, v1 + lbu t2, 8(s0) + addu t0, t0, v0 + lbu v0, 5(s0) + addu s3, t0, s3 + lbu t0, 5(s1) + lbu t3, 8(s1) + addu v0, v0, t2 + sll s3, s3, 1 + addu t0, t0, t3 + addiu t8, t8, 4 + addu t0, t0, v0 + addiu s2, s2, 8 + addu s3, t0, s3 + addiu t9, t9, 8 + madd $ac1, s3, t7 + extr_r.w t1, $ac1, 16 + addiu s0, s0, 8 + addiu s1, s1, 8 + bne s5, t8, 5b + sb t1, -1(t8) +/* Special case for last column */ + lh v0, 0(s2) + lh v1, 0(t9) + lh t0, 0(s0) + lh t1, 0(s1) + ins v0, v1, 16, 16 + ins t0, t1, 16, 16 + raddu.w.qb t2, v0 + raddu.w.qb s3, t0 + lbu v0, -1(s2) + lbu v1, 1(s2) + lbu t0, -1(t9) + lbu t1, 1(t9) + addu v0, v0, v1 + mult $ac1, t2, t6 + addu t0, t0, t1 + lbu t2, 1(s0) + addu t0, t0, v0 + lbu t3, 1(s1) + addu s3, t0, s3 + lbu v0, -1(s0) + lbu t0, -1(s1) + sll s3, s3, 1 + addu v0, v0, t2 + addu t0, t0, t3 + addu t0, t0, v0 + addu s3, t0, s3 + madd $ac1, s3, t7 + extr_r.w t0, $ac1, 16 + addiu t5, t5, 2 + sb t0, 0(t8) + addiu t4, t4, 1 + bne t4, a2, 3b + addiu t5, t5, 2 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_h2v2_smooth_downsample_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_int_upsample_dspr2) +/* + * a0 = upsample->h_expand[compptr->component_index] + * a1 = upsample->v_expand[compptr->component_index] + * a2 = input_data + * a3 = output_data_ptr + * 16(sp) = cinfo->output_width + * 20(sp) = cinfo->max_v_samp_factor + */ + .set at + + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + lw s0, 0(a3) // s0 = output_data + lw s1, 32(sp) // s1 = cinfo->output_width + lw 
s2, 36(sp) // s2 = cinfo->max_v_samp_factor + li t6, 0 // t6 = inrow (added to input_data below as an unscaled byte offset) + beqz s2, 10f + li s3, 0 // s3 = outrow (added to s0 below as an unscaled byte offset) +0: + addu t0, a2, t6 // t0 = address of the input row pointer + addu t7, s0, s3 // t7 = address of the output row pointer + lw t3, 0(t0) // t3 = inptr + lw t8, 0(t7) // t8 = outptr + beqz s1, 4f + addu t5, t8, s1 // t5 = outend +1: + lb t2, 0(t3) // t2 = invalue = *inptr++ + addiu t3, 1 + beqz a0, 3f + move t0, a0 // t0 = h_expand +2: + sb t2, 0(t8) // replicate invalue, one store per h_expand count + addiu t0, -1 + bgtz t0, 2b + addiu t8, 1 +3: + bgt t5, t8, 1b // repeat while outptr is below outend + nop +4: + addiu t9, a1, -1 // t9 = v_expand - 1 + blez t9, 9f + nop +5: + lw t3, 0(s0) // t3 = copy source row, s0[0] + lw t4, 4(s0) // t4 = copy destination row, s0[1] + subu t0, s1, 0xF + blez t0, 7f // output_width <= 15: byte loop only + addu t5, t3, s1 // t5 = end address + andi t7, s1, 0xF // t7 = residual + subu t8, t5, t7 // t8 = end address - residual +6: + ulw t0, 0(t3) + ulw t1, 4(t3) + ulw t2, 8(t3) + usw t0, 0(t4) + ulw t0, 12(t3) + usw t1, 4(t4) + usw t2, 8(t4) + usw t0, 12(t4) + addiu t3, 16 + bne t3, t8, 6b + addiu t4, 16 + beqz t7, 8f + nop +7: + lbu t0, 0(t3) + sb t0, 0(t4) + addiu t3, 1 + bne t3, t5, 7b + addiu t4, 1 +8: + addiu t9, -1 + bgtz t9, 5b + addiu s0, 8 // s0 advances by 8 bytes (two 4-byte row pointers) per copy; NOTE(review): s0 is never reset, so each further copy and any later pass of loop 0 works from the advanced base -- confirm intended +9: + addu s3, s3, a1 // outrow advances by v_expand + bne s3, s2, 0b // loop until outrow == max_v_samp_factor + addiu t6, 1 // inrow advances by 1; NOTE(review): inrow and outrow are used unscaled as byte offsets into arrays of 4-byte row pointers, so a second pass of loop 0 would lw from input_data plus 1 byte -- confirm only the single-pass case (v_expand == max_v_samp_factor) is reachable +10: + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + j ra + nop +END(jsimd_int_upsample_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_h2v1_upsample_dspr2) +/* + * a0 = cinfo->max_v_samp_factor + * a1 = cinfo->output_width + * a2 = input_data + * a3 = output_data_ptr + */ + lw t7, 0(a3) // t7 = output_data + andi t8, a1, 0xf // t8 = residual + sll t0, a0, 2 + blez a0, 4f + addu t9, t7, t0 // t9 = output_data end address +0: + lw t5, 0(t7) // t5 = outptr + lw t6, 0(a2) // t6 = inptr + addu t3, t5, a1 // t3 = outptr + output_width (end address) + subu t3, t8 // t3 = end address - residual + beq t5, t3, 2f + move t4, t8 +1: + ulw t0, 0(t6) // t0 = |P3|P2|P1|P0| + ulw t2, 4(t6) // t2 = |P7|P6|P5|P4| + srl t1, t0, 16 // t1 = |X|X|P3|P2| + ins t0, t0, 16, 16 // t0 = |P1|P0|P1|P0| + ins t1, t1, 16, 16 // t1 = |P3|P2|P3|P2| + ins t0, t0, 8, 16 // t0 = |P1|P1|P0|P0| + ins t1, 
t1, 8, 16 // t1 = |P3|P3|P2|P2| + usw t0, 0(t5) + usw t1, 4(t5) + srl t0, t2, 16 // t0 = |X|X|P7|P6| + ins t2, t2, 16, 16 // t2 = |P5|P4|P5|P4| + ins t0, t0, 16, 16 // t0 = |P7|P6|P7|P6| + ins t2, t2, 8, 16 // t2 = |P5|P5|P4|P4| + ins t0, t0, 8, 16 // t0 = |P7|P7|P6|P6| + usw t2, 8(t5) + usw t0, 12(t5) + addiu t5, 16 + bne t5, t3, 1b + addiu t6, 8 + beqz t8, 3f + move t4, t8 +2: + lbu t1, 0(t6) + sb t1, 0(t5) + sb t1, 1(t5) + addiu t4, -2 + addiu t6, 1 + bgtz t4, 2b + addiu t5, 2 +3: + addiu t7, 4 + bne t9, t7, 0b + addiu a2, 4 +4: + j ra + nop +END(jsimd_h2v1_upsample_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_h2v2_upsample_dspr2) +/* + * a0 = cinfo->max_v_samp_factor + * a1 = cinfo->output_width + * a2 = input_data + * a3 = output_data_ptr + */ + lw t7, 0(a3) + blez a0, 7f + andi t9, a1, 0xf // t9 = residual +0: + lw t6, 0(a2) // t6 = inptr + lw t5, 0(t7) // t5 = outptr + addu t8, t5, a1 // t8 = outptr end address + subu t8, t9 // t8 = end address - residual + beq t5, t8, 2f + move t4, t9 +1: + ulw t0, 0(t6) + srl t1, t0, 16 + ins t0, t0, 16, 16 + ins t0, t0, 8, 16 + ins t1, t1, 16, 16 + ins t1, t1, 8, 16 + ulw t2, 4(t6) + usw t0, 0(t5) + usw t1, 4(t5) + srl t3, t2, 16 + ins t2, t2, 16, 16 + ins t2, t2, 8, 16 + ins t3, t3, 16, 16 + ins t3, t3, 8, 16 + usw t2, 8(t5) + usw t3, 12(t5) + addiu t5, 16 + bne t5, t8, 1b + addiu t6, 8 + beqz t9, 3f + move t4, t9 +2: + lbu t0, 0(t6) + sb t0, 0(t5) + sb t0, 1(t5) + addiu t4, -2 + addiu t6, 1 + bgtz t4, 2b + addiu t5, 2 +3: + lw t6, 0(t7) // t6 = outptr[0] + lw t5, 4(t7) // t5 = outptr[1] + addu t4, t6, a1 // t4 = new end address + beq a1, t9, 5f + subu t8, t4, t9 +4: + ulw t0, 0(t6) + ulw t1, 4(t6) + ulw t2, 8(t6) + usw t0, 0(t5) + ulw t0, 12(t6) + usw t1, 4(t5) + usw t2, 8(t5) + usw t0, 12(t5) + addiu t6, 16 + bne t6, t8, 4b + addiu t5, 16 + beqz t9, 6f + nop +5: + lbu t0, 0(t6) + sb t0, 0(t5) + addiu t6, 1 + bne t6, t4, 5b + addiu t5, 1 +6: + addiu t7, 8 
+ addiu a0, -2 + bgtz a0, 0b + addiu a2, 4 +7: + j ra + nop +END(jsimd_h2v2_upsample_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_idct_islow_dspr2) +/* + * a0 = coef_block + * a1 = compptr->dcttable + * a2 = output + * a3 = range_limit + */ + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + addiu sp, sp, -256 + move v0, sp + addiu v1, zero, 8 // v1 = DCTSIZE = 8 +1: + lh s4, 32(a0) // s4 = inptr[16] + lh s5, 64(a0) // s5 = inptr[32] + lh s6, 96(a0) // s6 = inptr[48] + lh t1, 112(a0) // t1 = inptr[56] + lh t7, 16(a0) // t7 = inptr[8] + lh t5, 80(a0) // t5 = inptr[40] + lh t3, 48(a0) // t3 = inptr[24] + or s4, s4, t1 + or s4, s4, t3 + or s4, s4, t5 + or s4, s4, t7 + or s4, s4, s5 + or s4, s4, s6 + bnez s4, 2f + addiu v1, v1, -1 + lh s5, 0(a1) // quantptr[DCTSIZE*0] + lh s6, 0(a0) // inptr[DCTSIZE*0] + mul s5, s5, s6 // DEQUANTIZE(inptr[0], quantptr[0]) + sll s5, s5, 2 + sw s5, 0(v0) + sw s5, 32(v0) + sw s5, 64(v0) + sw s5, 96(v0) + sw s5, 128(v0) + sw s5, 160(v0) + sw s5, 192(v0) + b 3f + sw s5, 224(v0) +2: + lh t0, 112(a1) + lh t2, 48(a1) + lh t4, 80(a1) + lh t6, 16(a1) + mul t0, t0, t1 // DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]) + mul t1, t2, t3 // DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]) + mul t2, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]) + mul t3, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]) + lh t4, 32(a1) + lh t5, 32(a0) + lh t6, 96(a1) + lh t7, 96(a0) + addu s0, t0, t1 // z3 = tmp0 + tmp2 + addu s1, t1, t2 // z2 = tmp1 + tmp2 + addu s2, t2, t3 // z4 = tmp1 + tmp3 + addu s3, s0, s2 // z3 + z4 + addiu t9, zero, 9633 // FIX_1_175875602 + mul s3, s3, t9 // z5 = MULTIPLY(z3 + z4, FIX_1_175875602) + addu t8, t0, t3 // z1 = tmp0 + tmp3 + addiu t9, zero, 2446 // FIX_0_298631336 + mul t0, t0, t9 // tmp0 = MULTIPLY(tmp0, FIX_0_298631336) + addiu t9, zero, 16819 // FIX_2_053119869 + mul t2, t2, t9 // tmp1 = MULTIPLY(tmp1, FIX_2_053119869) + addiu 
t9, zero, 25172 // FIX_3_072711026 + mul t1, t1, t9 // tmp2 = MULTIPLY(tmp2, FIX_3_072711026) + addiu t9, zero, 12299 // FIX_1_501321110 + mul t3, t3, t9 // tmp3 = MULTIPLY(tmp3, FIX_1_501321110) + addiu t9, zero, 16069 // FIX_1_961570560 + mul s0, s0, t9 // -z3 = MULTIPLY(z3, FIX_1_961570560) + addiu t9, zero, 3196 // FIX_0_390180644 + mul s2, s2, t9 // -z4 = MULTIPLY(z4, FIX_0_390180644) + addiu t9, zero, 7373 // FIX_0_899976223 + mul t8, t8, t9 // -z1 = MULTIPLY(z1, FIX_0_899976223) + addiu t9, zero, 20995 // FIX_2_562915447 + mul s1, s1, t9 // -z2 = MULTIPLY(z2, FIX_2_562915447) + subu s0, s3, s0 // z3 += z5 + addu t0, t0, s0 // tmp0 += z3 + addu t1, t1, s0 // tmp2 += z3 + subu s2, s3, s2 // z4 += z5 + addu t2, t2, s2 // tmp1 += z4 + addu t3, t3, s2 // tmp3 += z4 + subu t0, t0, t8 // tmp0 += z1 + subu t1, t1, s1 // tmp2 += z2 + subu t2, t2, s1 // tmp1 += z2 + subu t3, t3, t8 // tmp3 += z1 + mul s0, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]) + addiu t9, zero, 6270 // FIX_0_765366865 + mul s1, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]) + lh t4, 0(a1) + lh t5, 0(a0) + lh t6, 64(a1) + lh t7, 64(a0) + mul s2, t9, s0 // MULTIPLY(z2, FIX_0_765366865) + mul t5, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) + mul t6, t6, t7 // DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) + addiu t9, zero, 4433 // FIX_0_541196100 + addu s3, s0, s1 // z2 + z3 + mul s3, s3, t9 // z1 = MULTIPLY(z2 + z3, FIX_0_541196100) + addiu t9, zero, 15137 // FIX_1_847759065 + mul t8, s1, t9 // MULTIPLY(z3, FIX_1_847759065) + addu t4, t5, t6 + subu t5, t5, t6 + sll t4, t4, 13 // tmp0 = (z2 + z3) << CONST_BITS + sll t5, t5, 13 // tmp1 = (z2 - z3) << CONST_BITS + addu t7, s3, s2 // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865) + subu t6, s3, t8 // tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065) + addu s0, t4, t7 + subu s1, t4, t7 + addu s2, t5, t6 + subu s3, t5, t6 + addu t4, s0, t3 + subu s0, s0, t3 + addu t3, s2, t1 + subu s2, s2, t1 + addu t1, s3, t2 + subu 
s3, s3, t2 + addu t2, s1, t0 + subu s1, s1, t0 + shra_r.w t4, t4, 11 + shra_r.w t3, t3, 11 + shra_r.w t1, t1, 11 + shra_r.w t2, t2, 11 + shra_r.w s1, s1, 11 + shra_r.w s3, s3, 11 + shra_r.w s2, s2, 11 + shra_r.w s0, s0, 11 + sw t4, 0(v0) + sw t3, 32(v0) + sw t1, 64(v0) + sw t2, 96(v0) + sw s1, 128(v0) + sw s3, 160(v0) + sw s2, 192(v0) + sw s0, 224(v0) +3: + addiu a1, a1, 2 + addiu a0, a0, 2 + bgtz v1, 1b + addiu v0, v0, 4 + move v0, sp + addiu v1, zero, 8 +4: + lw t0, 8(v0) // z2 = (JLONG)wsptr[2] + lw t1, 24(v0) // z3 = (JLONG)wsptr[6] + lw t2, 0(v0) // (JLONG)wsptr[0] + lw t3, 16(v0) // (JLONG)wsptr[4] + lw s4, 4(v0) // (JLONG)wsptr[1] + lw s5, 12(v0) // (JLONG)wsptr[3] + lw s6, 20(v0) // (JLONG)wsptr[5] + lw s7, 28(v0) // (JLONG)wsptr[7] + or s4, s4, t0 + or s4, s4, t1 + or s4, s4, t3 + or s4, s4, s7 + or s4, s4, s5 + or s4, s4, s6 + bnez s4, 5f + addiu v1, v1, -1 + shra_r.w s5, t2, 5 + andi s5, s5, 0x3ff + lbux s5, s5(a3) + lw s1, 0(a2) + replv.qb s5, s5 + usw s5, 0(s1) + usw s5, 4(s1) + b 6f + nop +5: + addu t4, t0, t1 // z2 + z3 + addiu t8, zero, 4433 // FIX_0_541196100 + mul t5, t4, t8 // z1 = MULTIPLY(z2 + z3, FIX_0_541196100) + addiu t8, zero, 15137 // FIX_1_847759065 + mul t1, t1, t8 // MULTIPLY(z3, FIX_1_847759065) + addiu t8, zero, 6270 // FIX_0_765366865 + mul t0, t0, t8 // MULTIPLY(z2, FIX_0_765366865) + addu t4, t2, t3 // (JLONG)wsptr[0] + (JLONG)wsptr[4] + subu t2, t2, t3 // (JLONG)wsptr[0] - (JLONG)wsptr[4] + sll t4, t4, 13 // tmp0 = (wsptr[0] + wsptr[4]) << CONST_BITS + sll t2, t2, 13 // tmp1 = (wsptr[0] - wsptr[4]) << CONST_BITS + subu t1, t5, t1 // tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065) + subu t3, t2, t1 // tmp12 = tmp1 - tmp2 + addu t2, t2, t1 // tmp11 = tmp1 + tmp2 + addu t5, t5, t0 // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865) + subu t1, t4, t5 // tmp13 = tmp0 - tmp3 + addu t0, t4, t5 // tmp10 = tmp0 + tmp3 + lw t4, 28(v0) // tmp0 = (JLONG)wsptr[7] + lw t6, 12(v0) // tmp2 = (JLONG)wsptr[3] + lw t5, 20(v0) // tmp1 = (JLONG)wsptr[5] + lw t7, 
4(v0) // tmp3 = (JLONG)wsptr[1] + addu s0, t4, t6 // z3 = tmp0 + tmp2 + addiu t8, zero, 9633 // FIX_1_175875602 + addu s1, t5, t7 // z4 = tmp1 + tmp3 + addu s2, s0, s1 // z3 + z4 + mul s2, s2, t8 // z5 = MULTIPLY(z3 + z4, FIX_1_175875602) + addu s3, t4, t7 // z1 = tmp0 + tmp3 + addu t9, t5, t6 // z2 = tmp1 + tmp2 + addiu t8, zero, 16069 // FIX_1_961570560 + mul s0, s0, t8 // -z3 = MULTIPLY(z3, FIX_1_961570560) + addiu t8, zero, 3196 // FIX_0_390180644 + mul s1, s1, t8 // -z4 = MULTIPLY(z4, FIX_0_390180644) + addiu t8, zero, 2446 // FIX_0_298631336 + mul t4, t4, t8 // tmp0 = MULTIPLY(tmp0, FIX_0_298631336) + addiu t8, zero, 7373 // FIX_0_899976223 + mul s3, s3, t8 // -z1 = MULTIPLY(z1, FIX_0_899976223) + addiu t8, zero, 16819 // FIX_2_053119869 + mul t5, t5, t8 // tmp1 = MULTIPLY(tmp1, FIX_2_053119869) + addiu t8, zero, 20995 // FIX_2_562915447 + mul t9, t9, t8 // -z2 = MULTIPLY(z2, FIX_2_562915447) + addiu t8, zero, 25172 // FIX_3_072711026 + mul t6, t6, t8 // tmp2 = MULTIPLY(tmp2, FIX_3_072711026) + addiu t8, zero, 12299 // FIX_1_501321110 + mul t7, t7, t8 // tmp3 = MULTIPLY(tmp3, FIX_1_501321110) + subu s0, s2, s0 // z3 += z5 + subu s1, s2, s1 // z4 += z5 + addu t4, t4, s0 + subu t4, t4, s3 // tmp0 + addu t5, t5, s1 + subu t5, t5, t9 // tmp1 + addu t6, t6, s0 + subu t6, t6, t9 // tmp2 + addu t7, t7, s1 + subu t7, t7, s3 // tmp3 + addu s0, t0, t7 + subu t0, t0, t7 + addu t7, t2, t6 + subu t2, t2, t6 + addu t6, t3, t5 + subu t3, t3, t5 + addu t5, t1, t4 + subu t1, t1, t4 + shra_r.w s0, s0, 18 + shra_r.w t7, t7, 18 + shra_r.w t6, t6, 18 + shra_r.w t5, t5, 18 + shra_r.w t1, t1, 18 + shra_r.w t3, t3, 18 + shra_r.w t2, t2, 18 + shra_r.w t0, t0, 18 + andi s0, s0, 0x3ff + andi t7, t7, 0x3ff + andi t6, t6, 0x3ff + andi t5, t5, 0x3ff + andi t1, t1, 0x3ff + andi t3, t3, 0x3ff + andi t2, t2, 0x3ff + andi t0, t0, 0x3ff + lw s1, 0(a2) + lbux s0, s0(a3) + lbux t7, t7(a3) + lbux t6, t6(a3) + lbux t5, t5(a3) + lbux t1, t1(a3) + lbux t3, t3(a3) + lbux t2, t2(a3) + lbux t0, t0(a3) 
+ sb s0, 0(s1) + sb t7, 1(s1) + sb t6, 2(s1) + sb t5, 3(s1) + sb t1, 4(s1) + sb t3, 5(s1) + sb t2, 6(s1) + sb t0, 7(s1) +6: + addiu v0, v0, 32 + bgtz v1, 4b + addiu a2, a2, 4 + addiu sp, sp, 256 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_idct_islow_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_idct_ifast_cols_dspr2) +/* + * a0 = inptr + * a1 = quantptr + * a2 = wsptr + * a3 = mips_idct_ifast_coefs + */ + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + addiu t9, a0, 16 // end address + or AT, a3, zero + +0: + lw s0, 0(a1) // quantptr[DCTSIZE*0] + lw t0, 0(a0) // inptr[DCTSIZE*0] + lw t1, 16(a0) // inptr[DCTSIZE*1] + muleq_s.w.phl v0, t0, s0 // tmp0 ... + lw t2, 32(a0) // inptr[DCTSIZE*2] + lw t3, 48(a0) // inptr[DCTSIZE*3] + lw t4, 64(a0) // inptr[DCTSIZE*4] + lw t5, 80(a0) // inptr[DCTSIZE*5] + muleq_s.w.phr t0, t0, s0 // ... tmp0 ... + lw t6, 96(a0) // inptr[DCTSIZE*6] + lw t7, 112(a0) // inptr[DCTSIZE*7] + or s4, t1, t2 + or s5, t3, t4 + bnez s4, 1f + ins t0, v0, 16, 16 // ... tmp0 + bnez s5, 1f + or s6, t5, t6 + or s6, s6, t7 + bnez s6, 1f + sw t0, 0(a2) // wsptr[DCTSIZE*0] + sw t0, 16(a2) // wsptr[DCTSIZE*1] + sw t0, 32(a2) // wsptr[DCTSIZE*2] + sw t0, 48(a2) // wsptr[DCTSIZE*3] + sw t0, 64(a2) // wsptr[DCTSIZE*4] + sw t0, 80(a2) // wsptr[DCTSIZE*5] + sw t0, 96(a2) // wsptr[DCTSIZE*6] + sw t0, 112(a2) // wsptr[DCTSIZE*7] + addiu a0, a0, 4 + b 2f + addiu a1, a1, 4 + +1: + lw s1, 32(a1) // quantptr[DCTSIZE*2] + lw s2, 64(a1) // quantptr[DCTSIZE*4] + muleq_s.w.phl v0, t2, s1 // tmp1 ... + muleq_s.w.phr t2, t2, s1 // ... tmp1 ... + lw s0, 16(a1) // quantptr[DCTSIZE*1] + lw s1, 48(a1) // quantptr[DCTSIZE*3] + lw s3, 96(a1) // quantptr[DCTSIZE*6] + muleq_s.w.phl v1, t4, s2 // tmp2 ... + muleq_s.w.phr t4, t4, s2 // ... tmp2 ... + lw s2, 80(a1) // quantptr[DCTSIZE*5] + lw t8, 4(AT) // FIX(1.414213562) + ins t2, v0, 16, 16 // ... 
tmp1 + muleq_s.w.phl v0, t6, s3 // tmp3 ... + muleq_s.w.phr t6, t6, s3 // ... tmp3 ... + ins t4, v1, 16, 16 // ... tmp2 + addq.ph s4, t0, t4 // tmp10 + subq.ph s5, t0, t4 // tmp11 + ins t6, v0, 16, 16 // ... tmp3 + subq.ph s6, t2, t6 // tmp12 ... + addq.ph s7, t2, t6 // tmp13 + mulq_s.ph s6, s6, t8 // ... tmp12 ... + addq.ph t0, s4, s7 // tmp0 + subq.ph t6, s4, s7 // tmp3 + muleq_s.w.phl v0, t1, s0 // tmp4 ... + muleq_s.w.phr t1, t1, s0 // ... tmp4 ... + shll_s.ph s6, s6, 1 // x2 + lw s3, 112(a1) // quantptr[DCTSIZE*7] + subq.ph s6, s6, s7 // ... tmp12 + muleq_s.w.phl v1, t7, s3 // tmp7 ... + muleq_s.w.phr t7, t7, s3 // ... tmp7 ... + ins t1, v0, 16, 16 // ... tmp4 + addq.ph t2, s5, s6 // tmp1 + subq.ph t4, s5, s6 // tmp2 + muleq_s.w.phl v0, t5, s2 // tmp6 ... + muleq_s.w.phr t5, t5, s2 // ... tmp6 ... + ins t7, v1, 16, 16 // ... tmp7 + addq.ph s5, t1, t7 // z11 + subq.ph s6, t1, t7 // z12 + muleq_s.w.phl v1, t3, s1 // tmp5 ... + muleq_s.w.phr t3, t3, s1 // ... tmp5 ... + ins t5, v0, 16, 16 // ... tmp6 + ins t3, v1, 16, 16 // ... tmp5 + addq.ph s7, t5, t3 // z13 + subq.ph v0, t5, t3 // z10 + addq.ph t7, s5, s7 // tmp7 + subq.ph s5, s5, s7 // tmp11 ... + addq.ph v1, v0, s6 // z5 ... + mulq_s.ph s5, s5, t8 // ... tmp11 + lw t8, 8(AT) // FIX(1.847759065) + lw s4, 0(AT) // FIX(1.082392200) + addq.ph s0, t0, t7 + subq.ph s1, t0, t7 + mulq_s.ph v1, v1, t8 // ... z5 + shll_s.ph s5, s5, 1 // x2 + lw t8, 12(AT) // FIX(-2.613125930) + sw s0, 0(a2) // wsptr[DCTSIZE*0] + shll_s.ph v0, v0, 1 // x4 + mulq_s.ph v0, v0, t8 // tmp12 ... + mulq_s.ph s4, s6, s4 // tmp10 ... + shll_s.ph v1, v1, 1 // x2 + addiu a0, a0, 4 + addiu a1, a1, 4 + sw s1, 112(a2) // wsptr[DCTSIZE*7] + shll_s.ph s6, v0, 1 // x4 + shll_s.ph s4, s4, 1 // x2 + addq.ph s6, s6, v1 // ... tmp12 + subq.ph t5, s6, t7 // tmp6 + subq.ph s4, s4, v1 // ... 
tmp10 + subq.ph t3, s5, t5 // tmp5 + addq.ph s2, t2, t5 + addq.ph t1, s4, t3 // tmp4 + subq.ph s3, t2, t5 + sw s2, 16(a2) // wsptr[DCTSIZE*1] + sw s3, 96(a2) // wsptr[DCTSIZE*6] + addq.ph v0, t4, t3 + subq.ph v1, t4, t3 + sw v0, 32(a2) // wsptr[DCTSIZE*2] + sw v1, 80(a2) // wsptr[DCTSIZE*5] + addq.ph v0, t6, t1 + subq.ph v1, t6, t1 + sw v0, 64(a2) // wsptr[DCTSIZE*4] + sw v1, 48(a2) // wsptr[DCTSIZE*3] + +2: + bne a0, t9, 0b + addiu a2, a2, 4 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_idct_ifast_cols_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_idct_ifast_rows_dspr2) +/* + * a0 = wsptr + * a1 = output_buf + * a2 = output_col + * a3 = mips_idct_ifast_coefs + */ + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3 + + addiu t9, a0, 128 // end address + lui s8, 0x8080 + ori s8, s8, 0x8080 + +0: + lw AT, 36(sp) // restore $a3 (mips_idct_ifast_coefs) + lw t0, 0(a0) // wsptr[DCTSIZE*0+0/1] b a + lw s0, 16(a0) // wsptr[DCTSIZE*1+0/1] B A + lw t2, 4(a0) // wsptr[DCTSIZE*0+2/3] d c + lw s2, 20(a0) // wsptr[DCTSIZE*1+2/3] D C + lw t4, 8(a0) // wsptr[DCTSIZE*0+4/5] f e + lw s4, 24(a0) // wsptr[DCTSIZE*1+4/5] F E + lw t6, 12(a0) // wsptr[DCTSIZE*0+6/7] h g + lw s6, 28(a0) // wsptr[DCTSIZE*1+6/7] H G + precrq.ph.w t1, s0, t0 // B b + ins t0, s0, 16, 16 // A a + bnez t1, 1f + or s0, t2, s2 + bnez s0, 1f + or s0, t4, s4 + bnez s0, 1f + or s0, t6, s6 + bnez s0, 1f + shll_s.ph s0, t0, 2 // A a + lw a3, 0(a1) + lw AT, 4(a1) + precrq.ph.w t0, s0, s0 // A A + ins s0, s0, 16, 16 // a a + addu a3, a3, a2 + addu AT, AT, a2 + precrq.qb.ph t0, t0, t0 // A A A A + precrq.qb.ph s0, s0, s0 // a a a a + addu.qb s0, s0, s8 + addu.qb t0, t0, s8 + sw s0, 0(a3) + sw s0, 4(a3) + sw t0, 0(AT) + sw t0, 4(AT) + addiu a0, a0, 32 + bne a0, t9, 0b + addiu a1, a1, 8 + b 2f + nop + +1: + precrq.ph.w t3, s2, t2 + ins t2, s2, 16, 16 + precrq.ph.w t5, s4, t4 + ins t4, s4, 16, 16 + 
precrq.ph.w t7, s6, t6 + ins t6, s6, 16, 16 + lw t8, 4(AT) // FIX(1.414213562) + addq.ph s4, t0, t4 // tmp10 + subq.ph s5, t0, t4 // tmp11 + subq.ph s6, t2, t6 // tmp12 ... + addq.ph s7, t2, t6 // tmp13 + mulq_s.ph s6, s6, t8 // ... tmp12 ... + addq.ph t0, s4, s7 // tmp0 + subq.ph t6, s4, s7 // tmp3 + shll_s.ph s6, s6, 1 // x2 + subq.ph s6, s6, s7 // ... tmp12 + addq.ph t2, s5, s6 // tmp1 + subq.ph t4, s5, s6 // tmp2 + addq.ph s5, t1, t7 // z11 + subq.ph s6, t1, t7 // z12 + addq.ph s7, t5, t3 // z13 + subq.ph v0, t5, t3 // z10 + addq.ph t7, s5, s7 // tmp7 + subq.ph s5, s5, s7 // tmp11 ... + addq.ph v1, v0, s6 // z5 ... + mulq_s.ph s5, s5, t8 // ... tmp11 + lw t8, 8(AT) // FIX(1.847759065) + lw s4, 0(AT) // FIX(1.082392200) + addq.ph s0, t0, t7 // tmp0 + tmp7 + subq.ph s7, t0, t7 // tmp0 - tmp7 + mulq_s.ph v1, v1, t8 // ... z5 + lw a3, 0(a1) + lw t8, 12(AT) // FIX(-2.613125930) + shll_s.ph s5, s5, 1 // x2 + addu a3, a3, a2 + shll_s.ph v0, v0, 1 // x4 + mulq_s.ph v0, v0, t8 // tmp12 ... + mulq_s.ph s4, s6, s4 // tmp10 ... + shll_s.ph v1, v1, 1 // x2 + addiu a0, a0, 32 + addiu a1, a1, 8 + shll_s.ph s6, v0, 1 // x4 + shll_s.ph s4, s4, 1 // x2 + addq.ph s6, s6, v1 // ... tmp12 + shll_s.ph s0, s0, 2 + subq.ph t5, s6, t7 // tmp6 + subq.ph s4, s4, v1 // ... 
tmp10 + subq.ph t3, s5, t5 // tmp5 + shll_s.ph s7, s7, 2 + addq.ph t1, s4, t3 // tmp4 + addq.ph s1, t2, t5 // tmp1 + tmp6 + subq.ph s6, t2, t5 // tmp1 - tmp6 + addq.ph s2, t4, t3 // tmp2 + tmp5 + subq.ph s5, t4, t3 // tmp2 - tmp5 + addq.ph s4, t6, t1 // tmp3 + tmp4 + subq.ph s3, t6, t1 // tmp3 - tmp4 + shll_s.ph s1, s1, 2 + shll_s.ph s2, s2, 2 + shll_s.ph s3, s3, 2 + shll_s.ph s4, s4, 2 + shll_s.ph s5, s5, 2 + shll_s.ph s6, s6, 2 + precrq.ph.w t0, s1, s0 // B A + ins s0, s1, 16, 16 // b a + precrq.ph.w t2, s3, s2 // D C + ins s2, s3, 16, 16 // d c + precrq.ph.w t4, s5, s4 // F E + ins s4, s5, 16, 16 // f e + precrq.ph.w t6, s7, s6 // H G + ins s6, s7, 16, 16 // h g + precrq.qb.ph t0, t2, t0 // D C B A + precrq.qb.ph s0, s2, s0 // d c b a + precrq.qb.ph t4, t6, t4 // H G F E + precrq.qb.ph s4, s6, s4 // h g f e + addu.qb s0, s0, s8 + addu.qb s4, s4, s8 + sw s0, 0(a3) // outptr[0/1/2/3] d c b a + sw s4, 4(a3) // outptr[4/5/6/7] h g f e + lw a3, -4(a1) + addu.qb t0, t0, s8 + addu a3, a3, a2 + addu.qb t4, t4, s8 + sw t0, 0(a3) // outptr[0/1/2/3] D C B A + bne a0, t9, 0b + sw t4, 4(a3) // outptr[4/5/6/7] H G F E + +2: + + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3 + + j ra + nop + +END(jsimd_idct_ifast_rows_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_fdct_islow_dspr2) +/* + * a0 = data + */ + SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lui t0, 6437 + ori t0, 2260 + lui t1, 9633 + ori t1, 11363 + lui t2, 0xd39e + ori t2, 0xe6dc + lui t3, 0xf72d + ori t3, 9633 + lui t4, 2261 + ori t4, 9633 + lui t5, 0xd39e + ori t5, 6437 + lui t6, 9633 + ori t6, 0xd39d + lui t7, 0xe6dc + ori t7, 2260 + lui t8, 4433 + ori t8, 10703 + lui t9, 0xd630 + ori t9, 4433 + li s8, 8 + move a1, a0 +1: + lw s0, 0(a1) // tmp0 = 1|0 + lw s1, 4(a1) // tmp1 = 3|2 + lw s2, 8(a1) // tmp2 = 5|4 + lw s3, 12(a1) // tmp3 = 7|6 + packrl.ph s1, s1, s1 // tmp1 = 2|3 + packrl.ph s3, s3, s3 // tmp3 = 6|7 + 
subq.ph s7, s1, s2 // tmp7 = 2-5|3-4 = t5|t4 + subq.ph s5, s0, s3 // tmp5 = 1-6|0-7 = t6|t7 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, s7, t0 // ac0 += t5* 6437 + t4* 2260 + dpa.w.ph $ac0, s5, t1 // ac0 += t6* 9633 + t7* 11363 + mult $ac1, $0, $0 // ac1 = 0 + dpa.w.ph $ac1, s7, t2 // ac1 += t5*-11362 + t4* -6436 + dpa.w.ph $ac1, s5, t3 // ac1 += t6* -2259 + t7* 9633 + mult $ac2, $0, $0 // ac2 = 0 + dpa.w.ph $ac2, s7, t4 // ac2 += t5* 2261 + t4* 9633 + dpa.w.ph $ac2, s5, t5 // ac2 += t6*-11362 + t7* 6437 + mult $ac3, $0, $0 // ac3 = 0 + dpa.w.ph $ac3, s7, t6 // ac3 += t5* 9633 + t4*-11363 + dpa.w.ph $ac3, s5, t7 // ac3 += t6* -6436 + t7* 2260 + addq.ph s6, s1, s2 // tmp6 = 2+5|3+4 = t2|t3 + addq.ph s4, s0, s3 // tmp4 = 1+6|0+7 = t1|t0 + extr_r.w s0, $ac0, 11 // tmp0 = (ac0 + 1024) >> 11 + extr_r.w s1, $ac1, 11 // tmp1 = (ac1 + 1024) >> 11 + extr_r.w s2, $ac2, 11 // tmp2 = (ac2 + 1024) >> 11 + extr_r.w s3, $ac3, 11 // tmp3 = (ac3 + 1024) >> 11 + addq.ph s5, s4, s6 // tmp5 = t1+t2|t0+t3 = t11|t10 + subq.ph s7, s4, s6 // tmp7 = t1-t2|t0-t3 = t12|t13 + sh s0, 2(a1) + sh s1, 6(a1) + sh s2, 10(a1) + sh s3, 14(a1) + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, s7, t8 // ac0 += t12* 4433 + t13* 10703 + mult $ac1, $0, $0 // ac1 = 0 + dpa.w.ph $ac1, s7, t9 // ac1 += t12*-10704 + t13* 4433 + sra s4, s5, 16 // tmp4 = t11 + addiu a1, a1, 16 + addiu s8, s8, -1 + extr_r.w s0, $ac0, 11 // tmp0 = (ac0 + 1024) >> 11 + extr_r.w s1, $ac1, 11 // tmp1 = (ac1 + 1024) >> 11 + addu s2, s5, s4 // tmp2 = t10 + t11 + subu s3, s5, s4 // tmp3 = t10 - t11 + sll s2, s2, 2 // tmp2 = (t10 + t11) << 2 + sll s3, s3, 2 // tmp3 = (t10 - t11) << 2 + sh s2, -16(a1) + sh s3, -8(a1) + sh s0, -12(a1) + bgtz s8, 1b + sh s1, -4(a1) + li t0, 2260 + li t1, 11363 + li t2, 9633 + li t3, 6436 + li t4, 6437 + li t5, 2261 + li t6, 11362 + li t7, 2259 + li t8, 4433 + li t9, 10703 + li a1, 10704 + li s8, 8 + +2: + lh a2, 0(a0) // 0 + lh a3, 16(a0) // 8 + lh v0, 32(a0) // 16 + lh v1, 48(a0) // 24 + lh s4, 64(a0) // 32 + lh 
s5, 80(a0) // 40 + lh s6, 96(a0) // 48 + lh s7, 112(a0) // 56 + addu s2, v0, s5 // tmp2 = 16 + 40 + subu s5, v0, s5 // tmp5 = 16 - 40 + addu s3, v1, s4 // tmp3 = 24 + 32 + subu s4, v1, s4 // tmp4 = 24 - 32 + addu s0, a2, s7 // tmp0 = 0 + 56 + subu s7, a2, s7 // tmp7 = 0 - 56 + addu s1, a3, s6 // tmp1 = 8 + 48 + subu s6, a3, s6 // tmp6 = 8 - 48 + addu a2, s0, s3 // tmp10 = tmp0 + tmp3 + subu v1, s0, s3 // tmp13 = tmp0 - tmp3 + addu a3, s1, s2 // tmp11 = tmp1 + tmp2 + subu v0, s1, s2 // tmp12 = tmp1 - tmp2 + mult s7, t1 // ac0 = tmp7 * c1 + madd s4, t0 // ac0 += tmp4 * c0 + madd s5, t4 // ac0 += tmp5 * c4 + madd s6, t2 // ac0 += tmp6 * c2 + mult $ac1, s7, t2 // ac1 = tmp7 * c2 + msub $ac1, s4, t3 // ac1 -= tmp4 * c3 + msub $ac1, s5, t6 // ac1 -= tmp5 * c6 + msub $ac1, s6, t7 // ac1 -= tmp6 * c7 + mult $ac2, s7, t4 // ac2 = tmp7 * c4 + madd $ac2, s4, t2 // ac2 += tmp4 * c2 + madd $ac2, s5, t5 // ac2 += tmp5 * c5 + msub $ac2, s6, t6 // ac2 -= tmp6 * c6 + mult $ac3, s7, t0 // ac3 = tmp7 * c0 + msub $ac3, s4, t1 // ac3 -= tmp4 * c1 + madd $ac3, s5, t2 // ac3 += tmp5 * c2 + msub $ac3, s6, t3 // ac3 -= tmp6 * c3 + extr_r.w s0, $ac0, 15 // tmp0 = (ac0 + 16384) >> 15 + extr_r.w s1, $ac1, 15 // tmp1 = (ac1 + 16384) >> 15 + extr_r.w s2, $ac2, 15 // tmp2 = (ac2 + 16384) >> 15 + extr_r.w s3, $ac3, 15 // tmp3 = (ac3 + 16384) >> 15 + addiu s8, s8, -1 + addu s4, a2, a3 // tmp4 = tmp10 + tmp11 + subu s5, a2, a3 // tmp5 = tmp10 - tmp11 + sh s0, 16(a0) + sh s1, 48(a0) + sh s2, 80(a0) + sh s3, 112(a0) + mult v0, t8 // ac0 = tmp12 * c8 + madd v1, t9 // ac0 += tmp13 * c9 + mult $ac1, v1, t8 // ac1 = tmp13 * c8 + msub $ac1, v0, a1 // ac1 -= tmp12 * c10 + addiu a0, a0, 2 + extr_r.w s6, $ac0, 15 // tmp6 = (ac0 + 16384) >> 15 + extr_r.w s7, $ac1, 15 // tmp7 = (ac1 + 16384) >> 15 + shra_r.w s4, s4, 2 // tmp4 = (tmp4 + 2) >> 2 + shra_r.w s5, s5, 2 // tmp5 = (tmp5 + 2) >> 2 + sh s4, -2(a0) + sh s5, 62(a0) + sh s6, 30(a0) + bgtz s8, 2b + sh s7, 94(a0) + + RESTORE_REGS_FROM_STACK 40, s0, s1, s2, 
s3, s4, s5, s6, s7, s8 + + jr ra + nop + +END(jsimd_fdct_islow_dspr2) + + +/**************************************************************************/ +LEAF_DSPR2(jsimd_fdct_ifast_dspr2) +/* + * a0 = data (8 rows of 8 16-bit values, transformed in place: rows first, then columns) + */ + .set at + + SAVE_REGS_ON_STACK 8, s0, s1 + + li a1, 0x014e014e // FIX_1_306562965 (334 << 16)|(334 & 0xffff) + li a2, 0x008b008b // FIX_0_541196100 (139 << 16)|(139 & 0xffff) + li a3, 0x00620062 // FIX_0_382683433 (98 << 16) |(98 & 0xffff) + li s1, 0x00b500b5 // FIX_0_707106781 (181 << 16)|(181 & 0xffff) + + move v0, a0 + addiu v1, v0, 128 // end address + +0: + lw t0, 0(v0) // tmp0 = 1|0 + lw t1, 4(v0) // tmp1 = 3|2 + lw t2, 8(v0) // tmp2 = 5|4 + lw t3, 12(v0) // tmp3 = 7|6 + packrl.ph t1, t1, t1 // tmp1 = 2|3 + packrl.ph t3, t3, t3 // tmp3 = 6|7 + subq.ph t7, t1, t2 // tmp7 = 2-5|3-4 = t5|t4 + subq.ph t5, t0, t3 // tmp5 = 1-6|0-7 = t6|t7 + addq.ph t6, t1, t2 // tmp6 = 2+5|3+4 = t2|t3 + addq.ph t4, t0, t3 // tmp4 = 1+6|0+7 = t1|t0 + addq.ph t8, t4, t6 // tmp5 = t1+t2|t0+t3 = t11|t10 + subq.ph t9, t4, t6 // tmp7 = t1-t2|t0-t3 = t12|t13 + sra t4, t8, 16 // tmp4 = t11 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, t9, s1 // ac0 += t12*181 + t13*181 + mult $ac1, $0, $0 // ac1 = 0 + dpa.w.ph $ac1, t7, a3 // ac1 += t4*98 + t5*98 + dpsx.w.ph $ac1, t5, a3 // ac1 -= t6*98 + t7*98 + mult $ac2, $0, $0 // ac2 = 0 + dpa.w.ph $ac2, t7, a2 // ac2 += t4*139 + t5*139 + mult $ac3, $0, $0 // ac3 = 0 + dpa.w.ph $ac3, t5, a1 // ac3 += t6*334 + t7*334 + precrq.ph.w t0, t5, t7 // t0 = t5|t6 + addq.ph t2, t8, t4 // tmp2 = t10 + t11 + subq.ph t3, t8, t4 // tmp3 = t10 - t11 + extr.w t4, $ac0, 8 // t4 = z1 = MULTIPLY(tmp12+tmp13, 181) + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, t0, s1 // ac0 += t5*181 + t6*181 + extr.w t0, $ac1, 8 // t0 = z5 + extr.w t1, $ac2, 8 // t1 = MULTIPLY(tmp10, 139) + extr.w t7, $ac3, 8 // t7 = MULTIPLY(tmp12, 334) + extr.w t8, $ac0, 8 // t8 = z3 = MULTIPLY(tmp11, 181) + add t6, t1, t0 // t6 = z2 + add t7, t7, t0 // t7 = z4 + subq.ph t0, t5, t8 // t0 = z13 = tmp7 - z3 + addq.ph t8, t5, t8 // t8 = z11 = tmp7 + z3 + addq.ph t1, t0, 
t6 // t1 = z13 + z2 + subq.ph t6, t0, t6 // t6 = z13 - z2 + addq.ph t0, t8, t7 // t0 = z11 + z4 + subq.ph t7, t8, t7 // t7 = z11 - z4 + addq.ph t5, t4, t9 // t5 (low halfword) = tmp13 + z1 + subq.ph t4, t9, t4 // t4 (low halfword) = tmp13 - z1 + sh t2, 0(v0) + sh t5, 4(v0) + sh t3, 8(v0) + sh t4, 12(v0) + sh t1, 10(v0) + sh t6, 6(v0) + sh t0, 2(v0) + sh t7, 14(v0) + addiu v0, 16 + bne v1, v0, 0b + nop + move v0, a0 + addiu v1, v0, 16 + +1: + lh t0, 0(v0) // 0 + lh t1, 16(v0) // 8 + lh t2, 32(v0) // 16 + lh t3, 48(v0) // 24 + lh t4, 64(v0) // 32 + lh t5, 80(v0) // 40 + lh t6, 96(v0) // 48 + lh t7, 112(v0) // 56 + add t8, t0, t7 // t8 = tmp0 + sub t7, t0, t7 // t7 = tmp7 + add t0, t1, t6 // t0 = tmp1 + sub t1, t1, t6 // t1 = tmp6 + add t6, t2, t5 // t6 = tmp2 + sub t5, t2, t5 // t5 = tmp5 + add t2, t3, t4 // t2 = tmp3 + sub t3, t3, t4 // t3 = tmp4 + add t4, t8, t2 // t4 = tmp10 = tmp0 + tmp3 + sub t8, t8, t2 // t8 = tmp13 = tmp0 - tmp3 + sub s0, t0, t6 // s0 = tmp12 = tmp1 - tmp2 + ins t8, s0, 16, 16 // t8 = tmp12|tmp13 + add t2, t0, t6 // t2 = tmp11 = tmp1 + tmp2 + mult $0, $0 // ac0 = 0 + dpa.w.ph $ac0, t8, s1 // ac0 += t12*181 + t13*181 + add s0, t4, t2 // s0 = tmp10+tmp11 + sub t4, t4, t2 // t4 = tmp10-tmp11 + sh s0, 0(v0) + sh t4, 64(v0) + extr.w t2, $ac0, 8 // z1 = MULTIPLY(tmp12+tmp13, FIX_0_707106781) + addq.ph t4, t8, t2 // t4 = tmp13 + z1 + subq.ph t8, t8, t2 // t8 = tmp13 - z1 + sh t4, 32(v0) + sh t8, 96(v0) + add t3, t3, t5 // t3 = tmp10 = tmp4 + tmp5 + add t0, t5, t1 // t0 = tmp11 = tmp5 + tmp6 + add t1, t1, t7 // t1 = tmp12 = tmp6 + tmp7 + andi t4, a1, 0xffff // t4 = 334 + mul s0, t1, t4 + sra s0, s0, 8 // s0 = z4 = MULTIPLY(tmp12, FIX_1_306562965) + ins t1, t3, 16, 16 // t1 = tmp10|tmp12 + mult $0, $0 // ac0 = 0 + mulsa.w.ph $ac0, t1, a3 // ac0 += t10*98 - t12*98 + extr.w t8, $ac0, 8 // z5 = MULTIPLY(tmp10-tmp12, FIX_0_382683433) + add t2, t7, t8 // t2 = tmp7 + z5 + sub t7, t7, t8 // t7 = tmp7 - z5 + andi t4, a2, 0xffff // t4 = 139 + mul t8, t3, t4 + sra t8, t8, 8 // t8 = z2 = MULTIPLY(tmp10, FIX_0_541196100) + andi t4, s1, 0xffff // t4 = 181 + mul t6, 
t0, t4 + sra t6, t6, 8 // t6 = z3 = MULTIPLY(tmp11, FIX_0_707106781) + add t0, t6, t8 // t0 = z3 + z2 + sub t1, t6, t8 // t1 = z3 - z2 + add t3, t6, s0 // t3 = z3 + z4 + sub t4, t6, s0 // t4 = z3 - z4 + sub t5, t2, t1 // t5 = dataptr[5] + sub t6, t7, t0 // t6 = dataptr[3] + add t3, t2, t3 // t3 = dataptr[1] + add t4, t7, t4 // t4 = dataptr[7] + sh t5, 80(v0) + sh t6, 48(v0) + sh t3, 16(v0) + sh t4, 112(v0) + addiu v0, 2 + bne v0, v1, 1b + nop + + RESTORE_REGS_FROM_STACK 8, s0, s1 + + j ra + nop +END(jsimd_fdct_ifast_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_quantize_dspr2) +/* + * a0 = coef_block + * a1 = divisors + * a2 = workspace + */ + .set at + + SAVE_REGS_ON_STACK 16, s0, s1, s2 + + addiu v0, a2, 124 // v0 = workspace_end + lh t0, 0(a2) + lh t1, 0(a1) + lh t2, 128(a1) + sra t3, t0, 15 + sll t3, t3, 1 + addiu t3, t3, 1 + mul t0, t0, t3 + lh t4, 384(a1) + lh t5, 130(a1) + lh t6, 2(a2) + lh t7, 2(a1) + lh t8, 386(a1) + +1: + andi t1, 0xffff + add t9, t0, t2 + andi t9, 0xffff + mul v1, t9, t1 + sra s0, t6, 15 + sll s0, s0, 1 + addiu s0, s0, 1 + addiu t9, t4, 16 + srav v1, v1, t9 + mul v1, v1, t3 + mul t6, t6, s0 + andi t7, 0xffff + addiu a2, a2, 4 + addiu a1, a1, 4 + add s1, t6, t5 + andi s1, 0xffff + sh v1, 0(a0) + + mul s2, s1, t7 + addiu s1, t8, 16 + srav s2, s2, s1 + mul s2, s2, s0 + lh t0, 0(a2) + lh t1, 0(a1) + sra t3, t0, 15 + sll t3, t3, 1 + addiu t3, t3, 1 + mul t0, t0, t3 + lh t2, 128(a1) + lh t4, 384(a1) + lh t5, 130(a1) + lh t8, 386(a1) + lh t6, 2(a2) + lh t7, 2(a1) + sh s2, 2(a0) + lh t0, 0(a2) + sra t3, t0, 15 + sll t3, t3, 1 + addiu t3, t3, 1 + mul t0, t0, t3 + bne a2, v0, 1b + addiu a0, a0, 4 + + andi t1, 0xffff + add t9, t0, t2 + andi t9, 0xffff + mul v1, t9, t1 + sra s0, t6, 15 + sll s0, s0, 1 + addiu s0, s0, 1 + addiu t9, t4, 16 + srav v1, v1, t9 + mul v1, v1, t3 + mul t6, t6, s0 + andi t7, 0xffff + sh v1, 0(a0) + add s1, t6, t5 + andi s1, 0xffff + mul s2, s1, t7 + addiu s1, 
t8, 16 + addiu a2, a2, 4 + addiu a1, a1, 4 + srav s2, s2, s1 + mul s2, s2, s0 + sh s2, 2(a0) + + RESTORE_REGS_FROM_STACK 16, s0, s1, s2 + + j ra + nop + +END(jsimd_quantize_dspr2) + + +#ifndef __mips_soft_float + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_quantize_float_dspr2) +/* + * a0 = coef_block + * a1 = divisors + * a2 = workspace + */ + .set at + + li t1, 0x46800100 // integer representation 16384.5 + mtc1 t1, f0 + li t0, 63 +0: + lwc1 f2, 0(a2) + lwc1 f10, 0(a1) + lwc1 f4, 4(a2) + lwc1 f12, 4(a1) + lwc1 f6, 8(a2) + lwc1 f14, 8(a1) + lwc1 f8, 12(a2) + lwc1 f16, 12(a1) + madd.s f2, f0, f2, f10 + madd.s f4, f0, f4, f12 + madd.s f6, f0, f6, f14 + madd.s f8, f0, f8, f16 + lwc1 f10, 16(a1) + lwc1 f12, 20(a1) + trunc.w.s f2, f2 + trunc.w.s f4, f4 + trunc.w.s f6, f6 + trunc.w.s f8, f8 + lwc1 f14, 24(a1) + lwc1 f16, 28(a1) + mfc1 t1, f2 + mfc1 t2, f4 + mfc1 t3, f6 + mfc1 t4, f8 + lwc1 f2, 16(a2) + lwc1 f4, 20(a2) + lwc1 f6, 24(a2) + lwc1 f8, 28(a2) + madd.s f2, f0, f2, f10 + madd.s f4, f0, f4, f12 + madd.s f6, f0, f6, f14 + madd.s f8, f0, f8, f16 + addiu t1, t1, -16384 + addiu t2, t2, -16384 + addiu t3, t3, -16384 + addiu t4, t4, -16384 + trunc.w.s f2, f2 + trunc.w.s f4, f4 + trunc.w.s f6, f6 + trunc.w.s f8, f8 + sh t1, 0(a0) + sh t2, 2(a0) + sh t3, 4(a0) + sh t4, 6(a0) + mfc1 t1, f2 + mfc1 t2, f4 + mfc1 t3, f6 + mfc1 t4, f8 + addiu t0, t0, -8 + addiu a2, a2, 32 + addiu a1, a1, 32 + addiu t1, t1, -16384 + addiu t2, t2, -16384 + addiu t3, t3, -16384 + addiu t4, t4, -16384 + sh t1, 8(a0) + sh t2, 10(a0) + sh t3, 12(a0) + sh t4, 14(a0) + bgez t0, 0b + addiu a0, a0, 16 + + j ra + nop + +END(jsimd_quantize_float_dspr2) + +#endif + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_idct_2x2_dspr2) +/* + * a0 = compptr->dct_table + * a1 = coef_block + * a2 = output_buf + * a3 = output_col + */ + .set at + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5 + + addiu sp, 
sp, -40 + move v0, sp + addiu s2, zero, 29692 + addiu s3, zero, -10426 + addiu s4, zero, 6967 + addiu s5, zero, -5906 + lh t0, 0(a1) // t0 = inptr[DCTSIZE*0] + lh t5, 0(a0) // t5 = quantptr[DCTSIZE*0] + lh t1, 48(a1) // t1 = inptr[DCTSIZE*3] + lh t6, 48(a0) // t6 = quantptr[DCTSIZE*3] + mul t4, t5, t0 + lh t0, 16(a1) // t0 = inptr[DCTSIZE*1] + lh t5, 16(a0) // t5 = quantptr[DCTSIZE*1] + mul t6, t6, t1 + mul t5, t5, t0 + lh t2, 80(a1) // t2 = inptr[DCTSIZE*5] + lh t7, 80(a0) // t7 = quantptr[DCTSIZE*5] + lh t3, 112(a1) // t3 = inptr[DCTSIZE*7] + lh t8, 112(a0) // t8 = quantptr[DCTSIZE*7] + mul t7, t7, t2 + mult zero, zero + mul t8, t8, t3 + li s0, 0x73FCD746 // s0 = (29692 << 16) | (-10426 & 0xffff) + li s1, 0x1B37E8EE // s1 = (6967 << 16) | (-5906 & 0xffff) + ins t6, t5, 16, 16 // t6 = t5|t6 + sll t4, t4, 15 + dpa.w.ph $ac0, t6, s0 + lh t1, 2(a1) + lh t6, 2(a0) + ins t8, t7, 16, 16 // t8 = t7|t8 + dpa.w.ph $ac0, t8, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 18(a1) + lh t6, 18(a0) + lh t2, 50(a1) + lh t7, 50(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 82(a1) + lh t2, 82(a0) + lh t3, 114(a1) + lh t4, 114(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 0(v0) + sw t8, 20(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + lh t1, 6(a1) + lh t6, 6(a0) + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 22(a1) + lh t6, 22(a0) + lh t2, 54(a1) + lh t7, 54(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 86(a1) + lh t2, 86(a0) + lh t3, 118(a1) + lh t4, 118(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 4(v0) + sw t8, 24(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + lh t1, 10(a1) + lh t6, 10(a0) + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 26(a1) + lh t6, 26(a0) + lh 
t2, 58(a1) + lh t7, 58(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 90(a1) + lh t2, 90(a0) + lh t3, 122(a1) + lh t4, 122(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 8(v0) + sw t8, 28(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + lh t1, 14(a1) + lh t6, 14(a0) + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + mul t5, t6, t1 + lh t1, 30(a1) + lh t6, 30(a0) + lh t2, 62(a1) + lh t7, 62(a0) + mul t6, t6, t1 + subu t8, t4, t0 + mul t7, t7, t2 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + lh t1, 94(a1) + lh t2, 94(a0) + lh t3, 126(a1) + lh t4, 126(a0) + shra_r.w t8, t8, 13 + mul t1, t1, t2 + mul t3, t3, t4 + sw t0, 12(v0) + sw t8, 32(v0) + sll t4, t5, 15 + ins t7, t6, 16, 16 + mult zero, zero + dpa.w.ph $ac0, t7, s0 + ins t3, t1, 16, 16 + dpa.w.ph $ac0, t3, s1 + mflo t0, $ac0 + lw t9, 0(a2) + lw t3, 0(v0) + lw t7, 4(v0) + lw t1, 8(v0) + addu t9, t9, a3 + sll t3, t3, 15 + subu t8, t4, t0 + addu t0, t4, t0 + shra_r.w t0, t0, 13 + shra_r.w t8, t8, 13 + sw t0, 16(v0) + sw t8, 36(v0) + lw t5, 12(v0) + lw t6, 16(v0) + mult t7, s2 + madd t1, s3 + madd t5, s4 + madd t6, s5 + lw t5, 24(v0) + lw t7, 28(v0) + mflo t0, $ac0 + lw t8, 32(v0) + lw t2, 36(v0) + mult $ac1, t5, s2 + madd $ac1, t7, s3 + madd $ac1, t8, s4 + madd $ac1, t2, s5 + addu t1, t3, t0 + subu t6, t3, t0 + shra_r.w t1, t1, 20 + shra_r.w t6, t6, 20 + mflo t4, $ac1 + shll_s.w t1, t1, 24 + shll_s.w t6, t6, 24 + sra t1, t1, 24 + sra t6, t6, 24 + addiu t1, t1, 128 + addiu t6, t6, 128 + lw t0, 20(v0) + sb t1, 0(t9) + sb t6, 1(t9) + sll t0, t0, 15 + lw t9, 4(a2) + addu t1, t0, t4 + subu t6, t0, t4 + addu t9, t9, a3 + shra_r.w t1, t1, 20 + shra_r.w t6, t6, 20 + shll_s.w t1, t1, 24 + shll_s.w t6, t6, 24 + sra t1, t1, 24 + sra t6, t6, 24 + addiu t1, t1, 128 + addiu t6, t6, 128 + sb t1, 0(t9) + sb t6, 1(t9) + addiu sp, sp, 40 + + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5 + + j ra + nop + 
+END(jsimd_idct_2x2_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_idct_4x4_dspr2) +/* + * a0 = compptr->dct_table + * a1 = coef_block + * a2 = output_buf + * a3 = output_col + * 16(sp) = workspace[DCTSIZE*4]; // buffers data between passes + */ + .set at + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + lw v1, 48(sp) + move t0, a1 + move t1, v1 + li t9, 4 + li s0, 0x2e75f93e + li s1, 0x21f9ba79 + li s2, 0xecc2efb0 + li s3, 0x52031ccd + +0: + lh s6, 32(t0) // inptr[DCTSIZE*2] + lh t6, 32(a0) // quantptr[DCTSIZE*2] + lh s7, 96(t0) // inptr[DCTSIZE*6] + lh t7, 96(a0) // quantptr[DCTSIZE*6] + mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) + lh s4, 0(t0) // inptr[DCTSIZE*0] + mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) + lh s5, 0(a0) // quantptr[0] + li s6, 15137 // FIX_1_847759065 + li s7, 6270 // FIX_0_765366865 + mul t2, s4, s5 // tmp0 = (inptr[0] * quantptr[0]) + mul t6, s6, t6 // MULTIPLY(z2, FIX_1_847759065) + lh t5, 112(t0) // inptr[DCTSIZE*7] + mul t7, s7, t7 // MULTIPLY(z3, FIX_0_765366865) + lh s4, 112(a0) // quantptr[DCTSIZE*7] + lh v0, 80(t0) // inptr[DCTSIZE*5] + lh s5, 80(a0) // quantptr[DCTSIZE*5] + lh s6, 48(a0) // quantptr[DCTSIZE*3] + sll t2, t2, 14 // tmp0 <<= (CONST_BITS+1) + lh s7, 16(a0) // quantptr[DCTSIZE*1] + lh t8, 16(t0) // inptr[DCTSIZE*1] + subu t6, t6, t7 // tmp2 = MULTIPLY(z2, FIX_1_847759065) - MULTIPLY(z3, FIX_0_765366865) + lh t7, 48(t0) // inptr[DCTSIZE*3] + mul t5, s4, t5 // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7]) + mul v0, s5, v0 // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5]) + mul t7, s6, t7 // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3]) + mul t8, s7, t8 // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1]) + addu t3, t2, t6 // tmp10 = tmp0 + tmp2 + subu t4, t2, t6 // tmp12 = tmp0 - tmp2 + mult $ac0, zero, zero + mult $ac1, zero, zero + ins t5, v0, 16, 16 + ins t7, t8, 16, 16 + addiu t9, t9, -1 + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + dpa.w.ph
$ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + mflo s4, $ac0 + mflo s5, $ac1 + addiu a0, a0, 2 + addiu t1, t1, 4 + addiu t0, t0, 2 + addu t6, t4, s4 + subu t5, t4, s4 + addu s6, t3, s5 + subu s7, t3, s5 + shra_r.w t6, t6, 12 // DESCALE(tmp12 + temp1, 12) + shra_r.w t5, t5, 12 // DESCALE(tmp12 - temp1, 12) + shra_r.w s6, s6, 12 // DESCALE(tmp10 + temp2, 12) + shra_r.w s7, s7, 12 // DESCALE(tmp10 - temp2, 12) + sw t6, 28(t1) + sw t5, 60(t1) + sw s6, -4(t1) + bgtz t9, 0b + sw s7, 92(t1) + // second loop: three more passes, input offsets advanced by one column + li t9, 3 +1: + lh s6, 34(t0) // inptr[DCTSIZE*2] + lh t6, 34(a0) // quantptr[DCTSIZE*2] + lh s7, 98(t0) // inptr[DCTSIZE*6] + lh t7, 98(a0) // quantptr[DCTSIZE*6] + mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2]) + lh s4, 2(t0) // inptr[DCTSIZE*0] + mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6]) + lh s5, 2(a0) // quantptr[DCTSIZE*0] + li s6, 15137 // FIX_1_847759065 + li s7, 6270 // FIX_0_765366865 + mul t2, s4, s5 // tmp0 = (inptr[0] * quantptr[0]) + mul v0, s6, t6 // MULTIPLY(z2, FIX_1_847759065) + lh t5, 114(t0) // inptr[DCTSIZE*7] + mul t7, s7, t7 // MULTIPLY(z3, FIX_0_765366865) + lh s4, 114(a0) // quantptr[DCTSIZE*7] + lh s5, 82(a0) // quantptr[DCTSIZE*5] + lh t6, 82(t0) // inptr[DCTSIZE*5] + sll t2, t2, 14 // tmp0 <<= (CONST_BITS+1) + lh s6, 50(a0) // quantptr[DCTSIZE*3] + lh t8, 18(t0) // inptr[DCTSIZE*1] + subu v0, v0, t7 // tmp2 = MULTIPLY(z2, FIX_1_847759065) - MULTIPLY(z3, FIX_0_765366865) + lh t7, 50(t0) // inptr[DCTSIZE*3] + lh s7, 18(a0) // quantptr[DCTSIZE*1] + mul t5, s4, t5 // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7]) + mul t6, s5, t6 // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5]) + mul t7, s6, t7 // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3]) + mul t8, s7, t8 // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1]) + addu t3, t2, v0 // tmp10 = tmp0 + tmp2 + subu t4, t2, v0 // tmp12 = tmp0 - tmp2 + mult $ac0, zero, zero + mult $ac1, zero, zero + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + dpa.w.ph $ac1,
t5, s2 + dpa.w.ph $ac1, t7, s3 + mflo t5, $ac0 + mflo t6, $ac1 + addiu t9, t9, -1 + addiu t0, t0, 2 + addiu a0, a0, 2 + addiu t1, t1, 4 + addu s5, t4, t5 + subu s4, t4, t5 + addu s6, t3, t6 + subu s7, t3, t6 + shra_r.w s5, s5, 12 // DESCALE(tmp12 + temp1, 12) + shra_r.w s4, s4, 12 // DESCALE(tmp12 - temp1, 12) + shra_r.w s6, s6, 12 // DESCALE(tmp10 + temp2, 12) + shra_r.w s7, s7, 12 // DESCALE(tmp10 - temp2, 12) + sw s5, 32(t1) + sw s4, 64(t1) + sw s6, 0(t1) + bgtz t9, 1b + sw s7, 96(t1) + move t1, v1 + li s4, 15137 + lw s6, 8(t1) // wsptr[2] + li s5, 6270 + lw s7, 24(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG)wsptr[2], FIX_1_847759065) + lw t2, 0(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG)wsptr[6], -FIX_0_765366865) + lh t5, 28(t1) // wsptr[7] + lh t6, 20(t1) // wsptr[5] + lh t7, 12(t1) // wsptr[3] + lh t8, 4(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG)wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2 + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2 + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) + sll s4, t9, 2 + lw v0, 0(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + 
// 2 + li s4, 15137 + lw s6, 40(t1) // wsptr[2] + li s5, 6270 + lw s7, 56(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG)wsptr[2], FIX_1_847759065) + lw t2, 32(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG)wsptr[6], -FIX_0_765366865) + lh t5, 60(t1) // wsptr[7] + lh t6, 52(t1) // wsptr[5] + lh t7, 44(t1) // wsptr[3] + lh t8, 36(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG)wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2 + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2 + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, CONST_BITS-PASS1_BITS+1) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, CONST_BITS-PASS1_BITS+1) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, CONST_BITS-PASS1_BITS+1) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, CONST_BITS-PASS1_BITS+1) + sll s4, t9, 2 + lw v0, 4(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + // 3 + li s4, 15137 + lw s6, 72(t1) // wsptr[2] + li s5, 6270 + lw s7, 88(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG)wsptr[2], FIX_1_847759065) + lw t2, 64(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG)wsptr[6], -FIX_0_765366865) + lh t5, 92(t1) // wsptr[7] + lh t6, 84(t1) // wsptr[5] + lh t7, 76(t1) // wsptr[3] + lh t8, 68(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, 
zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG)wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2 + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2 + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) + sll s4, t9, 2 + lw v0, 8(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + li s4, 15137 + lw s6, 104(t1) // wsptr[2] + li s5, 6270 + lw s7, 120(t1) // wsptr[6] + mul s4, s4, s6 // MULTIPLY((JLONG)wsptr[2], FIX_1_847759065) + lw t2, 96(t1) // wsptr[0] + mul s5, s5, s7 // MULTIPLY((JLONG)wsptr[6], -FIX_0_765366865) + lh t5, 124(t1) // wsptr[7] + lh t6, 116(t1) // wsptr[5] + lh t7, 108(t1) // wsptr[3] + lh t8, 100(t1) // wsptr[1] + ins t5, t6, 16, 16 + ins t7, t8, 16, 16 + mult $ac0, zero, zero + dpa.w.ph $ac0, t5, s0 + dpa.w.ph $ac0, t7, s1 + mult $ac1, zero, zero + dpa.w.ph $ac1, t5, s2 + dpa.w.ph $ac1, t7, s3 + sll t2, t2, 14 // tmp0 = ((JLONG)wsptr[0]) << (CONST_BITS+1) + mflo s6, $ac0 + // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865) + subu s4, s4, s5 + addu t3, t2, s4 // tmp10 = tmp0 + z2; + mflo s7, $ac1 + subu t4, t2, s4 // tmp10 = tmp0 - z2; + addu t7, t4, s6 + subu t8, t4, s6 + addu t5, t3, s7 + subu t6, t3, s7 + shra_r.w t5, t5, 19 // 
DESCALE(tmp10 + temp2, 19) + shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19) + shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19) + shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19) + sll s4, t9, 2 + lw v0, 12(a2) // output_buf[ctr] + shll_s.w t5, t5, 24 + shll_s.w t6, t6, 24 + shll_s.w t7, t7, 24 + shll_s.w t8, t8, 24 + sra t5, t5, 24 + sra t6, t6, 24 + sra t7, t7, 24 + sra t8, t8, 24 + addu v0, v0, a3 // outptr = output_buf[ctr] + output_col + addiu t5, t5, 128 + addiu t6, t6, 128 + addiu t7, t7, 128 + addiu t8, t8, 128 + sb t5, 0(v0) + sb t7, 1(v0) + sb t8, 2(v0) + sb t6, 3(v0) + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop +END(jsimd_idct_4x4_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_idct_6x6_dspr2) +/* + * a0 = compptr->dct_table + * a1 = coef_block + * a2 = output_buf + * a3 = output_col + */ + .set at + + SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + addiu sp, sp, -144 + move v0, sp + addiu v1, v0, 24 + addiu t9, zero, 5793 + addiu s0, zero, 10033 + addiu s1, zero, 2998 + +1: + lh s2, 0(a0) // q0 = quantptr[ 0] + lh s3, 32(a0) // q1 = quantptr[16] + lh s4, 64(a0) // q2 = quantptr[32] + lh t2, 64(a1) // tmp2 = inptr[32] + lh t1, 32(a1) // tmp1 = inptr[16] + lh t0, 0(a1) // tmp0 = inptr[ 0] + mul t2, t2, s4 // tmp2 = tmp2 * q2 + mul t1, t1, s3 // tmp1 = tmp1 * q1 + mul t0, t0, s2 // tmp0 = tmp0 * q0 + lh t6, 16(a1) // z1 = inptr[ 8] + lh t8, 80(a1) // z3 = inptr[40] + lh t7, 48(a1) // z2 = inptr[24] + lh s2, 16(a0) // q0 = quantptr[ 8] + lh s4, 80(a0) // q2 = quantptr[40] + lh s3, 48(a0) // q1 = quantptr[24] + mul t2, t2, t9 // tmp2 = tmp2 * 5793 + mul t1, t1, s0 // tmp1 = tmp1 * 10033 + sll t0, t0, 13 // tmp0 = tmp0 << 13 + mul t6, t6, s2 // z1 = z1 * q0 + mul t8, t8, s4 // z3 = z3 * q2 + mul t7, t7, s3 // z2 = z2 * q1 + addu t3, t0, t2 // tmp10 = tmp0 + tmp2 + sll t2, t2, 1 // tmp2 = tmp2 << 1 + subu t4, t0, t2 // tmp11 = tmp0 - tmp2; + subu t5,
t3, t1 // tmp12 = tmp10 - tmp1 + addu t3, t3, t1 // tmp10 = tmp10 + tmp1 + addu t1, t6, t8 // tmp1 = z1 + z3 + mul t1, t1, s1 // tmp1 = tmp1 * 2998 + shra_r.w t4, t4, 11 // tmp11 = (tmp11 + 1024) >> 11 + subu t2, t6, t8 // tmp2 = z1 - z3 + subu t2, t2, t7 // tmp2 = tmp2 - z2 + sll t2, t2, 2 // tmp2 = tmp2 << 2 + addu t0, t6, t7 // tmp0 = z1 + z2 + sll t0, t0, 13 // tmp0 = tmp0 << 13 + subu s2, t8, t7 // q0 = z3 - z2 + sll s2, s2, 13 // q0 = q0 << 13 + addu t0, t0, t1 // tmp0 = tmp0 + tmp1 + addu t1, s2, t1 // tmp1 = q0 + tmp1 + addu s2, t4, t2 // q0 = tmp11 + tmp2 + subu s3, t4, t2 // q1 = tmp11 - tmp2 + addu t6, t3, t0 // z1 = tmp10 + tmp0 + subu t7, t3, t0 // z2 = tmp10 - tmp0 + addu t4, t5, t1 // tmp11 = tmp12 + tmp1 + subu t5, t5, t1 // tmp12 = tmp12 - tmp1 + shra_r.w t6, t6, 11 // z1 = (z1 + 1024) >> 11 + shra_r.w t7, t7, 11 // z2 = (z2 + 1024) >> 11 + shra_r.w t4, t4, 11 // tmp11 = (tmp11 + 1024) >> 11 + shra_r.w t5, t5, 11 // tmp12 = (tmp12 + 1024) >> 11 + sw s2, 24(v0) + sw s3, 96(v0) + sw t6, 0(v0) + sw t7, 120(v0) + sw t4, 48(v0) + sw t5, 72(v0) + addiu v0, v0, 4 + addiu a1, a1, 2 + bne v0, v1, 1b + addiu a0, a0, 2 + + /* Pass 2: process 6 rows from work array, store into output array. 
*/ + move v0, sp + addiu v1, v0, 144 + +2: + lw t0, 0(v0) + lw t2, 16(v0) + lw s5, 0(a2) + addiu t0, t0, 16 + sll t0, t0, 13 + mul t3, t2, t9 + lw t6, 4(v0) + lw t8, 20(v0) + lw t7, 12(v0) + addu s5, s5, a3 + addu s6, t6, t8 + mul s6, s6, s1 + addu t1, t0, t3 + subu t4, t0, t3 + subu t4, t4, t3 + lw t3, 8(v0) + mul t0, t3, s0 + addu s7, t6, t7 + sll s7, s7, 13 + addu s7, s6, s7 + subu t2, t8, t7 + sll t2, t2, 13 + addu t2, s6, t2 + subu s6, t6, t7 + subu s6, s6, t8 + sll s6, s6, 13 + addu t3, t1, t0 + subu t5, t1, t0 + addu t6, t3, s7 + subu t3, t3, s7 + addu t7, t4, s6 + subu t4, t4, s6 + addu t8, t5, t2 + subu t5, t5, t2 + shll_s.w t6, t6, 6 + shll_s.w t3, t3, 6 + shll_s.w t7, t7, 6 + shll_s.w t4, t4, 6 + shll_s.w t8, t8, 6 + shll_s.w t5, t5, 6 + sra t6, t6, 24 + addiu t6, t6, 128 + sra t3, t3, 24 + addiu t3, t3, 128 + sb t6, 0(s5) + sra t7, t7, 24 + addiu t7, t7, 128 + sb t3, 5(s5) + sra t4, t4, 24 + addiu t4, t4, 128 + sb t7, 1(s5) + sra t8, t8, 24 + addiu t8, t8, 128 + sb t4, 4(s5) + addiu v0, v0, 24 + sra t5, t5, 24 + addiu t5, t5, 128 + sb t8, 2(s5) + addiu a2, a2, 4 + bne v0, v1, 2b + sb t5, 3(s5) + + addiu sp, sp, 144 + + RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7 + + j ra + nop + +END(jsimd_idct_6x6_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_idct_12x12_pass1_dspr2) +/* + * a0 = compptr->dct_table + * a1 = coef_block + * a2 = workspace + */ + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + li a3, 8 + +1: + // odd part + lh t0, 48(a1) + lh t1, 48(a0) + lh t2, 16(a1) + lh t3, 16(a0) + lh t4, 80(a1) + lh t5, 80(a0) + lh t6, 112(a1) + lh t7, 112(a0) + mul t0, t0, t1 // z2 + mul t1, t2, t3 // z1 + mul t2, t4, t5 // z3 + mul t3, t6, t7 // z4 + li t4, 10703 // FIX(1.306562965) + li t5, 4433 // FIX_0_541196100 + li t6, 7053 // FIX(0.860918669) + mul t4, t0, t4 // tmp11 + mul t5, t0, t5 // -tmp14 + addu t7, t1, t2 // tmp10 + addu t8, t7, t3 // tmp10 + z4 + mul t6, t6, t8 // tmp15 + li t8, 
2139 // FIX(0.261052384) + mul t8, t7, t8 // MULTIPLY(tmp10, FIX(0.261052384)) + li t7, 2295 // FIX(0.280143716) + mul t7, t1, t7 // MULTIPLY(z1, FIX(0.280143716)) + addu t9, t2, t3 // z3 + z4 + li s0, 8565 // FIX(1.045510580) + mul t9, t9, s0 // -tmp13 + li s0, 12112 // FIX(1.478575242) + mul s0, t2, s0 // MULTIPLY(z3, FIX(1.478575242)) + li s1, 12998 // FIX(1.586706681) + mul s1, t3, s1 // MULTIPLY(z4, FIX(1.586706681)) + li s2, 5540 // FIX(0.676326758) + mul s2, t1, s2 // MULTIPLY(z1, FIX(0.676326758)) + li s3, 16244 // FIX(1.982889723) + mul s3, t3, s3 // MULTIPLY(z4, FIX(1.982889723)) + subu t1, t1, t3 // z1-=z4 + subu t0, t0, t2 // z2-=z3 + addu t2, t0, t1 // z1+z2 + li t3, 4433 // FIX_0_541196100 + mul t2, t2, t3 // z3 + li t3, 6270 // FIX_0_765366865 + mul t1, t1, t3 // MULTIPLY(z1, FIX_0_765366865) + li t3, 15137 // FIX_1_847759065 + mul t0, t0, t3 // MULTIPLY(z2, FIX_1_847759065) + addu t8, t6, t8 // tmp12 + addu t3, t8, t4 // tmp12 + tmp11 + addu t3, t3, t7 // tmp10 + subu t8, t8, t9 // tmp12 + tmp13 + addu s0, t5, s0 + subu t8, t8, s0 // tmp12 + subu t9, t6, t9 + subu s1, s1, t4 + addu t9, t9, s1 // tmp13 + subu t6, t6, t5 + subu t6, t6, s2 + subu t6, t6, s3 // tmp15 + // even part start + lh t4, 64(a1) + lh t5, 64(a0) + lh t7, 32(a1) + lh s0, 32(a0) + lh s1, 0(a1) + lh s2, 0(a0) + lh s3, 96(a1) + lh v0, 96(a0) + mul t4, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]) + mul t5, t7, s0 // DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]) + mul t7, s1, s2 // DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) + mul s0, s3, v0 // DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]) + // odd part end + addu t1, t2, t1 // tmp11 + subu t0, t2, t0 // tmp14 + // update counter and pointers + addiu a3, a3, -1 + addiu a0, a0, 2 + addiu a1, a1, 2 + // even part rest + li s1, 10033 // FIX(1.224744871) + li s2, 11190 // FIX(1.366025404) + mul t4, t4, s1 // z4 + mul s1, t5, s2 // z4 + sll t5, t5, 13 // z1 + sll t7, t7, 13 + addiu t7, t7, 1024 // z3 + sll s0, s0, 13 // z2 + addu s2, t7, t4 // tmp10 +
subu t4, t7, t4 // tmp11 + subu s3, t5, s0 // tmp12 + addu t2, t7, s3 // tmp21 + subu s3, t7, s3 // tmp24 + addu t7, s1, s0 // tmp12 + addu v0, s2, t7 // tmp20 + subu s2, s2, t7 // tmp25 + subu s1, s1, t5 // z4 - z1 + subu s1, s1, s0 // tmp12 + addu s0, t4, s1 // tmp22 + subu t4, t4, s1 // tmp23 + // final output stage + addu t5, v0, t3 + subu v0, v0, t3 + addu t3, t2, t1 + subu t2, t2, t1 + addu t1, s0, t8 + subu s0, s0, t8 + addu t8, t4, t9 + subu t4, t4, t9 + addu t9, s3, t0 + subu s3, s3, t0 + addu t0, s2, t6 + subu s2, s2, t6 + sra t5, t5, 11 + sra t3, t3, 11 + sra t1, t1, 11 + sra t8, t8, 11 + sra t9, t9, 11 + sra t0, t0, 11 + sra s2, s2, 11 + sra s3, s3, 11 + sra t4, t4, 11 + sra s0, s0, 11 + sra t2, t2, 11 + sra v0, v0, 11 + sw t5, 0(a2) + sw t3, 32(a2) + sw t1, 64(a2) + sw t8, 96(a2) + sw t9, 128(a2) + sw t0, 160(a2) + sw s2, 192(a2) + sw s3, 224(a2) + sw t4, 256(a2) + sw s0, 288(a2) + sw t2, 320(a2) + sw v0, 352(a2) + bgtz a3, 1b + addiu a2, a2, 4 + + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + j ra + nop + +END(jsimd_idct_12x12_pass1_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_idct_12x12_pass2_dspr2) +/* + * a0 = workspace + * a1 = output + */ + SAVE_REGS_ON_STACK 16, s0, s1, s2, s3 + + li a3, 12 + +1: + // Odd part + lw t0, 12(a0) + lw t1, 4(a0) + lw t2, 20(a0) + lw t3, 28(a0) + li t4, 10703 // FIX(1.306562965) + li t5, 4433 // FIX_0_541196100 + mul t4, t0, t4 // tmp11 + mul t5, t0, t5 // -tmp14 + addu t6, t1, t2 // tmp10 + li t7, 2139 // FIX(0.261052384) + mul t7, t6, t7 // MULTIPLY(tmp10, FIX(0.261052384)) + addu t6, t6, t3 // tmp10 + z4 + li t8, 7053 // FIX(0.860918669) + mul t6, t6, t8 // tmp15 + li t8, 2295 // FIX(0.280143716) + mul t8, t1, t8 // MULTIPLY(z1, FIX(0.280143716)) + addu t9, t2, t3 // z3 + z4 + li s0, 8565 // FIX(1.045510580) + mul t9, t9, s0 // -tmp13 + li s0, 12112 // FIX(1.478575242) + mul s0, t2, s0 // MULTIPLY(z3, FIX(1.478575242)) + li s1, 12998 // 
FIX(1.586706681) + mul s1, t3, s1 // MULTIPLY(z4, FIX(1.586706681)) + li s2, 5540 // FIX(0.676326758) + mul s2, t1, s2 // MULTIPLY(z1, FIX(0.676326758)) + li s3, 16244 // FIX(1.982889723) + mul s3, t3, s3 // MULTIPLY(z4, FIX(1.982889723)) + subu t1, t1, t3 // z1 -= z4 + subu t0, t0, t2 // z2 -= z3 + addu t2, t1, t0 // z1 + z2 + li t3, 4433 // FIX_0_541196100 + mul t2, t2, t3 // z3 + li t3, 6270 // FIX_0_765366865 + mul t1, t1, t3 // MULTIPLY(z1, FIX_0_765366865) + li t3, 15137 // FIX_1_847759065 + mul t0, t0, t3 // MULTIPLY(z2, FIX_1_847759065) + addu t3, t6, t7 // tmp12 + addu t7, t3, t4 + addu t7, t7, t8 // tmp10 + subu t3, t3, t9 + subu t3, t3, t5 + subu t3, t3, s0 // tmp12 + subu t9, t6, t9 + subu t9, t9, t4 + addu t9, t9, s1 // tmp13 + subu t6, t6, t5 + subu t6, t6, s2 + subu t6, t6, s3 // tmp15 + addu t1, t2, t1 // tmp11 + subu t0, t2, t0 // tmp14 + // even part + lw t2, 16(a0) // z4 + lw t4, 8(a0) // z1 + lw t5, 0(a0) // z3 + lw t8, 24(a0) // z2 + li s0, 10033 // FIX(1.224744871) + li s1, 11190 // FIX(1.366025404) + mul t2, t2, s0 // z4 + mul s0, t4, s1 // z4 + addiu t5, t5, 0x10 + sll t5, t5, 13 // z3 + sll t4, t4, 13 // z1 + sll t8, t8, 13 // z2 + subu s1, t4, t8 // tmp12 + addu s2, t5, t2 // tmp10 + subu t2, t5, t2 // tmp11 + addu s3, t5, s1 // tmp21 + subu s1, t5, s1 // tmp24 + addu t5, s0, t8 // tmp12 + addu v0, s2, t5 // tmp20 + subu t5, s2, t5 // tmp25 + subu t4, s0, t4 + subu t4, t4, t8 // tmp12 + addu t8, t2, t4 // tmp22 + subu t2, t2, t4 // tmp23 + // increment counter and pointers + addiu a3, a3, -1 + addiu a0, a0, 32 + // Final stage + addu t4, v0, t7 + subu v0, v0, t7 + addu t7, s3, t1 + subu s3, s3, t1 + addu t1, t8, t3 + subu t8, t8, t3 + addu t3, t2, t9 + subu t2, t2, t9 + addu t9, s1, t0 + subu s1, s1, t0 + addu t0, t5, t6 + subu t5, t5, t6 + sll t4, t4, 4 + sll t7, t7, 4 + sll t1, t1, 4 + sll t3, t3, 4 + sll t9, t9, 4 + sll t0, t0, 4 + sll t5, t5, 4 + sll s1, s1, 4 + sll t2, t2, 4 + sll t8, t8, 4 + sll s3, s3, 4 + sll v0, v0, 4 + shll_s.w 
t4, t4, 2 + shll_s.w t7, t7, 2 + shll_s.w t1, t1, 2 + shll_s.w t3, t3, 2 + shll_s.w t9, t9, 2 + shll_s.w t0, t0, 2 + shll_s.w t5, t5, 2 + shll_s.w s1, s1, 2 + shll_s.w t2, t2, 2 + shll_s.w t8, t8, 2 + shll_s.w s3, s3, 2 + shll_s.w v0, v0, 2 + srl t4, t4, 24 + srl t7, t7, 24 + srl t1, t1, 24 + srl t3, t3, 24 + srl t9, t9, 24 + srl t0, t0, 24 + srl t5, t5, 24 + srl s1, s1, 24 + srl t2, t2, 24 + srl t8, t8, 24 + srl s3, s3, 24 + srl v0, v0, 24 + lw t6, 0(a1) + addiu t4, t4, 0x80 + addiu t7, t7, 0x80 + addiu t1, t1, 0x80 + addiu t3, t3, 0x80 + addiu t9, t9, 0x80 + addiu t0, t0, 0x80 + addiu t5, t5, 0x80 + addiu s1, s1, 0x80 + addiu t2, t2, 0x80 + addiu t8, t8, 0x80 + addiu s3, s3, 0x80 + addiu v0, v0, 0x80 + sb t4, 0(t6) + sb t7, 1(t6) + sb t1, 2(t6) + sb t3, 3(t6) + sb t9, 4(t6) + sb t0, 5(t6) + sb t5, 6(t6) + sb s1, 7(t6) + sb t2, 8(t6) + sb t8, 9(t6) + sb s3, 10(t6) + sb v0, 11(t6) + bgtz a3, 1b + addiu a1, a1, 4 + + RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3 + + jr ra + nop + +END(jsimd_idct_12x12_pass2_dspr2) + + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_convsamp_dspr2) +/* + * a0 = sample_data + * a1 = start_col + * a2 = workspace + */ + lw t0, 0(a0) + li t7, 0xff80ff80 + addu t0, t0, a1 + ulw t1, 0(t0) + ulw t2, 4(t0) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + lw t0, 4(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 0(a2) + usw t4, 4(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 8(a2) + usw t6, 12(a2) + + lw t0, 8(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 16(a2) + usw t4, 20(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 24(a2) + usw t6, 28(a2) + + lw t0, 12(a0) + preceu.ph.qbr t5, 
t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 32(a2) + usw t4, 36(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 40(a2) + usw t6, 44(a2) + + lw t0, 16(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 48(a2) + usw t4, 52(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 56(a2) + usw t6, 60(a2) + + lw t0, 20(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 64(a2) + usw t4, 68(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 72(a2) + usw t6, 76(a2) + + lw t0, 24(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 80(a2) + usw t4, 84(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 88(a2) + usw t6, 92(a2) + + lw t0, 28(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 96(a2) + usw t4, 100(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 104(a2) + usw t6, 108(a2) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 112(a2) + usw t4, 116(a2) + usw t5, 120(a2) + usw t6, 124(a2) + + j ra + nop + +END(jsimd_convsamp_dspr2) + + +#ifndef __mips_soft_float + +/*****************************************************************************/ +LEAF_DSPR2(jsimd_convsamp_float_dspr2) +/* + * a0 = sample_data + * a1 = start_col + * a2 = workspace + */ + .set 
at + + lw t0, 0(a0) + addu t0, t0, a1 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 4(a0) + swc1 f2, 0(a2) + swc1 f4, 4(a2) + swc1 f6, 8(a2) + addu t0, t0, a1 + swc1 f8, 12(a2) + swc1 f10, 16(a2) + swc1 f12, 20(a2) + swc1 f14, 24(a2) + swc1 f16, 28(a2) + // elemr 1 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 8(a0) + swc1 f2, 32(a2) + swc1 f4, 36(a2) + swc1 f6, 40(a2) + addu t0, t0, a1 + swc1 f8, 44(a2) + swc1 f10, 48(a2) + swc1 f12, 52(a2) + swc1 f14, 56(a2) + swc1 f16, 60(a2) + // elemr 2 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + 
cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 12(a0) + swc1 f2, 64(a2) + swc1 f4, 68(a2) + swc1 f6, 72(a2) + addu t0, t0, a1 + swc1 f8, 76(a2) + swc1 f10, 80(a2) + swc1 f12, 84(a2) + swc1 f14, 88(a2) + swc1 f16, 92(a2) + // elemr 3 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 16(a0) + swc1 f2, 96(a2) + swc1 f4, 100(a2) + swc1 f6, 104(a2) + addu t0, t0, a1 + swc1 f8, 108(a2) + swc1 f10, 112(a2) + swc1 f12, 116(a2) + swc1 f14, 120(a2) + swc1 f16, 124(a2) + // elemr 4 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 20(a0) + swc1 f2, 128(a2) + swc1 f4, 132(a2) + swc1 f6, 136(a2) + addu t0, t0, a1 + swc1 f8, 140(a2) + swc1 f10, 144(a2) + swc1 f12, 148(a2) + swc1 f14, 152(a2) + swc1 f16, 156(a2) + // elemr 5 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + 
addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 24(a0) + swc1 f2, 160(a2) + swc1 f4, 164(a2) + swc1 f6, 168(a2) + addu t0, t0, a1 + swc1 f8, 172(a2) + swc1 f10, 176(a2) + swc1 f12, 180(a2) + swc1 f14, 184(a2) + swc1 f16, 188(a2) + // elemr 6 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + lw t0, 28(a0) + swc1 f2, 192(a2) + swc1 f4, 196(a2) + swc1 f6, 200(a2) + addu t0, t0, a1 + swc1 f8, 204(a2) + swc1 f10, 208(a2) + swc1 f12, 212(a2) + swc1 f14, 216(a2) + swc1 f16, 220(a2) + // elemr 7 + lbu t1, 0(t0) + lbu t2, 1(t0) + lbu t3, 2(t0) + lbu t4, 3(t0) + lbu t5, 4(t0) + lbu t6, 5(t0) + lbu t7, 6(t0) + lbu t8, 7(t0) + addiu t1, t1, -128 + addiu t2, t2, -128 + addiu t3, t3, -128 + addiu t4, t4, -128 + addiu t5, t5, -128 + addiu t6, t6, -128 + addiu t7, t7, -128 + addiu t8, t8, -128 + mtc1 t1, f2 + mtc1 t2, f4 + mtc1 t3, f6 + mtc1 t4, f8 + mtc1 t5, f10 + mtc1 t6, f12 + mtc1 t7, f14 + mtc1 t8, f16 + cvt.s.w f2, f2 + cvt.s.w f4, f4 + cvt.s.w f6, f6 + cvt.s.w f8, f8 + cvt.s.w f10, f10 + cvt.s.w f12, f12 + cvt.s.w f14, f14 + cvt.s.w f16, f16 + swc1 f2, 224(a2) + swc1 f4, 228(a2) + swc1 f6, 232(a2) + swc1 f8, 236(a2) + swc1 f10, 240(a2) + swc1 f12, 244(a2) + 
swc1 f14, 248(a2) + swc1 f16, 252(a2) + + j ra + nop + +END(jsimd_convsamp_float_dspr2) + +#endif + +/*****************************************************************************/ diff --git a/simd/mips/jsimd_dspr2_asm.h b/simd/mips/jsimd_dspr2_asm.h new file mode 100644 index 0000000..12cfda4 --- /dev/null +++ b/simd/mips/jsimd_dspr2_asm.h @@ -0,0 +1,292 @@ +/* + * MIPS DSPr2 optimizations for libjpeg-turbo + * + * Copyright (C) 2013, MIPS Technologies, Inc., California. + * Copyright (C) 2018, Matthieu Darbois. + * All Rights Reserved. + * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com) + * Darko Laus (darko.laus@imgtec.com) + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#define zero $0 +#define AT $1 +#define v0 $2 +#define v1 $3 +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define t0 $8 +#define t1 $9 +#define t2 $10 +#define t3 $11 +#define t4 $12 +#define t5 $13 +#define t6 $14 +#define t7 $15 +#define s0 $16 +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 +#define t9 $25 +#define k0 $26 +#define k1 $27 +#define gp $28 +#define sp $29 +#define fp $30 +#define s8 $30 +#define ra $31 + +#define f0 $f0 +#define f1 $f1 +#define f2 $f2 +#define f3 $f3 +#define f4 $f4 +#define f5 $f5 +#define f6 $f6 +#define f7 $f7 +#define f8 $f8 +#define f9 $f9 +#define f10 $f10 +#define f11 $f11 +#define f12 $f12 +#define f13 $f13 +#define f14 $f14 +#define f15 $f15 +#define f16 $f16 +#define f17 $f17 +#define f18 $f18 +#define f19 $f19 +#define f20 $f20 +#define f21 $f21 +#define f22 $f22 +#define f23 $f23 +#define f24 $f24 +#define f25 $f25 +#define f26 $f26 +#define f27 $f27 +#define f28 $f28 +#define f29 $f29 +#define f30 $f30 +#define f31 $f31 + +#ifdef __ELF__ +#define HIDDEN_SYMBOL(symbol) .hidden symbol; +#else +#define HIDDEN_SYMBOL(symbol) +#endif + +/* + * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2 + */ +#define LEAF_MIPS32R2(symbol) \ + .globl symbol; \ + HIDDEN_SYMBOL(symbol) \ + .align 2; \ + .type symbol, @function; \ + .ent symbol, 0; \ +symbol: \ + .frame sp, 0, ra; \ + .set push; \ + .set arch = mips32r2; \ + .set noreorder; \ + .set noat; + +/* + * LEAF_DSPR2 - declare leaf routine for MIPS DSPr2 + */ +#define LEAF_DSPR2(symbol) \ +LEAF_MIPS32R2(symbol) \ + .set dspr2; + +/* + * END - mark end of function + */ +#define END(function) \ + .set pop; \ + .end function; \ + .size function, .-function + +/* + * Checks if stack offset is big enough for storing/restoring regs_num + * number of register to/from stack. Stack offset must be greater than + * or equal to the number of bytes needed for storing registers (regs_num*4). 
+ * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is + * preserved for input arguments of the functions, already stored in a0-a3), + * stack size can be further optimized by utilizing this space. + */ +.macro CHECK_STACK_OFFSET regs_num, stack_offset +.if \stack_offset < \regs_num * 4 - 16 +.error "Stack offset too small." +.endif +.endm + +/* + * Saves set of registers on stack. Maximum number of registers that + * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7). + * Stack offset is number of bytes that are added to stack pointer (sp) + * before registers are pushed in order to provide enough space on stack + * (offset must be multiple of 4, and must be big enough, as described by + * CHECK_STACK_OFFSET macro). This macro is intended to be used in + * combination with RESTORE_REGS_FROM_STACK macro. Example: + * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 + * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 + */ +.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \ + r2 = 0, r3 = 0, r4 = 0, \ + r5 = 0, r6 = 0, r7 = 0, \ + r8 = 0, r9 = 0, r10 = 0, \ + r11 = 0, r12 = 0, r13 = 0, \ + r14 = 0 +.if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4) + .error "Stack offset must be pozitive and multiple of 4." 
+.endif +.if \stack_offset != 0 + addiu sp, sp, -\stack_offset +.endif + sw \r1, 0(sp) +.if \r2 != 0 + sw \r2, 4(sp) +.endif +.if \r3 != 0 + sw \r3, 8(sp) +.endif +.if \r4 != 0 + sw \r4, 12(sp) +.endif +.if \r5 != 0 + CHECK_STACK_OFFSET 5, \stack_offset + sw \r5, 16(sp) +.endif +.if \r6 != 0 + CHECK_STACK_OFFSET 6, \stack_offset + sw \r6, 20(sp) +.endif +.if \r7 != 0 + CHECK_STACK_OFFSET 7, \stack_offset + sw \r7, 24(sp) +.endif +.if \r8 != 0 + CHECK_STACK_OFFSET 8, \stack_offset + sw \r8, 28(sp) +.endif +.if \r9 != 0 + CHECK_STACK_OFFSET 9, \stack_offset + sw \r9, 32(sp) +.endif +.if \r10 != 0 + CHECK_STACK_OFFSET 10, \stack_offset + sw \r10, 36(sp) +.endif +.if \r11 != 0 + CHECK_STACK_OFFSET 11, \stack_offset + sw \r11, 40(sp) +.endif +.if \r12 != 0 + CHECK_STACK_OFFSET 12, \stack_offset + sw \r12, 44(sp) +.endif +.if \r13 != 0 + CHECK_STACK_OFFSET 13, \stack_offset + sw \r13, 48(sp) +.endif +.if \r14 != 0 + CHECK_STACK_OFFSET 14, \stack_offset + sw \r14, 52(sp) +.endif +.endm + +/* + * Restores set of registers from stack. Maximum number of registers that + * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7). + * Stack offset is number of bytes that are added to stack pointer (sp) + * after registers are restored (offset must be multiple of 4, and must + * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is + * intended to be used in combination with SAVE_REGS_ON_STACK macro. + * Example: + * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1 + * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1 + */ +.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \ + r2 = 0, r3 = 0, r4 = 0, \ + r5 = 0, r6 = 0, r7 = 0, \ + r8 = 0, r9 = 0, r10 = 0, \ + r11 = 0, r12 = 0, r13 = 0, \ + r14 = 0 +.if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4) + .error "Stack offset must be pozitive and multiple of 4." 
+.endif + lw \r1, 0(sp) +.if \r2 != 0 + lw \r2, 4(sp) +.endif +.if \r3 != 0 + lw \r3, 8(sp) +.endif +.if \r4 != 0 + lw \r4, 12(sp) +.endif +.if \r5 != 0 + CHECK_STACK_OFFSET 5, \stack_offset + lw \r5, 16(sp) +.endif +.if \r6 != 0 + CHECK_STACK_OFFSET 6, \stack_offset + lw \r6, 20(sp) +.endif +.if \r7 != 0 + CHECK_STACK_OFFSET 7, \stack_offset + lw \r7, 24(sp) +.endif +.if \r8 != 0 + CHECK_STACK_OFFSET 8, \stack_offset + lw \r8, 28(sp) +.endif +.if \r9 != 0 + CHECK_STACK_OFFSET 9, \stack_offset + lw \r9, 32(sp) +.endif +.if \r10 != 0 + CHECK_STACK_OFFSET 10, \stack_offset + lw \r10, 36(sp) +.endif +.if \r11 != 0 + CHECK_STACK_OFFSET 11, \stack_offset + lw \r11, 40(sp) +.endif +.if \r12 != 0 + CHECK_STACK_OFFSET 12, \stack_offset + lw \r12, 44(sp) +.endif +.if \r13 != 0 + CHECK_STACK_OFFSET 13, \stack_offset + lw \r13, 48(sp) +.endif +.if \r14 != 0 + CHECK_STACK_OFFSET 14, \stack_offset + lw \r14, 52(sp) +.endif +.if \stack_offset != 0 + addiu sp, sp, \stack_offset +.endif +.endm diff --git a/simd/nasm/jcolsamp.inc b/simd/nasm/jcolsamp.inc new file mode 100644 index 0000000..a2d5b49 --- /dev/null +++ b/simd/nasm/jcolsamp.inc @@ -0,0 +1,137 @@ +; +; jcolsamp.inc - private declarations for color conversion & up/downsampling +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; [TAB8] + +; -------------------------------------------------------------------------- + +; pseudo-registers to make ordering of RGB configurable +; +%if RGB_RED == 0 +%define mmA mm0 +%define mmB mm1 +%define xmmA xmm0 +%define xmmB xmm1 +%define ymmA ymm0 +%define ymmB ymm1 +%elif RGB_GREEN == 0 +%define mmA mm2 +%define mmB mm3 +%define xmmA xmm2 +%define xmmB xmm3 +%define ymmA ymm2 +%define ymmB ymm3 +%elif RGB_BLUE == 0 +%define mmA mm4 +%define mmB mm5 +%define xmmA xmm4 +%define xmmB xmm5 +%define ymmA ymm4 +%define ymmB ymm5 +%else +%define mmA mm6 +%define mmB mm7 +%define xmmA xmm6 +%define xmmB xmm7 +%define ymmA ymm6 +%define ymmB ymm7 +%endif + +%if RGB_RED == 1 +%define mmC mm0 +%define mmD mm1 +%define xmmC xmm0 +%define xmmD xmm1 +%define ymmC ymm0 +%define ymmD ymm1 +%elif RGB_GREEN == 1 +%define mmC mm2 +%define mmD mm3 +%define xmmC xmm2 +%define xmmD xmm3 +%define ymmC ymm2 +%define ymmD ymm3 +%elif RGB_BLUE == 1 +%define mmC mm4 +%define mmD mm5 +%define xmmC xmm4 +%define xmmD xmm5 +%define ymmC ymm4 +%define ymmD ymm5 +%else +%define mmC mm6 +%define mmD mm7 +%define xmmC xmm6 +%define xmmD xmm7 +%define ymmC ymm6 +%define ymmD ymm7 +%endif + +%if RGB_RED == 2 +%define mmE mm0 +%define mmF mm1 +%define xmmE xmm0 +%define xmmF xmm1 +%define ymmE ymm0 +%define ymmF ymm1 +%elif RGB_GREEN == 2 +%define mmE mm2 +%define mmF mm3 +%define xmmE xmm2 +%define xmmF xmm3 +%define ymmE ymm2 +%define ymmF ymm3 +%elif RGB_BLUE == 2 +%define mmE mm4 +%define mmF mm5 +%define xmmE xmm4 +%define xmmF xmm5 +%define ymmE ymm4 +%define ymmF ymm5 +%else +%define mmE mm6 +%define mmF mm7 +%define xmmE xmm6 +%define xmmF xmm7 +%define ymmE ymm6 +%define ymmF ymm7 +%endif + +%if RGB_RED == 3 +%define mmG mm0 +%define mmH mm1 +%define xmmG xmm0 +%define xmmH xmm1 +%define ymmG ymm0 +%define ymmH ymm1 +%elif RGB_GREEN == 3 +%define mmG mm2 +%define mmH mm3 +%define xmmG xmm2 
+%define xmmH xmm3 +%define ymmG ymm2 +%define ymmH ymm3 +%elif RGB_BLUE == 3 +%define mmG mm4 +%define mmH mm5 +%define xmmG xmm4 +%define xmmH xmm5 +%define ymmG ymm4 +%define ymmH ymm5 +%else +%define mmG mm6 +%define mmH mm7 +%define xmmG xmm6 +%define xmmH xmm7 +%define ymmG ymm6 +%define ymmH ymm7 +%endif + +; -------------------------------------------------------------------------- diff --git a/simd/jdct.inc b/simd/nasm/jdct.inc similarity index 64% rename from simd/jdct.inc rename to simd/nasm/jdct.inc index b976107..79d5146 100644 --- a/simd/jdct.inc +++ b/simd/nasm/jdct.inc @@ -2,6 +2,7 @@ ; jdct.inc - private declarations for forward & reverse DCT subsystems ; ; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2018, D. R. Commander. ; ; Based on the x86 SIMD extension for IJG JPEG library ; Copyright (C) 1999-2006, MIYASAKA Masaru. @@ -17,11 +18,16 @@ ; %define RANGE_MASK (MAXJSAMPLE * 4 + 3) ; 2 bits wider than legal samples -%define ROW(n,b,s) ((b)+(n)*(s)) -%define COL(n,b,s) ((b)+(n)*(s)*DCTSIZE) +%define ROW(n, b, s) ((b) + (n) * (s)) +%define COL(n, b, s) ((b) + (n) * (s) * DCTSIZE) -%define DWBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD) -%define MMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD) -%define XMMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD) +%define DWBLOCK(m, n, b, s) \ + ((b) + (m) * DCTSIZE * (s) + (n) * SIZEOF_DWORD) +%define MMBLOCK(m, n, b, s) \ + ((b) + (m) * DCTSIZE * (s) + (n) * SIZEOF_MMWORD) +%define XMMBLOCK(m, n, b, s) \ + ((b) + (m) * DCTSIZE * (s) + (n) * SIZEOF_XMMWORD) +%define YMMBLOCK(m, n, b, s) \ + ((b) + (m) * DCTSIZE * (s) + (n) * SIZEOF_YMMWORD) ; -------------------------------------------------------------------------- diff --git a/simd/jpeg_nbits_table.inc b/simd/nasm/jpeg_nbits_table.inc similarity index 92% rename from simd/jpeg_nbits_table.inc rename to simd/nasm/jpeg_nbits_table.inc index cbc6990..2ce6c28 100644 --- a/simd/jpeg_nbits_table.inc +++ 
b/simd/nasm/jpeg_nbits_table.inc @@ -1,4097 +1,4097 @@ -jpeg_nbits_table db \ - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, \ - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, \ - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, \ - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, \ - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 
11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 
12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 
12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 
12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
\ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
\ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
\ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
\ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
\ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
\ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
\ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
\ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, \ +jpeg_nbits_table db \ + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, \ + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, \ + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, \ + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 
11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 
11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 
12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 
12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
\ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
\ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
\ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
\ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
\ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
\ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
\ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
\ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, \ 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 diff --git a/win/jsimdcfg.inc b/simd/nasm/jsimdcfg.inc old mode 100755 new mode 100644 similarity index 97% rename from win/jsimdcfg.inc rename to simd/nasm/jsimdcfg.inc index 9d4aede..667024a --- a/win/jsimdcfg.inc +++ b/simd/nasm/jsimdcfg.inc @@ -90,5 +90,4 @@ %define JSIMD_3DNOW 0x02 %define JSIMD_SSE 0x04 %define JSIMD_SSE2 0x08 -; Short forms of external names for systems with brain-damaged linkers. -; +%define JSIMD_AVX2 0x80 diff --git a/simd/nasm/jsimdcfg.inc.h b/simd/nasm/jsimdcfg.inc.h new file mode 100644 index 0000000..7ff7e29 --- /dev/null +++ b/simd/nasm/jsimdcfg.inc.h @@ -0,0 +1,131 @@ +// This file generates the include file for the assembly +// implementations by abusing the C preprocessor. +// +// Note: Some things are manually defined as they need to +// be mapped to NASM types. + +; +; Automatically generated include file from jsimdcfg.inc.h +; + +#define JPEG_INTERNALS + +#include "../jpeglib.h" +#include "../jconfig.h" +#include "../jmorecfg.h" +#include "jsimd.h" + +; +; -- jpeglib.h +; + +%define _cpp_protection_DCTSIZE DCTSIZE +%define _cpp_protection_DCTSIZE2 DCTSIZE2 + +; +; -- jmorecfg.h +; + +%define _cpp_protection_RGB_RED RGB_RED +%define _cpp_protection_RGB_GREEN RGB_GREEN +%define _cpp_protection_RGB_BLUE RGB_BLUE +%define _cpp_protection_RGB_PIXELSIZE RGB_PIXELSIZE + +%define _cpp_protection_EXT_RGB_RED EXT_RGB_RED +%define _cpp_protection_EXT_RGB_GREEN EXT_RGB_GREEN +%define _cpp_protection_EXT_RGB_BLUE EXT_RGB_BLUE +%define _cpp_protection_EXT_RGB_PIXELSIZE EXT_RGB_PIXELSIZE + +%define _cpp_protection_EXT_RGBX_RED EXT_RGBX_RED +%define _cpp_protection_EXT_RGBX_GREEN EXT_RGBX_GREEN +%define _cpp_protection_EXT_RGBX_BLUE EXT_RGBX_BLUE +%define _cpp_protection_EXT_RGBX_PIXELSIZE EXT_RGBX_PIXELSIZE + +%define _cpp_protection_EXT_BGR_RED EXT_BGR_RED +%define _cpp_protection_EXT_BGR_GREEN EXT_BGR_GREEN +%define _cpp_protection_EXT_BGR_BLUE EXT_BGR_BLUE +%define 
_cpp_protection_EXT_BGR_PIXELSIZE EXT_BGR_PIXELSIZE + +%define _cpp_protection_EXT_BGRX_RED EXT_BGRX_RED +%define _cpp_protection_EXT_BGRX_GREEN EXT_BGRX_GREEN +%define _cpp_protection_EXT_BGRX_BLUE EXT_BGRX_BLUE +%define _cpp_protection_EXT_BGRX_PIXELSIZE EXT_BGRX_PIXELSIZE + +%define _cpp_protection_EXT_XBGR_RED EXT_XBGR_RED +%define _cpp_protection_EXT_XBGR_GREEN EXT_XBGR_GREEN +%define _cpp_protection_EXT_XBGR_BLUE EXT_XBGR_BLUE +%define _cpp_protection_EXT_XBGR_PIXELSIZE EXT_XBGR_PIXELSIZE + +%define _cpp_protection_EXT_XRGB_RED EXT_XRGB_RED +%define _cpp_protection_EXT_XRGB_GREEN EXT_XRGB_GREEN +%define _cpp_protection_EXT_XRGB_BLUE EXT_XRGB_BLUE +%define _cpp_protection_EXT_XRGB_PIXELSIZE EXT_XRGB_PIXELSIZE + +%define RGBX_FILLER_0XFF 1 + +; Representation of a single sample (pixel element value). +; On this SIMD implementation, this must be 'unsigned char'. +; + +%define JSAMPLE byte ; unsigned char +%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE) + +%define _cpp_protection_CENTERJSAMPLE CENTERJSAMPLE + +; Representation of a DCT frequency coefficient. +; On this SIMD implementation, this must be 'short'. +; +%define JCOEF word ; short +%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF) + +; Datatype used for image dimensions. +; On this SIMD implementation, this must be 'unsigned int'. 
+; +%define JDIMENSION dword ; unsigned int +%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION) + +%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h) +%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h) +%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h) +%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h) +%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW) +%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY) +%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE) +%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR) + +; +; -- jdct.h +; + +; A forward DCT routine is given a pointer to a work area of type DCTELEM[]; +; the DCT is to be performed in-place in that buffer. +; To maximize parallelism, Type DCTELEM is changed to short (originally, int). +; +%define DCTELEM word ; short +%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM) + +%define FAST_FLOAT FP32 ; float +%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(FAST_FLOAT) + +; To maximize parallelism, Type MULTIPLIER is changed to short. 
+; +%define ISLOW_MULT_TYPE word ; must be short +%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE) + +%define IFAST_MULT_TYPE word ; must be short +%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE) +%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors + +%define FLOAT_MULT_TYPE FP32 ; must be float +%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE) + +; +; -- jsimd.h +; + +%define _cpp_protection_JSIMD_NONE JSIMD_NONE +%define _cpp_protection_JSIMD_MMX JSIMD_MMX +%define _cpp_protection_JSIMD_3DNOW JSIMD_3DNOW +%define _cpp_protection_JSIMD_SSE JSIMD_SSE +%define _cpp_protection_JSIMD_SSE2 JSIMD_SSE2 +%define _cpp_protection_JSIMD_AVX2 JSIMD_AVX2 diff --git a/simd/nasm/jsimdext.inc b/simd/nasm/jsimdext.inc new file mode 100644 index 0000000..b40901f --- /dev/null +++ b/simd/nasm/jsimdext.inc @@ -0,0 +1,476 @@ +; +; jsimdext.inc - common declarations +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2010, 2016, D. R. Commander. +; Copyright (C) 2018, Matthieu Darbois. +; +; Based on the x86 SIMD extension for IJG JPEG library - version 1.02 +; +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. 
This notice may not be removed or altered from any source distribution. +; +; [TAB8] + +; ========================================================================== +; System-dependent configurations + +%ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)-------- +; * Microsoft Visual C++ +; * MinGW (Minimalist GNU for Windows) +; * CygWin +; * LCC-Win32 + +; -- segment definition -- +; +%ifdef __YASM_VER__ +%define SEG_TEXT .text align=32 +%define SEG_CONST .rdata align=32 +%else +%define SEG_TEXT .text align=32 public use32 class=CODE +%define SEG_CONST .rdata align=32 public use32 class=CONST +%endif + +%elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)-------- +; * Microsoft Visual C++ + +; -- segment definition -- +; +%ifdef __YASM_VER__ +%define SEG_TEXT .text align=32 +%define SEG_CONST .rdata align=32 +%else +%define SEG_TEXT .text align=32 public use64 class=CODE +%define SEG_CONST .rdata align=32 public use64 class=CONST +%endif +%define EXTN(name) name ; foo() -> foo + +%elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)---------- +; * Borland C++ (Win32) + +; -- segment definition -- +; +%define SEG_TEXT _text align=32 public use32 class=CODE +%define SEG_CONST _data align=32 public use32 class=DATA + +%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ +; * Linux +; * *BSD family Unix using elf format +; * Unix System V, including Solaris x86, UnixWare and SCO Unix + +; mark stack as non-executable +section .note.GNU-stack noalloc noexec nowrite progbits + +; -- segment definition -- +; +%ifdef __x86_64__ +%define SEG_TEXT .text progbits align=32 +%define SEG_CONST .rodata progbits align=32 +%else +%define SEG_TEXT .text progbits alloc exec nowrite align=32 +%define SEG_CONST .rodata progbits alloc noexec nowrite align=32 +%endif + +; To make the code position-independent, append -DPIC to the commandline +; +%define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC +%define EXTN(name) name ; foo() -> foo + +%elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT 
...)---- +; * Older Linux using a.out format (nasm -f aout -DAOUT ...) +; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...) + +; -- segment definition -- +; +%define SEG_TEXT .text +%define SEG_CONST .data + +; To make the code position-independent, append -DPIC to the commandline +; +%define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC + +%elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- +; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format) + +; -- segment definition -- +; +%define SEG_TEXT .text ;align=32 ; nasm doesn't accept align=32. why? +%define SEG_CONST .rodata align=32 + +; The generation of position-independent code (PIC) is the default on Darwin. +; +%define PIC +%define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing + +%else ; ----(Other case)---------------------- + +; -- segment definition -- +; +%define SEG_TEXT .text +%define SEG_CONST .data + +%endif ; ---------------------------------------------- + +; ========================================================================== + +; -------------------------------------------------------------------------- +; Common types +; +%ifdef __x86_64__ +%define POINTER qword ; general pointer type +%define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER) +%define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT +%else +%define POINTER dword ; general pointer type +%define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER) +%define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT +%endif + +%define INT dword ; signed integer type +%define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT) +%define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT + +%define FP32 dword ; IEEE754 single +%define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32) +%define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT + +%define MMWORD qword ; int64 (MMX register) +%define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD) +%define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT + +; NASM is buggy and doesn't 
properly handle operand sizes for SSE +; instructions, so for now we have to define XMMWORD as blank. +%define XMMWORD ; int128 (SSE register) +%define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD) +%define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT + +%define YMMWORD ; int256 (AVX register) +%define SIZEOF_YMMWORD SIZEOF_YWORD ; sizeof(YMMWORD) +%define YMMWORD_BIT YWORD_BIT ; sizeof(YMMWORD)*BYTE_BIT + +; Similar hacks for when we load a dword or MMWORD into an xmm# register +%define XMM_DWORD +%define XMM_MMWORD + +%define SIZEOF_BYTE 1 ; sizeof(BYTE) +%define SIZEOF_WORD 2 ; sizeof(WORD) +%define SIZEOF_DWORD 4 ; sizeof(DWORD) +%define SIZEOF_QWORD 8 ; sizeof(QWORD) +%define SIZEOF_OWORD 16 ; sizeof(OWORD) +%define SIZEOF_YWORD 32 ; sizeof(YWORD) + +%define BYTE_BIT 8 ; CHAR_BIT in C +%define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT +%define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT +%define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT +%define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT +%define YWORD_BIT 256 ; sizeof(YWORD)*BYTE_BIT + +; -------------------------------------------------------------------------- +; External Symbol Name +; +%ifndef EXTN +%define EXTN(name) _ %+ name ; foo() -> _foo +%endif + +; -------------------------------------------------------------------------- +; Hidden symbols +; +%ifdef ELF ; ----(nasm -felf[64] -DELF ...)-------- +%define GLOBAL_FUNCTION(name) global EXTN(name):function hidden +%define GLOBAL_DATA(name) global EXTN(name):data hidden +%elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- +%ifdef __YASM_VER__ +%define GLOBAL_FUNCTION(name) global EXTN(name):private_extern +%define GLOBAL_DATA(name) global EXTN(name):private_extern +%endif +%endif + +%ifndef GLOBAL_FUNCTION +%define GLOBAL_FUNCTION(name) global EXTN(name) +%endif +%ifndef GLOBAL_DATA +%define GLOBAL_DATA(name) global EXTN(name) +%endif + +; -------------------------------------------------------------------------- +; Macros for position-independent code (PIC) 
support +; +%ifndef GOT_SYMBOL +%undef PIC +%endif + +%ifdef PIC ; ------------------------------------------- + +%ifidn GOT_SYMBOL, _MACHO_PIC_ ; -------------------- + +; At present, nasm doesn't seem to support PIC generation for Mach-O. +; The PIC support code below is a little tricky. + + SECTION SEG_CONST +const_base: + +%define GOTOFF(got, sym) (got) + (sym) - const_base + +%imacro get_GOT 1 + ; NOTE: this macro destroys ecx register. + call %%geteip + add ecx, byte (%%ref - $) + jmp short %%adjust +%%geteip: + mov ecx, POINTER [esp] + ret +%%adjust: + push ebp + xor ebp, ebp ; ebp = 0 +%ifidni %1, ebx ; (%1 == ebx) + ; db 0x8D,0x9C + jmp near const_base = + ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32) + db 0x8D, 0x9C ; 8D,9C + jmp near const_base ; E9,(const_base-%%ref) +%%ref: +%else ; (%1 != ebx) + ; db 0x8D,0x8C + jmp near const_base = + ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32) + db 0x8D, 0x8C ; 8D,8C + jmp near const_base ; E9,(const_base-%%ref) +%%ref: + mov %1, ecx +%endif ; (%1 == ebx) + pop ebp +%endmacro + +%else ; GOT_SYMBOL != _MACHO_PIC_ ---------------- + +%define GOTOFF(got, sym) (got) + (sym) wrt ..gotoff +; get_GOT: read the return address of the call below into %1, then add the ..gotpc-relative offset of GOT_SYMBOL to it. +%imacro get_GOT 1 + extern GOT_SYMBOL + call %%geteip + add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc + jmp short %%done +%%geteip: + mov %1, POINTER [esp] + ret +%%done: +%endmacro + +%endif ; GOT_SYMBOL == _MACHO_PIC_ ---------------- + +%imacro pushpic 1.nolist + push %1 +%endmacro +%imacro poppic 1.nolist + pop %1 +%endmacro +%imacro movpic 2.nolist + mov %1, %2 +%endmacro + +%else ; !PIC ----------------------------------------- + +%define GOTOFF(got, sym) (sym) + +%imacro get_GOT 1.nolist +%endmacro +%imacro pushpic 1.nolist +%endmacro +%imacro poppic 1.nolist +%endmacro +%imacro movpic 2.nolist +%endmacro + +%endif ; PIC ----------------------------------------- + +; -------------------------------------------------------------------------- +; Align the next instruction on {2,4,8,16,..}-byte
boundary. +; ".balign n,,m" in GNU as +; +%define MSKLE(x, y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16) +%define FILLB(b, n) (($$-(b)) & ((n)-1)) + +%imacro alignx 1-2.nolist 0xFFFF +%%bs: \ + times MSKLE(FILLB(%%bs, %1), %2) & MSKLE(16, FILLB($, %1)) & FILLB($, %1) \ + db 0x90 ; nop + times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 9 \ + db 0x8D, 0x9C, 0x23, 0x00, 0x00, 0x00, 0x00 ; lea ebx,[ebx+0x00000000] + times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 7 \ + db 0x8D, 0xAC, 0x25, 0x00, 0x00, 0x00, 0x00 ; lea ebp,[ebp+0x00000000] + times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 6 \ + db 0x8D, 0xAD, 0x00, 0x00, 0x00, 0x00 ; lea ebp,[ebp+0x00000000] + times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 4 \ + db 0x8D, 0x6C, 0x25, 0x00 ; lea ebp,[ebp+0x00] + times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 3 \ + db 0x8D, 0x6D, 0x00 ; lea ebp,[ebp+0x00] + times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 2 \ + db 0x8B, 0xED ; mov ebp,ebp + times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 1 \ + db 0x90 ; nop +%endmacro + +; Align the next data on {2,4,8,16,..}-byte boundary. 
+; +%imacro alignz 1.nolist + align %1, db 0 ; filling zeros +%endmacro + +%ifdef __x86_64__ + +%ifdef WIN64 +; WIN64 collect_args: spill xmm6/xmm7 to the stack, then copy up to six arguments (%1 = count) into r10-r15, pushing r12-r15 before overwriting them; arguments 5 and 6 are read from [rax+48] and [rax+56] (presumably rax holds the entry-time rsp -- confirm at the call sites). Finally pushes rsi and rdi. +%imacro collect_args 1 + sub rsp, SIZEOF_XMMWORD + movaps XMMWORD [rsp], xmm6 + sub rsp, SIZEOF_XMMWORD + movaps XMMWORD [rsp], xmm7 + mov r10, rcx +%if %1 > 1 + mov r11, rdx +%endif +%if %1 > 2 + push r12 + mov r12, r8 +%endif +%if %1 > 3 + push r13 + mov r13, r9 +%endif +%if %1 > 4 + push r14 + mov r14, [rax+48] +%endif +%if %1 > 5 + push r15 + mov r15, [rax+56] +%endif + push rsi + push rdi +%endmacro +; WIN64 uncollect_args: undo collect_args in reverse order -- pop rdi/rsi and the pushed r15..r12, then reload xmm7 and xmm6. +%imacro uncollect_args 1 + pop rdi + pop rsi +%if %1 > 5 + pop r15 +%endif +%if %1 > 4 + pop r14 +%endif +%if %1 > 3 + pop r13 +%endif +%if %1 > 2 + pop r12 +%endif + movaps xmm7, XMMWORD [rsp] + add rsp, SIZEOF_XMMWORD + movaps xmm6, XMMWORD [rsp] + add rsp, SIZEOF_XMMWORD +%endmacro +; WIN64 push_xmm: reserve %1 XMMWORD slots on the stack and store xmm8 onward into them (at most xmm8-xmm11 are handled). +%imacro push_xmm 1 + sub rsp, %1 * SIZEOF_XMMWORD + movaps XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8 +%if %1 > 1 + movaps XMMWORD [rsp+1*SIZEOF_XMMWORD], xmm9 +%endif +%if %1 > 2 + movaps XMMWORD [rsp+2*SIZEOF_XMMWORD], xmm10 +%endif +%if %1 > 3 + movaps XMMWORD [rsp+3*SIZEOF_XMMWORD], xmm11 +%endif +%endmacro +; WIN64 pop_xmm: reload the registers stored by push_xmm and release the %1 XMMWORD slots. +%imacro pop_xmm 1 + movaps xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD] +%if %1 > 1 + movaps xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD] +%endif +%if %1 > 2 + movaps xmm10, XMMWORD [rsp+2*SIZEOF_XMMWORD] +%endif +%if %1 > 3 + movaps xmm11, XMMWORD [rsp+3*SIZEOF_XMMWORD] +%endif + add rsp, %1 * SIZEOF_XMMWORD +%endmacro + +%else +; Non-WIN64 collect_args: push r10 onward (%1 registers) and copy the arguments from rdi, rsi, rdx, rcx, r8, r9 into them. +%imacro collect_args 1 + push r10 + mov r10, rdi +%if %1 > 1 + push r11 + mov r11, rsi +%endif +%if %1 > 2 + push r12 + mov r12, rdx +%endif +%if %1 > 3 + push r13 + mov r13, rcx +%endif +%if %1 > 4 + push r14 + mov r14, r8 +%endif +%if %1 > 5 + push r15 + mov r15, r9 +%endif +%endmacro +; Non-WIN64 uncollect_args: pop the registers pushed by collect_args, in reverse order. +%imacro uncollect_args 1 +%if %1 > 5 + pop r15 +%endif +%if %1 > 4 + pop r14 +%endif +%if %1 > 3 + pop r13 +%endif +%if %1 > 2 + pop r12 +%endif +%if %1 > 1 + pop r11 +%endif + pop r10 +%endmacro +; push_xmm / pop_xmm expand to nothing outside WIN64. +%imacro push_xmm 1 +%endmacro + +%imacro pop_xmm 1 +%endmacro +
+%endif + +%endif + +; -------------------------------------------------------------------------- +; Defines picked up from the C headers +; +%include "jsimdcfg.inc" + +; -------------------------------------------------------------------------- diff --git a/simd/nasm_lt.sh b/simd/nasm_lt.sh deleted file mode 100755 index 817be16..0000000 --- a/simd/nasm_lt.sh +++ /dev/null @@ -1,60 +0,0 @@ -#! /bin/sh -command="" -infile="" -o_opt=no -pic=no -while [ $# -gt 0 ]; do - case "$1" in - --silent) - exec > /dev/null - ;; - -DPIC|-fPIC|-fpic|-Kpic|-KPIC) - if [ "$pic" != "yes" ] ; then - command="$command -DPIC" - pic=yes - fi - ;; - -f|-fbin|-faout|-faoutb|-fcoff|-felf|-felf64|-fas86| \ - -fobj|-fwin32|-fwin64|-frdf|-fieee|-fmacho|-fmacho64) - # it's a file format specifier for nasm. - command="$command $1" - ;; - -f*) - # maybe a code-generation flag for gcc. - ;; - -[Ii]*) - incdir=`echo "$1" | sed 's/^-[Ii]//'` - if [ "x$incdir" = x -a "x$2" != x ] ; then - case "$2" in - -*) ;; - *) incdir="$2"; shift;; - esac - fi - if [ "x$incdir" != x ] ; then - # In the case of NASM, the trailing slash is necessary. - incdir=`echo "$incdir" | sed 's%/*$%/%'` - command="$command -I$incdir" - fi - ;; - -o*) - o_opt=yes - command="$command $1" - ;; - *.asm) - infile=$1 - command="$command $1" - ;; - *) - command="$command $1" - ;; - esac - shift -done -if [ "$o_opt" != yes ] ; then - # By default, NASM creates an output file - # in the same directory as the input file. 
- outfile="-o `echo $infile | sed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.o" - command="$command $outfile" -fi -echo $command -exec $command diff --git a/simd/jccolext-altivec.c b/simd/powerpc/jccolext-altivec.c similarity index 95% rename from simd/jccolext-altivec.c rename to simd/powerpc/jccolext-altivec.c index 849825e..170f90f 100644 --- a/simd/jccolext-altivec.c +++ b/simd/powerpc/jccolext-altivec.c @@ -24,9 +24,9 @@ /* This file is included by jccolor-altivec.c */ -void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, - JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +void jsimd_rgb_ycc_convert_altivec(JDIMENSION img_width, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { JSAMPROW inptr, outptr0, outptr1, outptr2; int pitch = img_width * RGB_PIXELSIZE, num_cols; @@ -35,13 +35,13 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, #endif unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16]; - __vector unsigned char rgb0, rgb1 = {0}, rgb2 = {0}, + __vector unsigned char rgb0, rgb1 = { 0 }, rgb2 = { 0 }, rgbg0, rgbg1, rgbg2, rgbg3, y, cb, cr; #if __BIG_ENDIAN__ || RGB_PIXELSIZE == 4 - __vector unsigned char rgb3 = {0}; + __vector unsigned char rgb3 = { 0 }; #endif #if __BIG_ENDIAN__ && RGB_PIXELSIZE == 4 - __vector unsigned char rgb4 = {0}; + __vector unsigned char rgb4 = { 0 }; #endif __vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3; __vector unsigned short yl, yh, crl, crh, cbl, cbh; @@ -57,9 +57,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, pd_onehalfm1_cj = { __4X(ONE_HALF - 1 + (CENTERJSAMPLE << SCALEBITS)) }; __vector unsigned char pb_zero = { __16X(0) }, #if __BIG_ENDIAN__ - shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}; + shift_pack_index = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; #else - shift_pack_index = 
{2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; + shift_pack_index = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; #endif while (--num_rows >= 0) { diff --git a/simd/powerpc/jccolor-altivec.c b/simd/powerpc/jccolor-altivec.c new file mode 100644 index 0000000..d670dbc --- /dev/null +++ b/simd/powerpc/jccolor-altivec.c @@ -0,0 +1,116 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. All Rights Reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +/* RGB --> YCC CONVERSION */ + +#include "jsimd_altivec.h" + + +#define F_0_081 5329 /* FIX(0.08131) */ +#define F_0_114 7471 /* FIX(0.11400) */ +#define F_0_168 11059 /* FIX(0.16874) */ +#define F_0_250 16384 /* FIX(0.25000) */ +#define F_0_299 19595 /* FIX(0.29900) */ +#define F_0_331 21709 /* FIX(0.33126) */ +#define F_0_418 27439 /* FIX(0.41869) */ +#define F_0_500 32768 /* FIX(0.50000) */ +#define F_0_587 38470 /* FIX(0.58700) */ +#define F_0_337 (F_0_587 - F_0_250) /* FIX(0.58700) - FIX(0.25000) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + + +#define RGBG_INDEX0 \ + { 0, 1, 3, 4, 6, 7, 9, 10, 2, 1, 5, 4, 8, 7, 11, 10 } +#define RGBG_INDEX1 \ + { 12, 13, 15, 16, 18, 19, 21, 22, 14, 13, 17, 16, 20, 19, 23, 22 } +#define RGBG_INDEX2 \ + { 8, 9, 11, 12, 14, 15, 17, 18, 10, 9, 13, 12, 16, 15, 19, 18 } +#define RGBG_INDEX3 \ + { 4, 5, 7, 8, 10, 11, 13, 14, 6, 5, 9, 8, 12, 11, 15, 14 } +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_rgb_ycc_convert_altivec jsimd_extrgb_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGBG_INDEX \ + { 0, 1, 4, 5, 8, 9, 12, 13, 2, 1, 6, 5, 10, 9, 14, 13 } +#define jsimd_rgb_ycc_convert_altivec jsimd_extrgbx_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGBG_INDEX0 \ + { 2, 1, 5, 4, 8, 7, 11, 10, 0, 1, 3, 4, 6, 7, 9, 10 } +#define RGBG_INDEX1 \ + { 14, 13, 17, 16, 20, 19, 23, 22, 12, 13, 15, 16, 18, 19, 21, 22 } +#define RGBG_INDEX2 \ + { 10, 9, 13, 12, 16, 15, 19, 18, 8, 9, 11, 12, 14, 15, 17, 18 } +#define RGBG_INDEX3 \ + { 6, 5, 9, 8, 12, 11, 15, 14, 4, 5, 7, 8, 10, 11, 13, 14 } +#define 
jsimd_rgb_ycc_convert_altivec jsimd_extbgr_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGBG_INDEX \ + { 2, 1, 6, 5, 10, 9, 14, 13, 0, 1, 4, 5, 8, 9, 12, 13 } +#define jsimd_rgb_ycc_convert_altivec jsimd_extbgrx_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGBG_INDEX \ + { 3, 2, 7, 6, 11, 10, 15, 14, 1, 2, 5, 6, 9, 10, 13, 14 } +#define jsimd_rgb_ycc_convert_altivec jsimd_extxbgr_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGBG_INDEX \ + { 1, 2, 5, 6, 9, 10, 13, 14, 3, 2, 7, 6, 11, 10, 15, 14 } +#define jsimd_rgb_ycc_convert_altivec jsimd_extxrgb_ycc_convert_altivec +#include "jccolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_ycc_convert_altivec diff --git a/simd/powerpc/jcgray-altivec.c b/simd/powerpc/jcgray-altivec.c new file mode 100644 index 0000000..a11a7e7 --- /dev/null +++ b/simd/powerpc/jcgray-altivec.c @@ -0,0 +1,111 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. All Rights Reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* RGB --> GRAYSCALE CONVERSION */ + +#include "jsimd_altivec.h" + + +#define F_0_114 7471 /* FIX(0.11400) */ +#define F_0_250 16384 /* FIX(0.25000) */ +#define F_0_299 19595 /* FIX(0.29900) */ +#define F_0_587 38470 /* FIX(0.58700) */ +#define F_0_337 (F_0_587 - F_0_250) /* FIX(0.58700) - FIX(0.25000) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + + +#define RGBG_INDEX0 \ + { 0, 1, 3, 4, 6, 7, 9, 10, 2, 1, 5, 4, 8, 7, 11, 10 } +#define RGBG_INDEX1 \ + { 12, 13, 15, 16, 18, 19, 21, 22, 14, 13, 17, 16, 20, 19, 23, 22 } +#define RGBG_INDEX2 \ + { 8, 9, 11, 12, 14, 15, 17, 18, 10, 9, 13, 12, 16, 15, 19, 18 } +#define RGBG_INDEX3 \ + { 4, 5, 7, 8, 10, 11, 13, 14, 6, 5, 9, 8, 12, 11, 15, 14 } +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_rgb_gray_convert_altivec jsimd_extrgb_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGBG_INDEX \ + { 0, 1, 4, 5, 8, 9, 12, 13, 2, 1, 6, 5, 10, 9, 14, 13 } +#define jsimd_rgb_gray_convert_altivec jsimd_extrgbx_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGBG_INDEX0 \ + { 2, 1, 5, 4, 8, 7, 11, 10, 0, 1, 3, 4, 6, 7, 9, 10 } +#define RGBG_INDEX1 \ + { 14, 13, 17, 16, 20, 19, 23, 22, 12, 13, 15, 16, 18, 19, 21, 22 } +#define RGBG_INDEX2 \ + { 10, 9, 13, 12, 16, 15, 19, 18, 8, 
9, 11, 12, 14, 15, 17, 18 } +#define RGBG_INDEX3 \ + { 6, 5, 9, 8, 12, 11, 15, 14, 4, 5, 7, 8, 10, 11, 13, 14 } +#define jsimd_rgb_gray_convert_altivec jsimd_extbgr_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX0 +#undef RGBG_INDEX1 +#undef RGBG_INDEX2 +#undef RGBG_INDEX3 +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGBG_INDEX \ + { 2, 1, 6, 5, 10, 9, 14, 13, 0, 1, 4, 5, 8, 9, 12, 13 } +#define jsimd_rgb_gray_convert_altivec jsimd_extbgrx_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGBG_INDEX \ + { 3, 2, 7, 6, 11, 10, 15, 14, 1, 2, 5, 6, 9, 10, 13, 14 } +#define jsimd_rgb_gray_convert_altivec jsimd_extxbgr_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGBG_INDEX \ + { 1, 2, 5, 6, 9, 10, 13, 14, 3, 2, 7, 6, 11, 10, 15, 14 } +#define jsimd_rgb_gray_convert_altivec jsimd_extxrgb_gray_convert_altivec +#include "jcgryext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGBG_INDEX +#undef jsimd_rgb_gray_convert_altivec diff --git a/simd/jcgryext-altivec.c b/simd/powerpc/jcgryext-altivec.c similarity index 93% rename from simd/jcgryext-altivec.c rename to simd/powerpc/jcgryext-altivec.c index 7f8232b..b280cbb 100644 --- a/simd/jcgryext-altivec.c +++ b/simd/powerpc/jcgryext-altivec.c @@ -24,10 +24,9 @@ /* This file is included by jcgray-altivec.c */ -void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width, - JSAMPARRAY input_buf, - JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) +void jsimd_rgb_gray_convert_altivec(JDIMENSION img_width, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, + JDIMENSION output_row, int num_rows) { JSAMPROW inptr, outptr; int pitch = img_width * RGB_PIXELSIZE, num_cols; @@ 
-36,13 +35,13 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width, unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16]; #endif - __vector unsigned char rgb0, rgb1 = {0}, rgb2 = {0}, + __vector unsigned char rgb0, rgb1 = { 0 }, rgb2 = { 0 }, rgbg0, rgbg1, rgbg2, rgbg3, y; #if __BIG_ENDIAN__ || RGB_PIXELSIZE == 4 - __vector unsigned char rgb3 = {0}; + __vector unsigned char rgb3 = { 0 }; #endif #if __BIG_ENDIAN__ && RGB_PIXELSIZE == 4 - __vector unsigned char rgb4 = {0}; + __vector unsigned char rgb4 = { 0 }; #endif __vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3; __vector unsigned short yl, yh; @@ -54,9 +53,11 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width, __vector int pd_onehalf = { __4X(ONE_HALF) }; __vector unsigned char pb_zero = { __16X(0) }, #if __BIG_ENDIAN__ - shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}; + shift_pack_index = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; #else - shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; + shift_pack_index = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; #endif while (--num_rows >= 0) { diff --git a/simd/jcsample-altivec.c b/simd/powerpc/jcsample-altivec.c similarity index 84% rename from simd/jcsample-altivec.c rename to simd/powerpc/jcsample-altivec.c index 11609d9..6e25b8d 100644 --- a/simd/jcsample-altivec.c +++ b/simd/powerpc/jcsample-altivec.c @@ -26,14 +26,15 @@ #include "jcsample.h" -void -jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, - JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data) +void jsimd_h2v1_downsample_altivec(JDIMENSION image_width, + int max_v_samp_factor, + JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, + JSAMPARRAY input_data, + JSAMPARRAY output_data) { int outrow, outcol; - JDIMENSION output_cols = width_blocks * DCTSIZE; + JDIMENSION output_cols = width_in_blocks * DCTSIZE; JSAMPROW 
inptr, outptr; __vector unsigned char this0, next0, out; @@ -43,7 +44,7 @@ jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, __vector unsigned short pw_bias = { __4X2(0, 1) }, pw_one = { __8X(1) }; __vector unsigned char even_odd_index = - {0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15}, + { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }, pb_zero = { __16X(0) }; expand_right_edge(input_data, max_v_samp_factor, image_width, @@ -83,13 +84,13 @@ jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, void -jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, - JDIMENSION v_samp_factor, - JDIMENSION width_blocks, - JSAMPARRAY input_data, JSAMPARRAY output_data) +jsimd_h2v2_downsample_altivec(JDIMENSION image_width, int max_v_samp_factor, + JDIMENSION v_samp_factor, + JDIMENSION width_in_blocks, + JSAMPARRAY input_data, JSAMPARRAY output_data) { int inrow, outrow, outcol; - JDIMENSION output_cols = width_blocks * DCTSIZE; + JDIMENSION output_cols = width_in_blocks * DCTSIZE; JSAMPROW inptr0, inptr1, outptr; __vector unsigned char this0, next0, this1, next1, out; @@ -100,7 +101,7 @@ jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, __vector unsigned short pw_bias = { __4X2(1, 2) }, pw_two = { __8X(2) }; __vector unsigned char even_odd_index = - { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }, + { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }, pb_zero = { __16X(0) }; expand_right_edge(input_data, max_v_samp_factor, image_width, diff --git a/simd/powerpc/jcsample.h b/simd/powerpc/jcsample.h new file mode 100644 index 0000000..2ac4816 --- /dev/null +++ b/simd/powerpc/jcsample.h @@ -0,0 +1,28 @@ +/* + * jcsample.h + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1991-1996, Thomas G. Lane. + * For conditions of distribution and use, see the accompanying README.ijg + * file. 
+ */ +/* Extend each of the num_rows rows in image_data from input_cols to output_cols samples by repeating the row's last existing sample; does nothing when the computed difference is not positive. */ +LOCAL(void) +expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols, + JDIMENSION output_cols) +{ + register JSAMPROW ptr; + register JSAMPLE pixval; + register int count; + int row; + int numcols = (int)(output_cols - input_cols); /* samples to append per row */ + + if (numcols > 0) { + for (row = 0; row < num_rows; row++) { + ptr = image_data[row] + input_cols; + pixval = ptr[-1]; /* don't need GETJSAMPLE() here */ + for (count = numcols; count > 0; count--) + *ptr++ = pixval; + } + } +} diff --git a/simd/jdcolext-altivec.c b/simd/powerpc/jdcolext-altivec.c similarity index 96% rename from simd/jdcolext-altivec.c rename to simd/powerpc/jdcolext-altivec.c index fb121ce..68d52bd 100644 --- a/simd/jdcolext-altivec.c +++ b/simd/powerpc/jdcolext-altivec.c @@ -23,9 +23,9 @@ /* This file is included by jdcolor-altivec.c */ -void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf, - JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +void jsimd_ycc_rgb_convert_altivec(JDIMENSION out_width, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { JSAMPROW outptr, inptr0, inptr1, inptr2; int pitch = out_width * RGB_PIXELSIZE, num_cols; @@ -61,9 +61,11 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf, __vector int pd_onehalf = { __4X(ONE_HALF) }; __vector unsigned char pb_zero = { __16X(0) }, #if __BIG_ENDIAN__ - shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}; + shift_pack_index = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }; #else - shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; + shift_pack_index = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }; #endif while (--num_rows >= 0) { diff --git a/simd/powerpc/jdcolor-altivec.c b/simd/powerpc/jdcolor-altivec.c new file mode 100644 index 0000000..eb35b67 --- /dev/null +++ b/simd/powerpc/jdcolor-altivec.c @@ -0,0 +1,106 @@ +/* + * AltiVec optimizations for
libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. All Rights Reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* YCC --> RGB CONVERSION */ + +#include "jsimd_altivec.h" + + +#define F_0_344 22554 /* FIX(0.34414) */ +#define F_0_714 46802 /* FIX(0.71414) */ +#define F_1_402 91881 /* FIX(1.40200) */ +#define F_1_772 116130 /* FIX(1.77200) */ +#define F_0_402 (F_1_402 - 65536) /* FIX(1.40200) - FIX(1) */ +#define F_0_285 (65536 - F_0_714) /* FIX(1) - FIX(0.71414) */ +#define F_0_228 (131072 - F_1_772) /* FIX(2) - FIX(1.77200) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + +#define RGB_INDEX0 \ + { 0, 1, 8, 2, 3, 10, 4, 5, 12, 6, 7, 14, 16, 17, 24, 18 } +#define RGB_INDEX1 \ + { 3, 10, 4, 5, 12, 6, 7, 14, 16, 17, 24, 18, 19, 26, 20, 21 } +#define RGB_INDEX2 \ + { 12, 6, 7, 14, 16, 17, 24, 18, 19, 26, 20, 21, 28, 22, 23, 30 } +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extrgb_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef 
jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGB_INDEX \ + { 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15 } +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extrgbx_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGB_INDEX0 \ + { 8, 1, 0, 10, 3, 2, 12, 5, 4, 14, 7, 6, 24, 17, 16, 26 } +#define RGB_INDEX1 \ + { 3, 2, 12, 5, 4, 14, 7, 6, 24, 17, 16, 26, 19, 18, 28, 21 } +#define RGB_INDEX2 \ + { 4, 14, 7, 6, 24, 17, 16, 26, 19, 18, 28, 21, 20, 30, 23, 22 } +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extbgr_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGB_INDEX \ + { 8, 1, 0, 9, 10, 3, 2, 11, 12, 5, 4, 13, 14, 7, 6, 15 } +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extbgrx_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGB_INDEX \ + { 9, 8, 1, 0, 11, 10, 3, 2, 13, 12, 5, 4, 15, 14, 7, 6 } +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extxbgr_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGB_INDEX \ + { 9, 0, 1, 8, 11, 2, 3, 10, 13, 4, 5, 12, 15, 6, 7, 14 } +#define jsimd_ycc_rgb_convert_altivec jsimd_ycc_extxrgb_convert_altivec +#include "jdcolext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_ycc_rgb_convert_altivec diff --git a/simd/powerpc/jdmerge-altivec.c b/simd/powerpc/jdmerge-altivec.c new file mode 100644 index 0000000..79c577f --- /dev/null +++ b/simd/powerpc/jdmerge-altivec.c @@ -0,0 +1,130 @@ +/* + * AltiVec optimizations for 
libjpeg-turbo + * + * Copyright (C) 2015, D. R. Commander. All Rights Reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* MERGED YCC --> RGB CONVERSION AND UPSAMPLING */ + +#include "jsimd_altivec.h" + + +#define F_0_344 22554 /* FIX(0.34414) */ +#define F_0_714 46802 /* FIX(0.71414) */ +#define F_1_402 91881 /* FIX(1.40200) */ +#define F_1_772 116130 /* FIX(1.77200) */ +#define F_0_402 (F_1_402 - 65536) /* FIX(1.40200) - FIX(1) */ +#define F_0_285 (65536 - F_0_714) /* FIX(1) - FIX(0.71414) */ +#define F_0_228 (131072 - F_1_772) /* FIX(2) - FIX(1.77200) */ + +#define SCALEBITS 16 +#define ONE_HALF (1 << (SCALEBITS - 1)) + +#define RGB_INDEX0 \ + { 0, 1, 8, 2, 3, 10, 4, 5, 12, 6, 7, 14, 16, 17, 24, 18 } +#define RGB_INDEX1 \ + { 3, 10, 4, 5, 12, 6, 7, 14, 16, 17, 24, 18, 19, 26, 20, 21 } +#define RGB_INDEX2 \ + { 12, 6, 7, 14, 16, 17, 24, 18, 19, 26, 20, 21, 28, 22, 23, 30 } +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE + +#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +#define jsimd_h2v1_merged_upsample_altivec \ + jsimd_h2v1_extrgb_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec \ + jsimd_h2v2_extrgb_merged_upsample_altivec 
+#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +#define RGB_INDEX \ + { 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15 } +#define jsimd_h2v1_merged_upsample_altivec \ + jsimd_h2v1_extrgbx_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec \ + jsimd_h2v2_extrgbx_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +#define RGB_INDEX0 \ + { 8, 1, 0, 10, 3, 2, 12, 5, 4, 14, 7, 6, 24, 17, 16, 26 } +#define RGB_INDEX1 \ + { 3, 2, 12, 5, 4, 14, 7, 6, 24, 17, 16, 26, 19, 18, 28, 21 } +#define RGB_INDEX2 \ + { 4, 14, 7, 6, 24, 17, 16, 26, 19, 18, 28, 21, 20, 30, 23, 22 } +#define jsimd_h2v1_merged_upsample_altivec \ + jsimd_h2v1_extbgr_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec \ + jsimd_h2v2_extbgr_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX0 +#undef RGB_INDEX1 +#undef RGB_INDEX2 +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +#define RGB_INDEX \ + { 8, 1, 0, 9, 10, 3, 2, 11, 12, 5, 4, 13, 14, 7, 6, 15 } +#define jsimd_h2v1_merged_upsample_altivec \ + jsimd_h2v1_extbgrx_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec \ + jsimd_h2v2_extbgrx_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +#define RGB_INDEX \ + { 9, 8, 1, 0, 11, 10, 3, 2, 13, 12, 5, 4, 15, 14, 7, 6 } +#define jsimd_h2v1_merged_upsample_altivec \ + 
jsimd_h2v1_extxbgr_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec \ + jsimd_h2v2_extxbgr_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec + +#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +#define RGB_INDEX \ + { 9, 0, 1, 8, 11, 2, 3, 10, 13, 4, 5, 12, 15, 6, 7, 14 } +#define jsimd_h2v1_merged_upsample_altivec \ + jsimd_h2v1_extxrgb_merged_upsample_altivec +#define jsimd_h2v2_merged_upsample_altivec \ + jsimd_h2v2_extxrgb_merged_upsample_altivec +#include "jdmrgext-altivec.c" +#undef RGB_PIXELSIZE +#undef RGB_INDEX +#undef jsimd_h2v1_merged_upsample_altivec +#undef jsimd_h2v2_merged_upsample_altivec diff --git a/simd/jdmrgext-altivec.c b/simd/powerpc/jdmrgext-altivec.c similarity index 91% rename from simd/jdmrgext-altivec.c rename to simd/powerpc/jdmrgext-altivec.c index 55205bb..40f02c3 100644 --- a/simd/jdmrgext-altivec.c +++ b/simd/powerpc/jdmrgext-altivec.c @@ -23,10 +23,10 @@ /* This file is included by jdmerge-altivec.c */ -void jsimd_h2v1_merged_upsample_altivec (JDIMENSION output_width, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +void jsimd_h2v1_merged_upsample_altivec(JDIMENSION output_width, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { JSAMPROW outptr, inptr0, inptr1, inptr2; int pitch = output_width * RGB_PIXELSIZE, num_cols, yloop; @@ -63,13 +63,19 @@ void jsimd_h2v1_merged_upsample_altivec (JDIMENSION output_width, __vector int pd_onehalf = { __4X(ONE_HALF) }; __vector unsigned char pb_zero = { __16X(0) }, #if __BIG_ENDIAN__ - shift_pack_index = {0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29}, - even_index = {0,16,0,18,0,20,0,22,0,24,0,26,0,28,0,30}, - odd_index = {0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31}; + shift_pack_index = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 }, + even_index = + { 0, 16, 0, 18, 0, 20, 0, 22, 0, 
24, 0, 26, 0, 28, 0, 30 }, + odd_index = + { 0, 17, 0, 19, 0, 21, 0, 23, 0, 25, 0, 27, 0, 29, 0, 31 }; #else - shift_pack_index = {2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}, - even_index = {16,0,18,0,20,0,22,0,24,0,26,0,28,0,30,0}, - odd_index = {17,0,19,0,21,0,23,0,25,0,27,0,29,0,31,0}; + shift_pack_index = + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }, + even_index = + { 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26, 0, 28, 0, 30, 0 }, + odd_index = + { 17, 0, 19, 0, 21, 0, 23, 0, 25, 0, 27, 0, 29, 0, 31, 0 }; #endif inptr0 = input_buf[0][in_row_group_ctr]; @@ -299,10 +305,10 @@ void jsimd_h2v1_merged_upsample_altivec (JDIMENSION output_width, } -void jsimd_h2v2_merged_upsample_altivec (JDIMENSION output_width, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) +void jsimd_h2v2_merged_upsample_altivec(JDIMENSION output_width, + JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, + JSAMPARRAY output_buf) { JSAMPROW inptr, outptr; diff --git a/simd/jdsample-altivec.c b/simd/powerpc/jdsample-altivec.c similarity index 83% rename from simd/jdsample-altivec.c rename to simd/powerpc/jdsample-altivec.c index b40ce55..04df0cf 100644 --- a/simd/jdsample-altivec.c +++ b/simd/powerpc/jdsample-altivec.c @@ -25,31 +25,36 @@ #include "jsimd_altivec.h" -void -jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor, - JDIMENSION downsampled_width, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +void jsimd_h2v1_fancy_upsample_altivec(int max_v_samp_factor, + JDIMENSION downsampled_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; JSAMPROW inptr, outptr; int inrow, incol; - __vector unsigned char this0, last0, p_last0, next0 = {0}, p_next0, + __vector unsigned char this0, last0, p_last0, next0 = { 0 }, p_next0, out; __vector short this0e, this0o, this0l, this0h, last0l, last0h, next0l, next0h, outle, outhe, outlo, outho; /* Constants */ __vector unsigned char pb_zero = { 
__16X(0) }, pb_three = { __16X(3) }, - last_index_col0 = {0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14}, - last_index = {15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30}, - next_index = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, - next_index_lastcol = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,15}, + last_index_col0 = + { 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }, + last_index = + { 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 }, + next_index = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }, + next_index_lastcol = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15 }, #if __BIG_ENDIAN__ - merge_pack_index = {1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31}; + merge_pack_index = + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }; #else - merge_pack_index = {0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30}; + merge_pack_index = + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }; #endif __vector short pw_one = { __8X(1) }, pw_two = { __8X(2) }; @@ -121,11 +126,10 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor, } -void -jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, - JDIMENSION downsampled_width, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +void jsimd_h2v2_fancy_upsample_altivec(int max_v_samp_factor, + JDIMENSION downsampled_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; JSAMPROW inptr_1, inptr0, inptr1, outptr0, outptr1; @@ -136,21 +140,27 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, lastcolsum_1h, lastcolsum1h, p_lastcolsum_1l, p_lastcolsum_1h, p_lastcolsum1l, p_lastcolsum1h, thiscolsum_1l, thiscolsum_1h, thiscolsum1l, thiscolsum1h, - nextcolsum_1l = {0}, nextcolsum_1h = {0}, - nextcolsum1l = {0}, nextcolsum1h = {0}, + nextcolsum_1l = { 0 }, nextcolsum_1h = { 0 }, + nextcolsum1l = { 0 }, nextcolsum1h = { 0 }, p_nextcolsum_1l, p_nextcolsum_1h, p_nextcolsum1l, p_nextcolsum1h, tmpl, tmph, outle, outhe, 
outlo, outho; /* Constants */ __vector unsigned char pb_zero = { __16X(0) }, - last_index_col0 = {0,1,0,1,2,3,4,5,6,7,8,9,10,11,12,13}, - last_index={14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29}, - next_index = {2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}, - next_index_lastcol = {2,3,4,5,6,7,8,9,10,11,12,13,14,15,14,15}, + last_index_col0 = + { 0, 1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }, + last_index = + { 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 }, + next_index = + { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }, + next_index_lastcol = + { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 14, 15 }, #if __BIG_ENDIAN__ - merge_pack_index = {1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31}; + merge_pack_index = + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }; #else - merge_pack_index = {0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30}; + merge_pack_index = + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }; #endif __vector short pw_zero = { __8X(0) }, pw_three = { __8X(3) }, pw_seven = { __8X(7) }, pw_eight = { __8X(8) }; @@ -306,11 +316,10 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, /* These are rarely used (mainly just for decompressing YCCK images) */ -void -jsimd_h2v1_upsample_altivec (int max_v_samp_factor, - JDIMENSION output_width, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +void jsimd_h2v1_upsample_altivec(int max_v_samp_factor, + JDIMENSION output_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = *output_data_ptr; JSAMPROW inptr, outptr; @@ -345,11 +354,10 @@ jsimd_h2v1_upsample_altivec (int max_v_samp_factor, } -void -jsimd_h2v2_upsample_altivec (int max_v_samp_factor, - JDIMENSION output_width, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +void jsimd_h2v2_upsample_altivec(int max_v_samp_factor, + JDIMENSION output_width, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr) { JSAMPARRAY output_data = 
*output_data_ptr; JSAMPROW inptr, outptr0, outptr1; diff --git a/simd/jfdctfst-altivec.c b/simd/powerpc/jfdctfst-altivec.c similarity index 68% rename from simd/jfdctfst-altivec.c rename to simd/powerpc/jfdctfst-altivec.c index 04157f7..ad9af81 100644 --- a/simd/jfdctfst-altivec.c +++ b/simd/powerpc/jfdctfst-altivec.c @@ -32,64 +32,62 @@ #include "jsimd_altivec.h" -#define F_0_382 98 /* FIX(0.382683433) */ -#define F_0_541 139 /* FIX(0.541196100) */ -#define F_0_707 181 /* FIX(0.707106781) */ -#define F_1_306 334 /* FIX(1.306562965) */ +#define F_0_382 98 /* FIX(0.382683433) */ +#define F_0_541 139 /* FIX(0.541196100) */ +#define F_0_707 181 /* FIX(0.707106781) */ +#define F_1_306 334 /* FIX(1.306562965) */ -#define CONST_BITS 8 -#define PRE_MULTIPLY_SCALE_BITS 2 -#define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS - 1) +#define CONST_BITS 8 +#define PRE_MULTIPLY_SCALE_BITS 2 +#define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS - 1) -#define DO_FDCT() \ -{ \ - /* Even part */ \ +#define DO_FDCT() { \ + /* Even part */ \ \ - tmp10 = vec_add(tmp0, tmp3); \ - tmp13 = vec_sub(tmp0, tmp3); \ - tmp11 = vec_add(tmp1, tmp2); \ - tmp12 = vec_sub(tmp1, tmp2); \ + tmp10 = vec_add(tmp0, tmp3); \ + tmp13 = vec_sub(tmp0, tmp3); \ + tmp11 = vec_add(tmp1, tmp2); \ + tmp12 = vec_sub(tmp1, tmp2); \ \ - out0 = vec_add(tmp10, tmp11); \ - out4 = vec_sub(tmp10, tmp11); \ + out0 = vec_add(tmp10, tmp11); \ + out4 = vec_sub(tmp10, tmp11); \ \ - z1 = vec_add(tmp12, tmp13); \ - z1 = vec_sl(z1, pre_multiply_scale_bits); \ - z1 = vec_madds(z1, pw_0707, pw_zero); \ + z1 = vec_add(tmp12, tmp13); \ + z1 = vec_sl(z1, pre_multiply_scale_bits); \ + z1 = vec_madds(z1, pw_0707, pw_zero); \ \ - out2 = vec_add(tmp13, z1); \ - out6 = vec_sub(tmp13, z1); \ + out2 = vec_add(tmp13, z1); \ + out6 = vec_sub(tmp13, z1); \ \ - /* Odd part */ \ + /* Odd part */ \ \ - tmp10 = vec_add(tmp4, tmp5); \ - tmp11 = vec_add(tmp5, tmp6); \ - tmp12 = vec_add(tmp6, tmp7); \ + tmp10 = vec_add(tmp4, 
tmp5); \ + tmp11 = vec_add(tmp5, tmp6); \ + tmp12 = vec_add(tmp6, tmp7); \ \ - tmp10 = vec_sl(tmp10, pre_multiply_scale_bits); \ - tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \ - z5 = vec_sub(tmp10, tmp12); \ - z5 = vec_madds(z5, pw_0382, pw_zero); \ + tmp10 = vec_sl(tmp10, pre_multiply_scale_bits); \ + tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \ + z5 = vec_sub(tmp10, tmp12); \ + z5 = vec_madds(z5, pw_0382, pw_zero); \ \ - z2 = vec_madds(tmp10, pw_0541, z5); \ - z4 = vec_madds(tmp12, pw_1306, z5); \ + z2 = vec_madds(tmp10, pw_0541, z5); \ + z4 = vec_madds(tmp12, pw_1306, z5); \ \ - tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \ - z3 = vec_madds(tmp11, pw_0707, pw_zero); \ + tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \ + z3 = vec_madds(tmp11, pw_0707, pw_zero); \ \ - z11 = vec_add(tmp7, z3); \ - z13 = vec_sub(tmp7, z3); \ + z11 = vec_add(tmp7, z3); \ + z13 = vec_sub(tmp7, z3); \ \ - out5 = vec_add(z13, z2); \ - out3 = vec_sub(z13, z2); \ - out1 = vec_add(z11, z4); \ - out7 = vec_sub(z11, z4); \ + out5 = vec_add(z13, z2); \ + out3 = vec_sub(z13, z2); \ + out1 = vec_add(z11, z4); \ + out7 = vec_sub(z11, z4); \ } -void -jsimd_fdct_ifast_altivec (DCTELEM *data) +void jsimd_fdct_ifast_altivec(DCTELEM *data) { __vector short row0, row1, row2, row3, row4, row5, row6, row7, col0, col1, col2, col3, col4, col5, col6, col7, diff --git a/simd/powerpc/jfdctint-altivec.c b/simd/powerpc/jfdctint-altivec.c new file mode 100644 index 0000000..6e63cc1 --- /dev/null +++ b/simd/powerpc/jfdctint-altivec.c @@ -0,0 +1,258 @@ +/* + * AltiVec optimizations for libjpeg-turbo + * + * Copyright (C) 2014, D. R. Commander. All Rights Reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* SLOW INTEGER FORWARD DCT */ + +#include "jsimd_altivec.h" + + +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define DESCALE_P1 (CONST_BITS - PASS1_BITS) +#define DESCALE_P2 (CONST_BITS + PASS1_BITS) + + +#define DO_FDCT_COMMON(PASS) { \ + /* (Original) \ + * z1 = (tmp12 + tmp13) * 0.541196100; \ + * data2 = z1 + tmp13 * 0.765366865; \ + * data6 = z1 + tmp12 * -1.847759065; \ + * \ + * (This implementation) \ + * data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; \ + * data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); \ + */ \ + \ + tmp1312l = vec_mergeh(tmp13, tmp12); \ + tmp1312h = vec_mergel(tmp13, tmp12); \ + \ + out2l = vec_msums(tmp1312l, pw_f130_f054, pd_descale_p##PASS); \ + out2h = vec_msums(tmp1312h, 
pw_f130_f054, pd_descale_p##PASS); \ + out6l = vec_msums(tmp1312l, pw_f054_mf130, pd_descale_p##PASS); \ + out6h = vec_msums(tmp1312h, pw_f054_mf130, pd_descale_p##PASS); \ + \ + out2l = vec_sra(out2l, descale_p##PASS); \ + out2h = vec_sra(out2h, descale_p##PASS); \ + out6l = vec_sra(out6l, descale_p##PASS); \ + out6h = vec_sra(out6h, descale_p##PASS); \ + \ + out2 = vec_pack(out2l, out2h); \ + out6 = vec_pack(out6l, out6h); \ + \ + /* Odd part */ \ + \ + z3 = vec_add(tmp4, tmp6); \ + z4 = vec_add(tmp5, tmp7); \ + \ + /* (Original) \ + * z5 = (z3 + z4) * 1.175875602; \ + * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ + * z3 += z5; z4 += z5; \ + * \ + * (This implementation) \ + * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ + * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ + */ \ + \ + z34l = vec_mergeh(z3, z4); \ + z34h = vec_mergel(z3, z4); \ + \ + z3l = vec_msums(z34l, pw_mf078_f117, pd_descale_p##PASS); \ + z3h = vec_msums(z34h, pw_mf078_f117, pd_descale_p##PASS); \ + z4l = vec_msums(z34l, pw_f117_f078, pd_descale_p##PASS); \ + z4h = vec_msums(z34h, pw_f117_f078, pd_descale_p##PASS); \ + \ + /* (Original) \ + * z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; \ + * tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; \ + * tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; \ + * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ + * data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; \ + * data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; \ + * \ + * (This implementation) \ + * tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; \ + * tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; \ + * tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); \ + * tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); \ + * data7 = tmp4 + z3; data5 = tmp5 + z4; \ + * data3 = tmp6 + z3; data1 = tmp7 + z4; \ + */ \ + \ + tmp47l = vec_mergeh(tmp4, tmp7); \ + tmp47h = vec_mergel(tmp4, tmp7); \ + \ + out7l = 
vec_msums(tmp47l, pw_mf060_mf089, z3l); \ + out7h = vec_msums(tmp47h, pw_mf060_mf089, z3h); \ + out1l = vec_msums(tmp47l, pw_mf089_f060, z4l); \ + out1h = vec_msums(tmp47h, pw_mf089_f060, z4h); \ + \ + out7l = vec_sra(out7l, descale_p##PASS); \ + out7h = vec_sra(out7h, descale_p##PASS); \ + out1l = vec_sra(out1l, descale_p##PASS); \ + out1h = vec_sra(out1h, descale_p##PASS); \ + \ + out7 = vec_pack(out7l, out7h); \ + out1 = vec_pack(out1l, out1h); \ + \ + tmp56l = vec_mergeh(tmp5, tmp6); \ + tmp56h = vec_mergel(tmp5, tmp6); \ + \ + out5l = vec_msums(tmp56l, pw_mf050_mf256, z4l); \ + out5h = vec_msums(tmp56h, pw_mf050_mf256, z4h); \ + out3l = vec_msums(tmp56l, pw_mf256_f050, z3l); \ + out3h = vec_msums(tmp56h, pw_mf256_f050, z3h); \ + \ + out5l = vec_sra(out5l, descale_p##PASS); \ + out5h = vec_sra(out5h, descale_p##PASS); \ + out3l = vec_sra(out3l, descale_p##PASS); \ + out3h = vec_sra(out3h, descale_p##PASS); \ + \ + out5 = vec_pack(out5l, out5h); \ + out3 = vec_pack(out3l, out3h); \ +} + +#define DO_FDCT_PASS1() { \ + /* Even part */ \ + \ + tmp10 = vec_add(tmp0, tmp3); \ + tmp13 = vec_sub(tmp0, tmp3); \ + tmp11 = vec_add(tmp1, tmp2); \ + tmp12 = vec_sub(tmp1, tmp2); \ + \ + out0 = vec_add(tmp10, tmp11); \ + out0 = vec_sl(out0, pass1_bits); \ + out4 = vec_sub(tmp10, tmp11); \ + out4 = vec_sl(out4, pass1_bits); \ + \ + DO_FDCT_COMMON(1); \ +} + +#define DO_FDCT_PASS2() { \ + /* Even part */ \ + \ + tmp10 = vec_add(tmp0, tmp3); \ + tmp13 = vec_sub(tmp0, tmp3); \ + tmp11 = vec_add(tmp1, tmp2); \ + tmp12 = vec_sub(tmp1, tmp2); \ + \ + out0 = vec_add(tmp10, tmp11); \ + out0 = vec_add(out0, pw_descale_p2x); \ + out0 = vec_sra(out0, pass1_bits); \ + out4 = vec_sub(tmp10, tmp11); \ + out4 = vec_add(out4, pw_descale_p2x); \ + out4 = vec_sra(out4, pass1_bits); \ + \ + DO_FDCT_COMMON(2); \ +} + + +void jsimd_fdct_islow_altivec(DCTELEM *data) +{ + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + col0, col1, col2, col3, col4, col5, col6, col7, + tmp0, tmp1, 
tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13, + tmp47l, tmp47h, tmp56l, tmp56h, tmp1312l, tmp1312h, + z3, z4, z34l, z34h, + out0, out1, out2, out3, out4, out5, out6, out7; + __vector int z3l, z3h, z4l, z4h, + out1l, out1h, out2l, out2h, out3l, out3h, out5l, out5h, out6l, out6h, + out7l, out7h; + + /* Constants */ + __vector short + pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) }, + pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) }, + pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) }, + pw_f117_f078 = { __4X2(F_1_175, F_1_175 - F_0_390) }, + pw_mf060_mf089 = { __4X2(F_0_298 - F_0_899, -F_0_899) }, + pw_mf089_f060 = { __4X2(-F_0_899, F_1_501 - F_0_899) }, + pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) }, + pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) }, + pw_descale_p2x = { __8X(1 << (PASS1_BITS - 1)) }; + __vector unsigned short pass1_bits = { __8X(PASS1_BITS) }; + __vector int pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) }, + pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) }; + __vector unsigned int descale_p1 = { __4X(DESCALE_P1) }, + descale_p2 = { __4X(DESCALE_P2) }; + + /* Pass 1: process rows */ + + row0 = vec_ld(0, data); + row1 = vec_ld(16, data); + row2 = vec_ld(32, data); + row3 = vec_ld(48, data); + row4 = vec_ld(64, data); + row5 = vec_ld(80, data); + row6 = vec_ld(96, data); + row7 = vec_ld(112, data); + + TRANSPOSE(row, col); + + tmp0 = vec_add(col0, col7); + tmp7 = vec_sub(col0, col7); + tmp1 = vec_add(col1, col6); + tmp6 = vec_sub(col1, col6); + tmp2 = vec_add(col2, col5); + tmp5 = vec_sub(col2, col5); + tmp3 = vec_add(col3, col4); + tmp4 = vec_sub(col3, col4); + + DO_FDCT_PASS1(); + + /* Pass 2: process columns */ + + TRANSPOSE(out, row); + + tmp0 = vec_add(row0, row7); + tmp7 = vec_sub(row0, row7); + tmp1 = vec_add(row1, row6); + tmp6 = vec_sub(row1, row6); + tmp2 = vec_add(row2, row5); + tmp5 = vec_sub(row2, row5); + tmp3 = vec_add(row3, row4); + tmp4 = vec_sub(row3, row4); + + DO_FDCT_PASS2(); + + 
vec_st(out0, 0, data); + vec_st(out1, 16, data); + vec_st(out2, 32, data); + vec_st(out3, 48, data); + vec_st(out4, 64, data); + vec_st(out5, 80, data); + vec_st(out6, 96, data); + vec_st(out7, 112, data); +} diff --git a/simd/jidctfst-altivec.c b/simd/powerpc/jidctfst-altivec.c similarity index 71% rename from simd/jidctfst-altivec.c rename to simd/powerpc/jidctfst-altivec.c index ec30c39..456c6c6 100644 --- a/simd/jidctfst-altivec.c +++ b/simd/powerpc/jidctfst-altivec.c @@ -32,87 +32,85 @@ #include "jsimd_altivec.h" -#define F_1_082 277 /* FIX(1.082392200) */ -#define F_1_414 362 /* FIX(1.414213562) */ -#define F_1_847 473 /* FIX(1.847759065) */ -#define F_2_613 669 /* FIX(2.613125930) */ -#define F_1_613 (F_2_613 - 256) /* FIX(2.613125930) - FIX(1) */ +#define F_1_082 277 /* FIX(1.082392200) */ +#define F_1_414 362 /* FIX(1.414213562) */ +#define F_1_847 473 /* FIX(1.847759065) */ +#define F_2_613 669 /* FIX(2.613125930) */ +#define F_1_613 (F_2_613 - 256) /* FIX(2.613125930) - FIX(1) */ -#define CONST_BITS 8 -#define PASS1_BITS 2 -#define PRE_MULTIPLY_SCALE_BITS 2 -#define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS - 1) +#define CONST_BITS 8 +#define PASS1_BITS 2 +#define PRE_MULTIPLY_SCALE_BITS 2 +#define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS - 1) -#define DO_IDCT(in) \ -{ \ - /* Even part */ \ +#define DO_IDCT(in) { \ + /* Even part */ \ \ - tmp10 = vec_add(in##0, in##4); \ - tmp11 = vec_sub(in##0, in##4); \ - tmp13 = vec_add(in##2, in##6); \ + tmp10 = vec_add(in##0, in##4); \ + tmp11 = vec_sub(in##0, in##4); \ + tmp13 = vec_add(in##2, in##6); \ \ - tmp12 = vec_sub(in##2, in##6); \ - tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \ - tmp12 = vec_madds(tmp12, pw_F1414, pw_zero); \ - tmp12 = vec_sub(tmp12, tmp13); \ + tmp12 = vec_sub(in##2, in##6); \ + tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \ + tmp12 = vec_madds(tmp12, pw_F1414, pw_zero); \ + tmp12 = vec_sub(tmp12, tmp13); \ \ - tmp0 = vec_add(tmp10, tmp13); \ - tmp3 = 
vec_sub(tmp10, tmp13); \ - tmp1 = vec_add(tmp11, tmp12); \ - tmp2 = vec_sub(tmp11, tmp12); \ + tmp0 = vec_add(tmp10, tmp13); \ + tmp3 = vec_sub(tmp10, tmp13); \ + tmp1 = vec_add(tmp11, tmp12); \ + tmp2 = vec_sub(tmp11, tmp12); \ \ - /* Odd part */ \ + /* Odd part */ \ \ - z13 = vec_add(in##5, in##3); \ - z10 = vec_sub(in##5, in##3); \ - z10s = vec_sl(z10, pre_multiply_scale_bits); \ - z11 = vec_add(in##1, in##7); \ - z12s = vec_sub(in##1, in##7); \ - z12s = vec_sl(z12s, pre_multiply_scale_bits); \ + z13 = vec_add(in##5, in##3); \ + z10 = vec_sub(in##5, in##3); \ + z10s = vec_sl(z10, pre_multiply_scale_bits); \ + z11 = vec_add(in##1, in##7); \ + z12s = vec_sub(in##1, in##7); \ + z12s = vec_sl(z12s, pre_multiply_scale_bits); \ \ - tmp11 = vec_sub(z11, z13); \ - tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \ - tmp11 = vec_madds(tmp11, pw_F1414, pw_zero); \ + tmp11 = vec_sub(z11, z13); \ + tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \ + tmp11 = vec_madds(tmp11, pw_F1414, pw_zero); \ \ - tmp7 = vec_add(z11, z13); \ + tmp7 = vec_add(z11, z13); \ \ - /* To avoid overflow... \ - * \ - * (Original) \ - * tmp12 = -2.613125930 * z10 + z5; \ - * \ - * (This implementation) \ - * tmp12 = (-1.613125930 - 1) * z10 + z5; \ - * = -1.613125930 * z10 - z10 + z5; \ - */ \ + /* To avoid overflow... 
\ + * \ + * (Original) \ + * tmp12 = -2.613125930 * z10 + z5; \ + * \ + * (This implementation) \ + * tmp12 = (-1.613125930 - 1) * z10 + z5; \ + * = -1.613125930 * z10 - z10 + z5; \ + */ \ \ - z5 = vec_add(z10s, z12s); \ - z5 = vec_madds(z5, pw_F1847, pw_zero); \ + z5 = vec_add(z10s, z12s); \ + z5 = vec_madds(z5, pw_F1847, pw_zero); \ \ - tmp10 = vec_madds(z12s, pw_F1082, pw_zero); \ - tmp10 = vec_sub(tmp10, z5); \ - tmp12 = vec_madds(z10s, pw_MF1613, z5); \ - tmp12 = vec_sub(tmp12, z10); \ + tmp10 = vec_madds(z12s, pw_F1082, pw_zero); \ + tmp10 = vec_sub(tmp10, z5); \ + tmp12 = vec_madds(z10s, pw_MF1613, z5); \ + tmp12 = vec_sub(tmp12, z10); \ \ - tmp6 = vec_sub(tmp12, tmp7); \ - tmp5 = vec_sub(tmp11, tmp6); \ - tmp4 = vec_add(tmp10, tmp5); \ + tmp6 = vec_sub(tmp12, tmp7); \ + tmp5 = vec_sub(tmp11, tmp6); \ + tmp4 = vec_add(tmp10, tmp5); \ \ - out0 = vec_add(tmp0, tmp7); \ - out1 = vec_add(tmp1, tmp6); \ - out2 = vec_add(tmp2, tmp5); \ - out3 = vec_sub(tmp3, tmp4); \ - out4 = vec_add(tmp3, tmp4); \ - out5 = vec_sub(tmp2, tmp5); \ - out6 = vec_sub(tmp1, tmp6); \ - out7 = vec_sub(tmp0, tmp7); \ + out0 = vec_add(tmp0, tmp7); \ + out1 = vec_add(tmp1, tmp6); \ + out2 = vec_add(tmp2, tmp5); \ + out3 = vec_sub(tmp3, tmp4); \ + out4 = vec_add(tmp3, tmp4); \ + out5 = vec_sub(tmp2, tmp5); \ + out6 = vec_sub(tmp1, tmp6); \ + out7 = vec_sub(tmp0, tmp7); \ } -void -jsimd_idct_ifast_altivec (void *dct_table_, JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +void jsimd_idct_ifast_altivec(void *dct_table_, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { short *dct_table = (short *)dct_table_; int *outptr; diff --git a/simd/jidctint-altivec.c b/simd/powerpc/jidctint-altivec.c similarity index 53% rename from simd/jidctint-altivec.c rename to simd/powerpc/jidctint-altivec.c index 935f35d..0e5dd58 100644 --- a/simd/jidctint-altivec.c +++ b/simd/powerpc/jidctint-altivec.c @@ -25,189 +25,187 @@ #include "jsimd_altivec.h" -#define F_0_298 
2446 /* FIX(0.298631336) */ -#define F_0_390 3196 /* FIX(0.390180644) */ -#define F_0_541 4433 /* FIX(0.541196100) */ -#define F_0_765 6270 /* FIX(0.765366865) */ -#define F_0_899 7373 /* FIX(0.899976223) */ -#define F_1_175 9633 /* FIX(1.175875602) */ -#define F_1_501 12299 /* FIX(1.501321110) */ -#define F_1_847 15137 /* FIX(1.847759065) */ -#define F_1_961 16069 /* FIX(1.961570560) */ -#define F_2_053 16819 /* FIX(2.053119869) */ -#define F_2_562 20995 /* FIX(2.562915447) */ -#define F_3_072 25172 /* FIX(3.072711026) */ - -#define CONST_BITS 13 -#define PASS1_BITS 2 -#define DESCALE_P1 (CONST_BITS - PASS1_BITS) -#define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) - - -#define DO_IDCT(in, PASS) \ -{ \ - /* Even part \ - * \ - * (Original) \ - * z1 = (z2 + z3) * 0.541196100; \ - * tmp2 = z1 + z3 * -1.847759065; \ - * tmp3 = z1 + z2 * 0.765366865; \ - * \ - * (This implementation) \ - * tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); \ - * tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; \ - */ \ +#define F_0_298 2446 /* FIX(0.298631336) */ +#define F_0_390 3196 /* FIX(0.390180644) */ +#define F_0_541 4433 /* FIX(0.541196100) */ +#define F_0_765 6270 /* FIX(0.765366865) */ +#define F_0_899 7373 /* FIX(0.899976223) */ +#define F_1_175 9633 /* FIX(1.175875602) */ +#define F_1_501 12299 /* FIX(1.501321110) */ +#define F_1_847 15137 /* FIX(1.847759065) */ +#define F_1_961 16069 /* FIX(1.961570560) */ +#define F_2_053 16819 /* FIX(2.053119869) */ +#define F_2_562 20995 /* FIX(2.562915447) */ +#define F_3_072 25172 /* FIX(3.072711026) */ + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define DESCALE_P1 (CONST_BITS - PASS1_BITS) +#define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) + + +#define DO_IDCT(in, PASS) { \ + /* Even part \ + * \ + * (Original) \ + * z1 = (z2 + z3) * 0.541196100; \ + * tmp2 = z1 + z3 * -1.847759065; \ + * tmp3 = z1 + z2 * 0.765366865; \ + * \ + * (This implementation) \ + * tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 
1.847759065); \ + * tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; \ + */ \ \ - in##26l = vec_mergeh(in##2, in##6); \ - in##26h = vec_mergel(in##2, in##6); \ + in##26l = vec_mergeh(in##2, in##6); \ + in##26h = vec_mergel(in##2, in##6); \ \ - tmp3l = vec_msums(in##26l, pw_f130_f054, pd_zero); \ - tmp3h = vec_msums(in##26h, pw_f130_f054, pd_zero); \ - tmp2l = vec_msums(in##26l, pw_f054_mf130, pd_zero); \ - tmp2h = vec_msums(in##26h, pw_f054_mf130, pd_zero); \ + tmp3l = vec_msums(in##26l, pw_f130_f054, pd_zero); \ + tmp3h = vec_msums(in##26h, pw_f130_f054, pd_zero); \ + tmp2l = vec_msums(in##26l, pw_f054_mf130, pd_zero); \ + tmp2h = vec_msums(in##26h, pw_f054_mf130, pd_zero); \ \ - tmp0 = vec_add(in##0, in##4); \ - tmp1 = vec_sub(in##0, in##4); \ + tmp0 = vec_add(in##0, in##4); \ + tmp1 = vec_sub(in##0, in##4); \ \ - tmp0l = vec_unpackh(tmp0); \ - tmp0h = vec_unpackl(tmp0); \ - tmp0l = vec_sl(tmp0l, const_bits); \ - tmp0h = vec_sl(tmp0h, const_bits); \ - tmp0l = vec_add(tmp0l, pd_descale_p##PASS); \ - tmp0h = vec_add(tmp0h, pd_descale_p##PASS); \ + tmp0l = vec_unpackh(tmp0); \ + tmp0h = vec_unpackl(tmp0); \ + tmp0l = vec_sl(tmp0l, const_bits); \ + tmp0h = vec_sl(tmp0h, const_bits); \ + tmp0l = vec_add(tmp0l, pd_descale_p##PASS); \ + tmp0h = vec_add(tmp0h, pd_descale_p##PASS); \ \ - tmp10l = vec_add(tmp0l, tmp3l); \ - tmp10h = vec_add(tmp0h, tmp3h); \ - tmp13l = vec_sub(tmp0l, tmp3l); \ - tmp13h = vec_sub(tmp0h, tmp3h); \ + tmp10l = vec_add(tmp0l, tmp3l); \ + tmp10h = vec_add(tmp0h, tmp3h); \ + tmp13l = vec_sub(tmp0l, tmp3l); \ + tmp13h = vec_sub(tmp0h, tmp3h); \ \ - tmp1l = vec_unpackh(tmp1); \ - tmp1h = vec_unpackl(tmp1); \ - tmp1l = vec_sl(tmp1l, const_bits); \ - tmp1h = vec_sl(tmp1h, const_bits); \ - tmp1l = vec_add(tmp1l, pd_descale_p##PASS); \ - tmp1h = vec_add(tmp1h, pd_descale_p##PASS); \ + tmp1l = vec_unpackh(tmp1); \ + tmp1h = vec_unpackl(tmp1); \ + tmp1l = vec_sl(tmp1l, const_bits); \ + tmp1h = vec_sl(tmp1h, const_bits); \ + tmp1l = 
vec_add(tmp1l, pd_descale_p##PASS); \ + tmp1h = vec_add(tmp1h, pd_descale_p##PASS); \ \ - tmp11l = vec_add(tmp1l, tmp2l); \ - tmp11h = vec_add(tmp1h, tmp2h); \ - tmp12l = vec_sub(tmp1l, tmp2l); \ - tmp12h = vec_sub(tmp1h, tmp2h); \ + tmp11l = vec_add(tmp1l, tmp2l); \ + tmp11h = vec_add(tmp1h, tmp2h); \ + tmp12l = vec_sub(tmp1l, tmp2l); \ + tmp12h = vec_sub(tmp1h, tmp2h); \ \ - /* Odd part */ \ + /* Odd part */ \ \ - z3 = vec_add(in##3, in##7); \ - z4 = vec_add(in##1, in##5); \ + z3 = vec_add(in##3, in##7); \ + z4 = vec_add(in##1, in##5); \ \ - /* (Original) \ - * z5 = (z3 + z4) * 1.175875602; \ - * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ - * z3 += z5; z4 += z5; \ - * \ - * (This implementation) \ - * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ - * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ - */ \ + /* (Original) \ + * z5 = (z3 + z4) * 1.175875602; \ + * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ + * z3 += z5; z4 += z5; \ + * \ + * (This implementation) \ + * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ + * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ + */ \ \ - z34l = vec_mergeh(z3, z4); \ - z34h = vec_mergel(z3, z4); \ + z34l = vec_mergeh(z3, z4); \ + z34h = vec_mergel(z3, z4); \ \ - z3l = vec_msums(z34l, pw_mf078_f117, pd_zero); \ - z3h = vec_msums(z34h, pw_mf078_f117, pd_zero); \ - z4l = vec_msums(z34l, pw_f117_f078, pd_zero); \ - z4h = vec_msums(z34h, pw_f117_f078, pd_zero); \ + z3l = vec_msums(z34l, pw_mf078_f117, pd_zero); \ + z3h = vec_msums(z34h, pw_mf078_f117, pd_zero); \ + z4l = vec_msums(z34l, pw_f117_f078, pd_zero); \ + z4h = vec_msums(z34h, pw_f117_f078, pd_zero); \ \ - /* (Original) \ - * z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \ - * tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; \ - * tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; \ - * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ - * tmp0 += z1 + z3; tmp1 += z2 + z4; \ - * tmp2 += z2 + z3; tmp3 += z1 + z4; 
\ - * \ - * (This implementation) \ - * tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; \ - * tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; \ - * tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); \ - * tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); \ - * tmp0 += z3; tmp1 += z4; \ - * tmp2 += z3; tmp3 += z4; \ - */ \ + /* (Original) \ + * z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \ + * tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; \ + * tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; \ + * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ + * tmp0 += z1 + z3; tmp1 += z2 + z4; \ + * tmp2 += z2 + z3; tmp3 += z1 + z4; \ + * \ + * (This implementation) \ + * tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; \ + * tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; \ + * tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); \ + * tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); \ + * tmp0 += z3; tmp1 += z4; \ + * tmp2 += z3; tmp3 += z4; \ + */ \ \ - in##71l = vec_mergeh(in##7, in##1); \ - in##71h = vec_mergel(in##7, in##1); \ + in##71l = vec_mergeh(in##7, in##1); \ + in##71h = vec_mergel(in##7, in##1); \ \ - tmp0l = vec_msums(in##71l, pw_mf060_mf089, z3l); \ - tmp0h = vec_msums(in##71h, pw_mf060_mf089, z3h); \ - tmp3l = vec_msums(in##71l, pw_mf089_f060, z4l); \ - tmp3h = vec_msums(in##71h, pw_mf089_f060, z4h); \ + tmp0l = vec_msums(in##71l, pw_mf060_mf089, z3l); \ + tmp0h = vec_msums(in##71h, pw_mf060_mf089, z3h); \ + tmp3l = vec_msums(in##71l, pw_mf089_f060, z4l); \ + tmp3h = vec_msums(in##71h, pw_mf089_f060, z4h); \ \ - in##53l = vec_mergeh(in##5, in##3); \ - in##53h = vec_mergel(in##5, in##3); \ + in##53l = vec_mergeh(in##5, in##3); \ + in##53h = vec_mergel(in##5, in##3); \ \ - tmp1l = vec_msums(in##53l, pw_mf050_mf256, z4l); \ - tmp1h = vec_msums(in##53h, pw_mf050_mf256, z4h); \ - tmp2l = vec_msums(in##53l, pw_mf256_f050, z3l); \ - tmp2h 
= vec_msums(in##53h, pw_mf256_f050, z3h); \ + tmp1l = vec_msums(in##53l, pw_mf050_mf256, z4l); \ + tmp1h = vec_msums(in##53h, pw_mf050_mf256, z4h); \ + tmp2l = vec_msums(in##53l, pw_mf256_f050, z3l); \ + tmp2h = vec_msums(in##53h, pw_mf256_f050, z3h); \ \ - /* Final output stage */ \ + /* Final output stage */ \ \ - out0l = vec_add(tmp10l, tmp3l); \ - out0h = vec_add(tmp10h, tmp3h); \ - out7l = vec_sub(tmp10l, tmp3l); \ - out7h = vec_sub(tmp10h, tmp3h); \ + out0l = vec_add(tmp10l, tmp3l); \ + out0h = vec_add(tmp10h, tmp3h); \ + out7l = vec_sub(tmp10l, tmp3l); \ + out7h = vec_sub(tmp10h, tmp3h); \ \ - out0l = vec_sra(out0l, descale_p##PASS); \ - out0h = vec_sra(out0h, descale_p##PASS); \ - out7l = vec_sra(out7l, descale_p##PASS); \ - out7h = vec_sra(out7h, descale_p##PASS); \ + out0l = vec_sra(out0l, descale_p##PASS); \ + out0h = vec_sra(out0h, descale_p##PASS); \ + out7l = vec_sra(out7l, descale_p##PASS); \ + out7h = vec_sra(out7h, descale_p##PASS); \ \ - out0 = vec_pack(out0l, out0h); \ - out7 = vec_pack(out7l, out7h); \ + out0 = vec_pack(out0l, out0h); \ + out7 = vec_pack(out7l, out7h); \ \ - out1l = vec_add(tmp11l, tmp2l); \ - out1h = vec_add(tmp11h, tmp2h); \ - out6l = vec_sub(tmp11l, tmp2l); \ - out6h = vec_sub(tmp11h, tmp2h); \ + out1l = vec_add(tmp11l, tmp2l); \ + out1h = vec_add(tmp11h, tmp2h); \ + out6l = vec_sub(tmp11l, tmp2l); \ + out6h = vec_sub(tmp11h, tmp2h); \ \ - out1l = vec_sra(out1l, descale_p##PASS); \ - out1h = vec_sra(out1h, descale_p##PASS); \ - out6l = vec_sra(out6l, descale_p##PASS); \ - out6h = vec_sra(out6h, descale_p##PASS); \ + out1l = vec_sra(out1l, descale_p##PASS); \ + out1h = vec_sra(out1h, descale_p##PASS); \ + out6l = vec_sra(out6l, descale_p##PASS); \ + out6h = vec_sra(out6h, descale_p##PASS); \ \ - out1 = vec_pack(out1l, out1h); \ - out6 = vec_pack(out6l, out6h); \ + out1 = vec_pack(out1l, out1h); \ + out6 = vec_pack(out6l, out6h); \ \ - out2l = vec_add(tmp12l, tmp1l); \ - out2h = vec_add(tmp12h, tmp1h); \ - out5l = 
vec_sub(tmp12l, tmp1l); \ - out5h = vec_sub(tmp12h, tmp1h); \ + out2l = vec_add(tmp12l, tmp1l); \ + out2h = vec_add(tmp12h, tmp1h); \ + out5l = vec_sub(tmp12l, tmp1l); \ + out5h = vec_sub(tmp12h, tmp1h); \ \ - out2l = vec_sra(out2l, descale_p##PASS); \ - out2h = vec_sra(out2h, descale_p##PASS); \ - out5l = vec_sra(out5l, descale_p##PASS); \ - out5h = vec_sra(out5h, descale_p##PASS); \ + out2l = vec_sra(out2l, descale_p##PASS); \ + out2h = vec_sra(out2h, descale_p##PASS); \ + out5l = vec_sra(out5l, descale_p##PASS); \ + out5h = vec_sra(out5h, descale_p##PASS); \ \ - out2 = vec_pack(out2l, out2h); \ - out5 = vec_pack(out5l, out5h); \ + out2 = vec_pack(out2l, out2h); \ + out5 = vec_pack(out5l, out5h); \ \ - out3l = vec_add(tmp13l, tmp0l); \ - out3h = vec_add(tmp13h, tmp0h); \ - out4l = vec_sub(tmp13l, tmp0l); \ - out4h = vec_sub(tmp13h, tmp0h); \ + out3l = vec_add(tmp13l, tmp0l); \ + out3h = vec_add(tmp13h, tmp0h); \ + out4l = vec_sub(tmp13l, tmp0l); \ + out4h = vec_sub(tmp13h, tmp0h); \ \ - out3l = vec_sra(out3l, descale_p##PASS); \ - out3h = vec_sra(out3h, descale_p##PASS); \ - out4l = vec_sra(out4l, descale_p##PASS); \ - out4h = vec_sra(out4h, descale_p##PASS); \ + out3l = vec_sra(out3l, descale_p##PASS); \ + out3h = vec_sra(out3h, descale_p##PASS); \ + out4l = vec_sra(out4l, descale_p##PASS); \ + out4h = vec_sra(out4h, descale_p##PASS); \ \ - out3 = vec_pack(out3l, out3h); \ - out4 = vec_pack(out4l, out4h); \ + out3 = vec_pack(out3l, out3h); \ + out4 = vec_pack(out4l, out4h); \ } -void -jsimd_idct_islow_altivec (void *dct_table_, JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) +void jsimd_idct_islow_altivec(void *dct_table_, JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) { short *dct_table = (short *)dct_table_; int *outptr; diff --git a/simd/jquanti-altivec.c b/simd/powerpc/jquanti-altivec.c similarity index 88% rename from simd/jquanti-altivec.c rename to simd/powerpc/jquanti-altivec.c index 25cc296..7d6e325 100644 --- 
a/simd/jquanti-altivec.c +++ b/simd/powerpc/jquanti-altivec.c @@ -31,26 +31,25 @@ */ #if __BIG_ENDIAN__ -#define LOAD_ROW(row) { \ - elemptr = sample_data[row] + start_col; \ - in##row = vec_ld(0, elemptr); \ - if ((size_t)elemptr & 15) \ - in##row = vec_perm(in##row, in##row, vec_lvsl(0, elemptr)); \ +#define LOAD_ROW(row) { \ + elemptr = sample_data[row] + start_col; \ + in##row = vec_ld(0, elemptr); \ + if ((size_t)elemptr & 15) \ + in##row = vec_perm(in##row, in##row, vec_lvsl(0, elemptr)); \ } #else -#define LOAD_ROW(row) { \ - elemptr = sample_data[row] + start_col; \ - in##row = vec_vsx_ld(0, elemptr); \ +#define LOAD_ROW(row) { \ + elemptr = sample_data[row] + start_col; \ + in##row = vec_vsx_ld(0, elemptr); \ } #endif -void -jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM *workspace) +void jsimd_convsamp_altivec(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) { JSAMPROW elemptr; @@ -99,24 +98,23 @@ jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col, } -#define WORD_BIT 16 +#define WORD_BIT 16 /* There is no AltiVec 16-bit unsigned multiply instruction, hence this. We basically need an unsigned equivalent of vec_madds(). 
*/ -#define MULTIPLY(vs0, vs1, out) { \ - tmpe = vec_mule((__vector unsigned short)vs0, \ - (__vector unsigned short)vs1); \ - tmpo = vec_mulo((__vector unsigned short)vs0, \ - (__vector unsigned short)vs1); \ - out = (__vector short)vec_perm((__vector unsigned short)tmpe, \ - (__vector unsigned short)tmpo, \ - shift_pack_index); \ +#define MULTIPLY(vs0, vs1, out) { \ + tmpe = vec_mule((__vector unsigned short)vs0, \ + (__vector unsigned short)vs1); \ + tmpo = vec_mulo((__vector unsigned short)vs0, \ + (__vector unsigned short)vs1); \ + out = (__vector short)vec_perm((__vector unsigned short)tmpe, \ + (__vector unsigned short)tmpo, \ + shift_pack_index); \ } -void -jsimd_quantize_altivec (JCOEFPTR coef_block, DCTELEM *divisors, - DCTELEM *workspace) +void jsimd_quantize_altivec(JCOEFPTR coef_block, DCTELEM *divisors, + DCTELEM *workspace) { __vector short row0, row1, row2, row3, row4, row5, row6, row7, row0s, row1s, row2s, row3s, row4s, row5s, row6s, row7s, @@ -129,10 +127,10 @@ jsimd_quantize_altivec (JCOEFPTR coef_block, DCTELEM *divisors, __vector unsigned short pw_word_bit_m1 = { __8X(WORD_BIT - 1) }; #if __BIG_ENDIAN__ __vector unsigned char shift_pack_index = - {0,1,16,17,4,5,20,21,8,9,24,25,12,13,28,29}; + { 0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29 }; #else __vector unsigned char shift_pack_index = - {2,3,18,19,6,7,22,23,10,11,26,27,14,15,30,31}; + { 2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31 }; #endif row0 = vec_ld(0, workspace); diff --git a/simd/jsimd_powerpc.c b/simd/powerpc/jsimd.c similarity index 51% rename from simd/jsimd_powerpc.c rename to simd/powerpc/jsimd.c index 47dd746..d0d3981 100644 --- a/simd/jsimd_powerpc.c +++ b/simd/powerpc/jsimd.c @@ -2,8 +2,8 @@ * jsimd_powerpc.c * * Copyright 2009 Pierre Ossman for Cendio AB - * Copyright (C) 2009-2011, 2014-2016, D. R. Commander. - * Copyright (C) 2015, Matthieu Darbois. + * Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander. 
+ * Copyright (C) 2015-2016, 2018, Matthieu Darbois. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. @@ -20,12 +20,12 @@ #endif #define JPEG_INTERNALS -#include "../jinclude.h" -#include "../jpeglib.h" +#include "../../jinclude.h" +#include "../../jpeglib.h" +#include "../../jsimd.h" +#include "../../jdct.h" +#include "../../jsimddct.h" #include "../jsimd.h" -#include "../jdct.h" -#include "../jsimddct.h" -#include "jsimd.h" #include #include @@ -39,14 +39,15 @@ static unsigned int simd_support = ~0; -#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) +#if !defined(__ALTIVEC__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) -#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) +#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) LOCAL(int) -check_feature (char *buffer, char *feature) +check_feature(char *buffer, char *feature) { char *p; + if (*feature == 0) return 0; if (strncmp(buffer, "cpu", 3) != 0) @@ -72,10 +73,11 @@ check_feature (char *buffer, char *feature) } LOCAL(int) -parse_proc_cpuinfo (int bufsize) +parse_proc_cpuinfo(int bufsize) { char *buffer = (char *)malloc(bufsize); FILE *fd; + simd_support = 0; if (!buffer) @@ -107,9 +109,11 @@ parse_proc_cpuinfo (int bufsize) * FIXME: This code is racy under a multi-threaded environment. 
*/ LOCAL(void) -init_simd (void) +init_simd(void) { +#ifndef NO_GETENV char *env = NULL; +#endif #if !defined(__ALTIVEC__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) int bufsize = 1024; /* an initial guess for the line buffer size limit */ #elif defined(__amigaos4__) @@ -135,13 +139,14 @@ init_simd (void) } #elif defined(__amigaos4__) IExec->GetCPUInfoTags(GCIT_VectorUnit, &altivec, TAG_DONE); - if(altivec == VECTORTYPE_ALTIVEC) + if (altivec == VECTORTYPE_ALTIVEC) simd_support |= JSIMD_ALTIVEC; #elif defined(__OpenBSD__) if (sysctl(mib, 2, &altivec, &len, NULL, 0) == 0 && altivec != 0) simd_support |= JSIMD_ALTIVEC; #endif +#ifndef NO_GETENV /* Force different settings through environment variables */ env = getenv("JSIMD_FORCEALTIVEC"); if ((env != NULL) && (strcmp(env, "1") == 0)) @@ -149,10 +154,11 @@ init_simd (void) env = getenv("JSIMD_FORCENONE"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_support = 0; +#endif } GLOBAL(int) -jsimd_can_rgb_ycc (void) +jsimd_can_rgb_ycc(void) { init_simd(); @@ -171,7 +177,7 @@ jsimd_can_rgb_ycc (void) } GLOBAL(int) -jsimd_can_rgb_gray (void) +jsimd_can_rgb_gray(void) { init_simd(); @@ -190,7 +196,7 @@ jsimd_can_rgb_gray (void) } GLOBAL(int) -jsimd_can_ycc_rgb (void) +jsimd_can_ycc_rgb(void) { init_simd(); @@ -209,134 +215,134 @@ jsimd_can_ycc_rgb (void) } GLOBAL(int) -jsimd_can_ycc_rgb565 (void) +jsimd_can_ycc_rgb565(void) { return 0; } GLOBAL(void) -jsimd_rgb_ycc_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - void (*altivecfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - - switch(cinfo->in_color_space) { - case JCS_EXT_RGB: - altivecfct=jsimd_extrgb_ycc_convert_altivec; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - altivecfct=jsimd_extrgbx_ycc_convert_altivec; - break; - case JCS_EXT_BGR: - altivecfct=jsimd_extbgr_ycc_convert_altivec; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - 
altivecfct=jsimd_extbgrx_ycc_convert_altivec; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - altivecfct=jsimd_extxbgr_ycc_convert_altivec; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - altivecfct=jsimd_extxrgb_ycc_convert_altivec; - break; - default: - altivecfct=jsimd_rgb_ycc_convert_altivec; - break; +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + void (*altivecfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + altivecfct = jsimd_extrgb_ycc_convert_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct = jsimd_extrgbx_ycc_convert_altivec; + break; + case JCS_EXT_BGR: + altivecfct = jsimd_extbgr_ycc_convert_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct = jsimd_extbgrx_ycc_convert_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct = jsimd_extxbgr_ycc_convert_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct = jsimd_extxrgb_ycc_convert_altivec; + break; + default: + altivecfct = jsimd_rgb_ycc_convert_altivec; + break; } altivecfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); } GLOBAL(void) -jsimd_rgb_gray_convert (j_compress_ptr cinfo, - JSAMPARRAY input_buf, JSAMPIMAGE output_buf, - JDIMENSION output_row, int num_rows) -{ - void (*altivecfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); - - switch(cinfo->in_color_space) { - case JCS_EXT_RGB: - altivecfct=jsimd_extrgb_gray_convert_altivec; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - altivecfct=jsimd_extrgbx_gray_convert_altivec; - break; - case JCS_EXT_BGR: - altivecfct=jsimd_extbgr_gray_convert_altivec; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - altivecfct=jsimd_extbgrx_gray_convert_altivec; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - altivecfct=jsimd_extxbgr_gray_convert_altivec; - break; - case 
JCS_EXT_XRGB: - case JCS_EXT_ARGB: - altivecfct=jsimd_extxrgb_gray_convert_altivec; - break; - default: - altivecfct=jsimd_rgb_gray_convert_altivec; - break; +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + void (*altivecfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + altivecfct = jsimd_extrgb_gray_convert_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct = jsimd_extrgbx_gray_convert_altivec; + break; + case JCS_EXT_BGR: + altivecfct = jsimd_extbgr_gray_convert_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct = jsimd_extbgrx_gray_convert_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct = jsimd_extxbgr_gray_convert_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct = jsimd_extxrgb_gray_convert_altivec; + break; + default: + altivecfct = jsimd_rgb_gray_convert_altivec; + break; } altivecfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); } GLOBAL(void) -jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) -{ - void (*altivecfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - altivecfct=jsimd_ycc_extrgb_convert_altivec; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - altivecfct=jsimd_ycc_extrgbx_convert_altivec; - break; - case JCS_EXT_BGR: - altivecfct=jsimd_ycc_extbgr_convert_altivec; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - altivecfct=jsimd_ycc_extbgrx_convert_altivec; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - altivecfct=jsimd_ycc_extxbgr_convert_altivec; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - altivecfct=jsimd_ycc_extxrgb_convert_altivec; - break; - default: - altivecfct=jsimd_ycc_rgb_convert_altivec; - break; 
+jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ + void (*altivecfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + altivecfct = jsimd_ycc_extrgb_convert_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct = jsimd_ycc_extrgbx_convert_altivec; + break; + case JCS_EXT_BGR: + altivecfct = jsimd_ycc_extbgr_convert_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct = jsimd_ycc_extbgrx_convert_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct = jsimd_ycc_extxbgr_convert_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct = jsimd_ycc_extxrgb_convert_altivec; + break; + default: + altivecfct = jsimd_ycc_rgb_convert_altivec; + break; } altivecfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); } GLOBAL(void) -jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, JDIMENSION input_row, - JSAMPARRAY output_buf, int num_rows) +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) { } GLOBAL(int) -jsimd_can_h2v2_downsample (void) +jsimd_can_h2v2_downsample(void) { init_simd(); @@ -353,7 +359,7 @@ jsimd_can_h2v2_downsample (void) } GLOBAL(int) -jsimd_can_h2v1_downsample (void) +jsimd_can_h2v1_downsample(void) { init_simd(); @@ -370,27 +376,27 @@ jsimd_can_h2v1_downsample (void) } GLOBAL(void) -jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { jsimd_h2v2_downsample_altivec(cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor, - compptr->width_in_blocks, - input_data, output_data); + compptr->width_in_blocks, 
input_data, + output_data); } GLOBAL(void) -jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, - JSAMPARRAY input_data, JSAMPARRAY output_data) +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) { jsimd_h2v1_downsample_altivec(cinfo->image_width, cinfo->max_v_samp_factor, compptr->v_samp_factor, - compptr->width_in_blocks, - input_data, output_data); + compptr->width_in_blocks, input_data, + output_data); } GLOBAL(int) -jsimd_can_h2v2_upsample (void) +jsimd_can_h2v2_upsample(void) { init_simd(); @@ -407,7 +413,7 @@ jsimd_can_h2v2_upsample (void) } GLOBAL(int) -jsimd_can_h2v1_upsample (void) +jsimd_can_h2v1_upsample(void) { init_simd(); @@ -424,27 +430,23 @@ jsimd_can_h2v1_upsample (void) } GLOBAL(void) -jsimd_h2v2_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { jsimd_h2v2_upsample_altivec(cinfo->max_v_samp_factor, cinfo->output_width, input_data, output_data_ptr); } GLOBAL(void) -jsimd_h2v1_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { jsimd_h2v1_upsample_altivec(cinfo->max_v_samp_factor, cinfo->output_width, input_data, output_data_ptr); } GLOBAL(int) -jsimd_can_h2v2_fancy_upsample (void) +jsimd_can_h2v2_fancy_upsample(void) { init_simd(); @@ -461,7 +463,7 @@ jsimd_can_h2v2_fancy_upsample (void) } GLOBAL(int) -jsimd_can_h2v1_fancy_upsample (void) +jsimd_can_h2v1_fancy_upsample(void) { init_simd(); @@ -478,10 +480,8 @@ jsimd_can_h2v1_fancy_upsample (void) } GLOBAL(void) -jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY 
input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { jsimd_h2v2_fancy_upsample_altivec(cinfo->max_v_samp_factor, compptr->downsampled_width, input_data, @@ -489,10 +489,8 @@ jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, } GLOBAL(void) -jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, - jpeg_component_info *compptr, - JSAMPARRAY input_data, - JSAMPARRAY *output_data_ptr) +jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) { jsimd_h2v1_fancy_upsample_altivec(cinfo->max_v_samp_factor, compptr->downsampled_width, input_data, @@ -500,7 +498,7 @@ jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, } GLOBAL(int) -jsimd_can_h2v2_merged_upsample (void) +jsimd_can_h2v2_merged_upsample(void) { init_simd(); @@ -517,7 +515,7 @@ jsimd_can_h2v2_merged_upsample (void) } GLOBAL(int) -jsimd_can_h2v1_merged_upsample (void) +jsimd_can_h2v1_merged_upsample(void) { init_simd(); @@ -534,85 +532,81 @@ jsimd_can_h2v1_merged_upsample (void) } GLOBAL(void) -jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - void (*altivecfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - altivecfct=jsimd_h2v2_extrgb_merged_upsample_altivec; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - altivecfct=jsimd_h2v2_extrgbx_merged_upsample_altivec; - break; - case JCS_EXT_BGR: - altivecfct=jsimd_h2v2_extbgr_merged_upsample_altivec; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - altivecfct=jsimd_h2v2_extbgrx_merged_upsample_altivec; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - altivecfct=jsimd_h2v2_extxbgr_merged_upsample_altivec; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - altivecfct=jsimd_h2v2_extxrgb_merged_upsample_altivec; - 
break; - default: - altivecfct=jsimd_h2v2_merged_upsample_altivec; - break; +jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) +{ + void (*altivecfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + altivecfct = jsimd_h2v2_extrgb_merged_upsample_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct = jsimd_h2v2_extrgbx_merged_upsample_altivec; + break; + case JCS_EXT_BGR: + altivecfct = jsimd_h2v2_extbgr_merged_upsample_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct = jsimd_h2v2_extbgrx_merged_upsample_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct = jsimd_h2v2_extxbgr_merged_upsample_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct = jsimd_h2v2_extxrgb_merged_upsample_altivec; + break; + default: + altivecfct = jsimd_h2v2_merged_upsample_altivec; + break; } altivecfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); } GLOBAL(void) -jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, - JSAMPIMAGE input_buf, - JDIMENSION in_row_group_ctr, - JSAMPARRAY output_buf) -{ - void (*altivecfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); - - switch(cinfo->out_color_space) { - case JCS_EXT_RGB: - altivecfct=jsimd_h2v1_extrgb_merged_upsample_altivec; - break; - case JCS_EXT_RGBX: - case JCS_EXT_RGBA: - altivecfct=jsimd_h2v1_extrgbx_merged_upsample_altivec; - break; - case JCS_EXT_BGR: - altivecfct=jsimd_h2v1_extbgr_merged_upsample_altivec; - break; - case JCS_EXT_BGRX: - case JCS_EXT_BGRA: - altivecfct=jsimd_h2v1_extbgrx_merged_upsample_altivec; - break; - case JCS_EXT_XBGR: - case JCS_EXT_ABGR: - altivecfct=jsimd_h2v1_extxbgr_merged_upsample_altivec; - break; - case JCS_EXT_XRGB: - case JCS_EXT_ARGB: - altivecfct=jsimd_h2v1_extxrgb_merged_upsample_altivec; - break; - default: - altivecfct=jsimd_h2v1_merged_upsample_altivec; - 
break; +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) +{ + void (*altivecfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + altivecfct = jsimd_h2v1_extrgb_merged_upsample_altivec; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + altivecfct = jsimd_h2v1_extrgbx_merged_upsample_altivec; + break; + case JCS_EXT_BGR: + altivecfct = jsimd_h2v1_extbgr_merged_upsample_altivec; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + altivecfct = jsimd_h2v1_extbgrx_merged_upsample_altivec; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + altivecfct = jsimd_h2v1_extxbgr_merged_upsample_altivec; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + altivecfct = jsimd_h2v1_extxrgb_merged_upsample_altivec; + break; + default: + altivecfct = jsimd_h2v1_merged_upsample_altivec; + break; } altivecfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); } GLOBAL(int) -jsimd_can_convsamp (void) +jsimd_can_convsamp(void) { init_simd(); @@ -633,26 +627,26 @@ jsimd_can_convsamp (void) } GLOBAL(int) -jsimd_can_convsamp_float (void) +jsimd_can_convsamp_float(void) { return 0; } GLOBAL(void) -jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, - DCTELEM *workspace) +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) { jsimd_convsamp_altivec(sample_data, start_col, workspace); } GLOBAL(void) -jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, - FAST_FLOAT *workspace) +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) { } GLOBAL(int) -jsimd_can_fdct_islow (void) +jsimd_can_fdct_islow(void) { init_simd(); @@ -669,7 +663,7 @@ jsimd_can_fdct_islow (void) } GLOBAL(int) -jsimd_can_fdct_ifast (void) +jsimd_can_fdct_ifast(void) { init_simd(); @@ -686,30 +680,30 @@ jsimd_can_fdct_ifast (void) } GLOBAL(int) -jsimd_can_fdct_float (void) 
+jsimd_can_fdct_float(void) { return 0; } GLOBAL(void) -jsimd_fdct_islow (DCTELEM *data) +jsimd_fdct_islow(DCTELEM *data) { jsimd_fdct_islow_altivec(data); } GLOBAL(void) -jsimd_fdct_ifast (DCTELEM *data) +jsimd_fdct_ifast(DCTELEM *data) { jsimd_fdct_ifast_altivec(data); } GLOBAL(void) -jsimd_fdct_float (FAST_FLOAT *data) +jsimd_fdct_float(FAST_FLOAT *data) { } GLOBAL(int) -jsimd_can_quantize (void) +jsimd_can_quantize(void) { init_simd(); @@ -728,52 +722,51 @@ jsimd_can_quantize (void) } GLOBAL(int) -jsimd_can_quantize_float (void) +jsimd_can_quantize_float(void) { return 0; } GLOBAL(void) -jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, - DCTELEM *workspace) +jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) { jsimd_quantize_altivec(coef_block, divisors, workspace); } GLOBAL(void) -jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, - FAST_FLOAT *workspace) +jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) { } GLOBAL(int) -jsimd_can_idct_2x2 (void) +jsimd_can_idct_2x2(void) { return 0; } GLOBAL(int) -jsimd_can_idct_4x4 (void) +jsimd_can_idct_4x4(void) { return 0; } GLOBAL(void) -jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(void) -jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(int) -jsimd_can_idct_islow (void) +jsimd_can_idct_islow(void) { init_simd(); @@ -790,7 +783,7 @@ jsimd_can_idct_islow (void) } GLOBAL(int) -jsimd_can_idct_ifast (void) +jsimd_can_idct_ifast(void) { init_simd(); @@ -807,46 
+800,73 @@ jsimd_can_idct_ifast (void) } GLOBAL(int) -jsimd_can_idct_float (void) +jsimd_can_idct_float(void) { return 0; } GLOBAL(void) -jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { jsimd_idct_islow_altivec(compptr->dct_table, coef_block, output_buf, output_col); } GLOBAL(void) -jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { jsimd_idct_ifast_altivec(compptr->dct_table, coef_block, output_buf, output_col); } GLOBAL(void) -jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, - JDIMENSION output_col) +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) { } GLOBAL(int) -jsimd_can_huff_encode_one_block (void) +jsimd_can_huff_encode_one_block(void) { return 0; } -GLOBAL(JOCTET*) -jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, - int last_dc_val, c_derived_tbl *dctbl, - c_derived_tbl *actbl) +GLOBAL(JOCTET *) +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) { return NULL; } + +GLOBAL(int) +jsimd_can_encode_mcu_AC_first_prepare(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *zerobits) +{ +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_refine_prepare(void) +{ + return 0; +} + +GLOBAL(int) 
+jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits) +{ + return 0; +} diff --git a/simd/jsimd_altivec.h b/simd/powerpc/jsimd_altivec.h similarity index 75% rename from simd/jsimd_altivec.h rename to simd/powerpc/jsimd_altivec.h index 62dbc5c..e8bdb06 100644 --- a/simd/jsimd_altivec.h +++ b/simd/powerpc/jsimd_altivec.h @@ -21,28 +21,27 @@ */ #define JPEG_INTERNALS -#include "../jinclude.h" -#include "../jpeglib.h" +#include "../../jinclude.h" +#include "../../jpeglib.h" +#include "../../jsimd.h" +#include "../../jdct.h" +#include "../../jsimddct.h" #include "../jsimd.h" -#include "../jdct.h" -#include "../jsimddct.h" -#include "jsimd.h" #include /* Common code */ -#define __4X(a) a, a, a, a -#define __4X2(a, b) a, b, a, b, a, b, a, b -#define __8X(a) __4X(a), __4X(a) -#define __16X(a) __8X(a), __8X(a) +#define __4X(a) a, a, a, a +#define __4X2(a, b) a, b, a, b, a, b, a, b +#define __8X(a) __4X(a), __4X(a) +#define __16X(a) __8X(a), __8X(a) -#define TRANSPOSE(row, col) \ -{ \ - __vector short row04l, row04h, row15l, row15h, \ - row26l, row26h, row37l, row37h; \ - __vector short col01e, col01o, col23e, col23o, \ - col45e, col45o, col67e, col67o; \ +#define TRANSPOSE(row, col) { \ + __vector short row04l, row04h, row15l, row15h, \ + row26l, row26h, row37l, row37h; \ + __vector short col01e, col01o, col23e, col23o, \ + col45e, col45o, col67e, col67o; \ \ /* transpose coefficients (phase 1) */ \ row04l = vec_mergeh(row##0, row##4); /* row04l=(00 40 01 41 02 42 03 43) */ \ @@ -65,18 +64,18 @@ col67o = vec_mergel(row15h, row37h); /* col67o=(16 36 56 76 17 37 57 77) */ \ \ /* transpose coefficients (phase 3) */ \ - col##0 = vec_mergeh(col01e, col01o); /* col0=(00 10 20 30 40 50 60 70) */ \ - col##1 = vec_mergel(col01e, col01o); /* col1=(01 11 21 31 41 51 61 71) */ \ - col##2 = vec_mergeh(col23e, col23o); /* col2=(02 12 22 32 42 52 62 72) */ \ - col##3 = vec_mergel(col23e, col23o); 
/* col3=(03 13 23 33 43 53 63 73) */ \ - col##4 = vec_mergeh(col45e, col45o); /* col4=(04 14 24 34 44 54 64 74) */ \ - col##5 = vec_mergel(col45e, col45o); /* col5=(05 15 25 35 45 55 65 75) */ \ - col##6 = vec_mergeh(col67e, col67o); /* col6=(06 16 26 36 46 56 66 76) */ \ - col##7 = vec_mergel(col67e, col67o); /* col7=(07 17 27 37 47 57 67 77) */ \ + col##0 = vec_mergeh(col01e, col01o); /* col0=(00 10 20 30 40 50 60 70) */ \ + col##1 = vec_mergel(col01e, col01o); /* col1=(01 11 21 31 41 51 61 71) */ \ + col##2 = vec_mergeh(col23e, col23o); /* col2=(02 12 22 32 42 52 62 72) */ \ + col##3 = vec_mergel(col23e, col23o); /* col3=(03 13 23 33 43 53 63 73) */ \ + col##4 = vec_mergeh(col45e, col45o); /* col4=(04 14 24 34 44 54 64 74) */ \ + col##5 = vec_mergel(col45e, col45o); /* col5=(05 15 25 35 45 55 65 75) */ \ + col##6 = vec_mergeh(col67e, col67o); /* col6=(06 16 26 36 46 56 66 76) */ \ + col##7 = vec_mergel(col67e, col67o); /* col7=(07 17 27 37 47 57 67 77) */ \ } #ifndef min -#define min(a,b) ((a) < (b) ? (a) : (b)) +#define min(a, b) ((a) < (b) ? 
(a) : (b)) #endif @@ -84,16 +83,16 @@ #if __BIG_ENDIAN__ -#define VEC_LD(a, b) vec_ld(a, b) -#define VEC_ST(a, b, c) vec_st(a, b, c) -#define VEC_UNPACKHU(a) vec_mergeh(pb_zero, a) -#define VEC_UNPACKLU(a) vec_mergel(pb_zero, a) +#define VEC_LD(a, b) vec_ld(a, b) +#define VEC_ST(a, b, c) vec_st(a, b, c) +#define VEC_UNPACKHU(a) vec_mergeh(pb_zero, a) +#define VEC_UNPACKLU(a) vec_mergel(pb_zero, a) #else -#define VEC_LD(a, b) vec_vsx_ld(a, b) -#define VEC_ST(a, b, c) vec_vsx_st(a, b, c) -#define VEC_UNPACKHU(a) vec_mergeh(a, pb_zero) -#define VEC_UNPACKLU(a) vec_mergel(a, pb_zero) +#define VEC_LD(a, b) vec_vsx_ld(a, b) +#define VEC_ST(a, b, c) vec_vsx_st(a, b, c) +#define VEC_UNPACKHU(a) vec_mergeh(a, pb_zero) +#define VEC_UNPACKLU(a) vec_mergel(a, pb_zero) #endif diff --git a/simd/x86_64/jccolext-avx2.asm b/simd/x86_64/jccolext-avx2.asm new file mode 100644 index 0000000..5fa3848 --- /dev/null +++ b/simd/x86_64/jccolext-avx2.asm @@ -0,0 +1,560 @@ +; +; jccolext.asm - colorspace conversion (64-bit AVX2) +; +; Copyright (C) 2009, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. 
+; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_avx2(JDIMENSION img_width, JSAMPARRAY input_buf, +; JSAMPIMAGE output_buf, JDIMENSION output_row, +; int num_rows); +; + +; r10d = JDIMENSION img_width +; r11 = JSAMPARRAY input_buf +; r12 = JSAMPIMAGE output_buf +; r13d = JDIMENSION output_row +; r14d = int num_rows + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM] +%define WK_NUM 8 + + align 32 + GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2) + +EXTN(jsimd_rgb_ycc_convert_avx2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 5 + push rbx + + mov ecx, r10d + test rcx, rcx + jz near .return + + push rcx + + mov rsi, r12 + mov ecx, r13d + mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY] + lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] + lea rbx, [rbx+rcx*SIZEOF_JSAMPROW] + lea rdx, [rdx+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rsi, r11 + mov eax, r14d + test rax, rax + jle near .return +.rowloop: + push rdx + push rbx + push rdi + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr0 + mov rbx, JSAMPROW [rbx] ; outptr1 + mov rdx, JSAMPROW [rdx] ; outptr2 + + cmp rcx, byte SIZEOF_YMMWORD + jae near .columnloop + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push rax + push rdx + lea rcx, [rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub rcx, byte SIZEOF_BYTE + movzx rax, BYTE [rsi+rcx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub rcx, byte SIZEOF_WORD + movzx rdx, WORD [rsi+rcx] + shl rax, WORD_BIT + or rax, rdx +.column_ld4: + vmovd xmmA, eax + pop rdx + pop rax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub rcx, byte SIZEOF_DWORD + vmovd xmmF, XMM_DWORD [rsi+rcx] + vpslldq xmmA, xmmA, 
SIZEOF_DWORD + vpor xmmA, xmmA, xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub rcx, byte SIZEOF_MMWORD + vmovq xmmB, XMM_MMWORD [rsi+rcx] + vpslldq xmmA, xmmA, SIZEOF_MMWORD + vpor xmmA, xmmA, xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + sub rcx, byte SIZEOF_XMMWORD + vmovdqu xmmB, XMM_MMWORD [rsi+rcx] + vperm2i128 ymmA, ymmA, ymmA, 1 + vpor ymmA, ymmB +.column_ld32: + test cl, SIZEOF_YMMWORD + jz short .column_ld64 + sub rcx, byte SIZEOF_YMMWORD + vmovdqa ymmF, ymmA + vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD] +.column_ld64: + test cl, 2*SIZEOF_YMMWORD + mov rcx, SIZEOF_YMMWORD + jz short .rgb_ycc_cnv + vmovdqa ymmB, ymmA + vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD] + jmp short .rgb_ycc_cnv + +.columnloop: + vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD] + vmovdqu ymmB, YMMWORD [rsi+2*SIZEOF_YMMWORD] + +.rgb_ycc_cnv: + ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + ; ymmB=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + + vmovdqu ymmC, ymmA + vinserti128 ymmA, ymmF, xmmA, 0 ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + vinserti128 ymmC, ymmC, xmmB, 0 ; ymmC=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + vinserti128 ymmB, ymmB, xmmF, 0 ; ymmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + vperm2i128 ymmF, ymmC, ymmC, 1 ; ymmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A + ; 1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q) + + vmovdqa ymmG, ymmA + vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 10 20 01 
11 21 02 12 + ; 22 03 13 23 04 14 24 05 0G 1G 2G 0H 1H 2H 0I 1I) + vpsrldq ymmG, ymmG, 8 ; ymmG=(22 03 13 23 04 14 24 05 0G 1G 2G 0H 1H 2H 0I 1I + ; 2I 0J 1J 2J 0K 1K 2K 0L -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmF ; ymmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A + ; 0G 0O 1G 1O 2G 2O 0H 0P 1H 1P 2H 2P 0I 0Q 1I 1Q) + vpslldq ymmF, ymmF, 8 ; ymmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27 + ; 08 18 28 09 19 29 0A 1A 1L 2L 0M 1M 2M 0N 1N 2N) + + vpunpcklbw ymmG, ymmG, ymmB ; ymmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D + ; 2I 2Q 0J 0R 1J 1R 2J 2R 0K 0S 1K 1S 2K 2S 0L 0T) + vpunpckhbw ymmF, ymmF, ymmB ; ymmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F + ; 1L 1T 2L 2T 0M 0U 1M 1U 2M 2U 0N 0V 1N 1V 2N 2V) + + vmovdqa ymmD, ymmA + vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09 + ; 11 19 21 29 02 0A 12 1A 0G 0O 1G 1O 2G 2O 0H 0P) + vpsrldq ymmD, ymmD, 8 ; ymmD=(11 19 21 29 02 0A 12 1A 0G 0O 1G 1O 2G 2O 0H 0P + ; 1H 1P 2H 2P 0I 0Q 1I 1Q -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmG ; ymmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D + ; 0G 0K 0O 0S 1G 1K 1O 1S 2G 2K 2O 2S 0H 0L 0P 0T) + vpslldq ymmG, ymmG, 8 ; ymmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B + ; 04 0C 14 1C 24 2C 05 0D 2I 2Q 0J 0R 1J 1R 2J 2R) + + vpunpcklbw ymmD, ymmD, ymmF ; ymmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E + ; 1H 1L 1P 1T 2H 2L 2P 2T 0I 0M 0Q 0U 1I 1M 1Q 1U) + vpunpckhbw ymmG, ymmG, ymmF ; ymmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F + ; 2I 2M 2Q 2U 0J 0N 0R 0V 1J 1N 1R 1V 2J 2N 2R 2V) + + vmovdqa ymmE, ymmA + vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C + ; 20 24 28 2C 01 05 09 0D 0G 0K 0O 0S 1G 1K 1O 1S) + vpsrldq ymmE, ymmE, 8 ; ymmE=(20 24 28 2C 01 05 09 0D 0G 0K 0O 0S 1G 1K 1O 1S + ; 2G 2K 2O 2S 0H 0L 0P 0T -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmD ; ymmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E + ; 0G 0I 0K 0M 0O 0Q 0S 0U 
1G 1I 1K 1M 1O 1Q 1S 1U) + vpslldq ymmD, ymmD, 8 ; ymmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D + ; 02 06 0A 0E 12 16 1A 1E 1H 1L 1P 1T 2H 2L 2P 2T) + + vpunpcklbw ymmE, ymmE, ymmG ; ymmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F + ; 2G 2I 2K 2M 2O 2Q 2S 2U 0H 0J 0L 0N 0P 0R 0T 0V) + vpunpckhbw ymmD, ymmD, ymmG ; ymmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F + ; 1H 1J 1L 1N 1P 1R 1T 1V 2H 2J 2L 2N 2P 2R 2T 2V) + + vpxor ymmH, ymmH, ymmH + + vmovdqa ymmC, ymmA + vpunpcklbw ymmA, ymmA, ymmH ; ymmA=(00 02 04 06 08 0A 0C 0E 0G 0I 0K 0M 0O 0Q 0S 0U) + vpunpckhbw ymmC, ymmC, ymmH ; ymmC=(10 12 14 16 18 1A 1C 1E 1G 1I 1K 1M 1O 1Q 1S 1U) + + vmovdqa ymmB, ymmE + vpunpcklbw ymmE, ymmE, ymmH ; ymmE=(20 22 24 26 28 2A 2C 2E 2G 2I 2K 2M 2O 2Q 2S 2U) + vpunpckhbw ymmB, ymmB, ymmH ; ymmB=(01 03 05 07 09 0B 0D 0F 0H 0J 0L 0N 0P 0R 0T 0V) + + vmovdqa ymmF, ymmD + vpunpcklbw ymmD, ymmD, ymmH ; ymmD=(11 13 15 17 19 1B 1D 1F 1H 1J 1L 1N 1P 1R 1T 1V) + vpunpckhbw ymmF, ymmF, ymmH ; ymmF=(21 23 25 27 29 2B 2D 2F 2H 2J 2L 2N 2P 2R 2T 2V) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub rcx, byte SIZEOF_XMMWORD/16 + vmovd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub rcx, byte SIZEOF_XMMWORD/8 + vmovq xmmF, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE] + vpslldq xmmA, xmmA, SIZEOF_MMWORD + vpor xmmA, xmmA, xmmF +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub rcx, byte SIZEOF_XMMWORD/4 + vmovdqa xmmF, xmmA + vperm2i128 ymmF, ymmF, ymmF, 1 + vmovdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE] + vpor ymmA, ymmA, ymmF +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + jz short .column_ld16 + sub rcx, byte SIZEOF_XMMWORD/2 + vmovdqa ymmF, ymmA + vmovdqu ymmA, YMMWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld16: + test cl, SIZEOF_XMMWORD + mov rcx, SIZEOF_YMMWORD + jz short .rgb_ycc_cnv + vmovdqa ymmE, ymmA + vmovdqa ymmH, ymmF + vmovdqu ymmA, 
YMMWORD [rsi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD] + jmp short .rgb_ycc_cnv + +.columnloop: + vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD] + vmovdqu ymmE, YMMWORD [rsi+2*SIZEOF_YMMWORD] + vmovdqu ymmH, YMMWORD [rsi+3*SIZEOF_YMMWORD] + +.rgb_ycc_cnv: + ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; ymmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + ; ymmE=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + vmovdqa ymmB, ymmA + vinserti128 ymmA, ymmA, xmmE, 1 ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J) + vperm2i128 ymmE, ymmB, ymmE, 0x31 ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + + vmovdqa ymmB, ymmF + vinserti128 ymmF, ymmF, xmmH, 1 ; ymmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R) + vperm2i128 ymmH, ymmB, ymmH, 0x31 ; ymmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + vmovdqa ymmD, ymmA + vpunpcklbw ymmA, ymmA, ymmE ; ymmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35 + ; 0G 0K 1G 1K 2G 2K 3G 3K 0H 0L 1H 1L 2H 2L 3H 3L) + vpunpckhbw ymmD, ymmD, ymmE ; ymmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37 + ; 0I 0M 1I 1M 2I 2M 3I 3M 0J 0N 1J 1N 2J 2N 3J 3N) + + vmovdqa ymmC, ymmF + vpunpcklbw ymmF, ymmF, ymmH ; ymmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D + ; 0O 0S 1O 1S 2O 2S 3O 3S 0P 0T 1P 1T 2P 2T 3P 3T) + vpunpckhbw ymmC, ymmC, ymmH ; ymmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F + ; 0Q 0U 1Q 1U 2Q 2U 3Q 3U 0R 0V 1R 1V 2R 2V 3R 3V) + + 
vmovdqa ymmB, ymmA + vpunpcklwd ymmA, ymmA, ymmF ; ymmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C + ; 0G 0K 0O 0S 1G 1K 1O 1S 2G 2K 2O 2S 3G 3K 3O 3S) + vpunpckhwd ymmB, ymmB, ymmF ; ymmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D + ; 0H 0L 0P 0T 1H 1L 1P 1T 2H 2L 2P 2T 3H 3L 3P 3T) + + vmovdqa ymmG, ymmD + vpunpcklwd ymmD, ymmD, ymmC ; ymmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E + ; 0I 0M 0Q 0U 1I 1M 1Q 1U 2I 2M 2Q 2U 3I 3M 3Q 3U) + vpunpckhwd ymmG, ymmG, ymmC ; ymmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F + ; 0J 0N 0R 0V 1J 1N 1R 1V 2J 2N 2R 2V 3J 3N 3R 3V) + + vmovdqa ymmE, ymmA + vpunpcklbw ymmA, ymmA, ymmD ; ymmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E + ; 0G 0I 0K 0M 0O 0Q 0S 0U 1G 1I 1K 1M 1O 1Q 1S 1U) + vpunpckhbw ymmE, ymmE, ymmD ; ymmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E + ; 2G 2I 2K 2M 2O 2Q 2S 2U 3G 3I 3K 3M 3O 3Q 3S 3U) + + vmovdqa ymmH, ymmB + vpunpcklbw ymmB, ymmB, ymmG ; ymmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F + ; 0H 0J 0L 0N 0P 0R 0T 0V 1H 1J 1L 1N 1P 1R 1T 1V) + vpunpckhbw ymmH, ymmH, ymmG ; ymmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F + ; 2H 2J 2L 2N 2P 2R 2T 2V 3H 3J 3L 3N 3P 3R 3T 3V) + + vpxor ymmF, ymmF, ymmF + + vmovdqa ymmC, ymmA + vpunpcklbw ymmA, ymmA, ymmF ; ymmA=(00 02 04 06 08 0A 0C 0E 0G 0I 0K 0M 0O 0Q 0S 0U) + vpunpckhbw ymmC, ymmC, ymmF ; ymmC=(10 12 14 16 18 1A 1C 1E 1G 1I 1K 1M 1O 1Q 1S 1U) + + vmovdqa ymmD, ymmB + vpunpcklbw ymmB, ymmB, ymmF ; ymmB=(01 03 05 07 09 0B 0D 0F 0H 0J 0L 0N 0P 0R 0T 0V) + vpunpckhbw ymmD, ymmD, ymmF ; ymmD=(11 13 15 17 19 1B 1D 1F 1H 1J 1L 1N 1P 1R 1T 1V) + + vmovdqa ymmG, ymmE + vpunpcklbw ymmE, ymmE, ymmF ; ymmE=(20 22 24 26 28 2A 2C 2E 2G 2I 2K 2M 2O 2Q 2S 2U) + vpunpckhbw ymmG, ymmG, ymmF ; ymmG=(30 32 34 36 38 3A 3C 3E 3G 3I 3K 3M 3O 3Q 3S 3U) + + vpunpcklbw ymmF, ymmF, ymmH + vpunpckhbw ymmH, ymmH, ymmH + vpsrlw ymmF, ymmF, BYTE_BIT ; ymmF=(21 23 25 27 29 2B 2D 2F 2H 2J 2L 2N 2P 2R 2T 2V) + vpsrlw ymmH, ymmH, BYTE_BIT ; 
ymmH=(31 33 35 37 39 3B 3D 3F 3H 3J 3L 3N 3P 3R 3T 3V) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; ymm0=R(02468ACEGIKMOQSU)=RE, ymm2=G(02468ACEGIKMOQSU)=GE, ymm4=B(02468ACEGIKMOQSU)=BE + ; ymm1=R(13579BDFHJLNPRTV)=RO, ymm3=G(13579BDFHJLNPRTV)=GO, ymm5=B(13579BDFHJLNPRTV)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + vmovdqa YMMWORD [wk(0)], ymm0 ; wk(0)=RE + vmovdqa YMMWORD [wk(1)], ymm1 ; wk(1)=RO + vmovdqa YMMWORD [wk(2)], ymm4 ; wk(2)=BE + vmovdqa YMMWORD [wk(3)], ymm5 ; wk(3)=BO + + vmovdqa ymm6, ymm1 + vpunpcklwd ymm1, ymm1, ymm3 + vpunpckhwd ymm6, ymm6, ymm3 + vmovdqa ymm7, ymm1 + vmovdqa ymm4, ymm6 + vpmaddwd ymm1, ymm1, [rel PW_F0299_F0337] ; ymm1=ROL*FIX(0.299)+GOL*FIX(0.337) + vpmaddwd ymm6, ymm6, [rel PW_F0299_F0337] ; ymm6=ROH*FIX(0.299)+GOH*FIX(0.337) + vpmaddwd ymm7, ymm7, [rel PW_MF016_MF033] ; ymm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + vpmaddwd ymm4, ymm4, [rel PW_MF016_MF033] ; ymm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + vmovdqa YMMWORD [wk(4)], ymm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + vmovdqa YMMWORD [wk(5)], ymm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + vpxor ymm1, ymm1, ymm1 + vpxor ymm6, ymm6, ymm6 + vpunpcklwd ymm1, ymm1, ymm5 ; ymm1=BOL + vpunpckhwd ymm6, ymm6, ymm5 ; ymm6=BOH + vpsrld ymm1, ymm1, 1 ; ymm1=BOL*FIX(0.500) + vpsrld ymm6, ymm6, 1 ; ymm6=BOH*FIX(0.500) + + vmovdqa ymm5, [rel PD_ONEHALFM1_CJ] ; ymm5=[PD_ONEHALFM1_CJ] + + vpaddd ymm7, ymm7, ymm1 + vpaddd ymm4, ymm4, ymm6 + vpaddd ymm7, ymm7, ymm5 + vpaddd ymm4, ymm4, ymm5 + vpsrld ymm7, ymm7, SCALEBITS ; ymm7=CbOL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=CbOH + vpackssdw ymm7, ymm7, ymm4 ; ymm7=CbO + + 
vmovdqa ymm1, YMMWORD [wk(2)] ; ymm1=BE + + vmovdqa ymm6, ymm0 + vpunpcklwd ymm0, ymm0, ymm2 + vpunpckhwd ymm6, ymm6, ymm2 + vmovdqa ymm5, ymm0 + vmovdqa ymm4, ymm6 + vpmaddwd ymm0, ymm0, [rel PW_F0299_F0337] ; ymm0=REL*FIX(0.299)+GEL*FIX(0.337) + vpmaddwd ymm6, ymm6, [rel PW_F0299_F0337] ; ymm6=REH*FIX(0.299)+GEH*FIX(0.337) + vpmaddwd ymm5, ymm5, [rel PW_MF016_MF033] ; ymm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + vpmaddwd ymm4, ymm4, [rel PW_MF016_MF033] ; ymm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + vmovdqa YMMWORD [wk(6)], ymm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + vmovdqa YMMWORD [wk(7)], ymm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + vpxor ymm0, ymm0, ymm0 + vpxor ymm6, ymm6, ymm6 + vpunpcklwd ymm0, ymm0, ymm1 ; ymm0=BEL + vpunpckhwd ymm6, ymm6, ymm1 ; ymm6=BEH + vpsrld ymm0, ymm0, 1 ; ymm0=BEL*FIX(0.500) + vpsrld ymm6, ymm6, 1 ; ymm6=BEH*FIX(0.500) + + vmovdqa ymm1, [rel PD_ONEHALFM1_CJ] ; ymm1=[PD_ONEHALFM1_CJ] + + vpaddd ymm5, ymm5, ymm0 + vpaddd ymm4, ymm4, ymm6 + vpaddd ymm5, ymm5, ymm1 + vpaddd ymm4, ymm4, ymm1 + vpsrld ymm5, ymm5, SCALEBITS ; ymm5=CbEL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=CbEH + vpackssdw ymm5, ymm5, ymm4 ; ymm5=CbE + + vpsllw ymm7, ymm7, BYTE_BIT + vpor ymm5, ymm5, ymm7 ; ymm5=Cb + vmovdqu YMMWORD [rbx], ymm5 ; Save Cb + + vmovdqa ymm0, YMMWORD [wk(3)] ; ymm0=BO + vmovdqa ymm6, YMMWORD [wk(2)] ; ymm6=BE + vmovdqa ymm1, YMMWORD [wk(1)] ; ymm1=RO + + vmovdqa ymm4, ymm0 + vpunpcklwd ymm0, ymm0, ymm3 + vpunpckhwd ymm4, ymm4, ymm3 + vmovdqa ymm7, ymm0 + vmovdqa ymm5, ymm4 + vpmaddwd ymm0, ymm0, [rel PW_F0114_F0250] ; ymm0=BOL*FIX(0.114)+GOL*FIX(0.250) + vpmaddwd ymm4, ymm4, [rel PW_F0114_F0250] ; ymm4=BOH*FIX(0.114)+GOH*FIX(0.250) + vpmaddwd ymm7, ymm7, [rel PW_MF008_MF041] ; ymm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + vpmaddwd ymm5, ymm5, [rel PW_MF008_MF041] ; ymm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + vmovdqa ymm3, [rel PD_ONEHALF] ; ymm3=[PD_ONEHALF] + + vpaddd ymm0, ymm0, YMMWORD [wk(4)] + vpaddd ymm4, ymm4, YMMWORD [wk(5)] + vpaddd ymm0, ymm0, ymm3 
+ vpaddd ymm4, ymm4, ymm3 + vpsrld ymm0, ymm0, SCALEBITS ; ymm0=YOL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=YOH + vpackssdw ymm0, ymm0, ymm4 ; ymm0=YO + + vpxor ymm3, ymm3, ymm3 + vpxor ymm4, ymm4, ymm4 + vpunpcklwd ymm3, ymm3, ymm1 ; ymm3=ROL + vpunpckhwd ymm4, ymm4, ymm1 ; ymm4=ROH + vpsrld ymm3, ymm3, 1 ; ymm3=ROL*FIX(0.500) + vpsrld ymm4, ymm4, 1 ; ymm4=ROH*FIX(0.500) + + vmovdqa ymm1, [rel PD_ONEHALFM1_CJ] ; ymm1=[PD_ONEHALFM1_CJ] + + vpaddd ymm7, ymm7, ymm3 + vpaddd ymm5, ymm5, ymm4 + vpaddd ymm7, ymm7, ymm1 + vpaddd ymm5, ymm5, ymm1 + vpsrld ymm7, ymm7, SCALEBITS ; ymm7=CrOL + vpsrld ymm5, ymm5, SCALEBITS ; ymm5=CrOH + vpackssdw ymm7, ymm7, ymm5 ; ymm7=CrO + + vmovdqa ymm3, YMMWORD [wk(0)] ; ymm3=RE + + vmovdqa ymm4, ymm6 + vpunpcklwd ymm6, ymm6, ymm2 + vpunpckhwd ymm4, ymm4, ymm2 + vmovdqa ymm1, ymm6 + vmovdqa ymm5, ymm4 + vpmaddwd ymm6, ymm6, [rel PW_F0114_F0250] ; ymm6=BEL*FIX(0.114)+GEL*FIX(0.250) + vpmaddwd ymm4, ymm4, [rel PW_F0114_F0250] ; ymm4=BEH*FIX(0.114)+GEH*FIX(0.250) + vpmaddwd ymm1, ymm1, [rel PW_MF008_MF041] ; ymm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + vpmaddwd ymm5, ymm5, [rel PW_MF008_MF041] ; ymm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + vmovdqa ymm2, [rel PD_ONEHALF] ; ymm2=[PD_ONEHALF] + + vpaddd ymm6, ymm6, YMMWORD [wk(6)] + vpaddd ymm4, ymm4, YMMWORD [wk(7)] + vpaddd ymm6, ymm6, ymm2 + vpaddd ymm4, ymm4, ymm2 + vpsrld ymm6, ymm6, SCALEBITS ; ymm6=YEL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=YEH + vpackssdw ymm6, ymm6, ymm4 ; ymm6=YE + + vpsllw ymm0, ymm0, BYTE_BIT + vpor ymm6, ymm6, ymm0 ; ymm6=Y + vmovdqu YMMWORD [rdi], ymm6 ; Save Y + + vpxor ymm2, ymm2, ymm2 + vpxor ymm4, ymm4, ymm4 + vpunpcklwd ymm2, ymm2, ymm3 ; ymm2=REL + vpunpckhwd ymm4, ymm4, ymm3 ; ymm4=REH + vpsrld ymm2, ymm2, 1 ; ymm2=REL*FIX(0.500) + vpsrld ymm4, ymm4, 1 ; ymm4=REH*FIX(0.500) + + vmovdqa ymm0, [rel PD_ONEHALFM1_CJ] ; ymm0=[PD_ONEHALFM1_CJ] + + vpaddd ymm1, ymm1, ymm2 + vpaddd ymm5, ymm5, ymm4 + vpaddd ymm1, ymm1, ymm0 + vpaddd ymm5, ymm5, ymm0 + vpsrld ymm1, ymm1, 
SCALEBITS ; ymm1=CrEL + vpsrld ymm5, ymm5, SCALEBITS ; ymm5=CrEH + vpackssdw ymm1, ymm1, ymm5 ; ymm1=CrE + + vpsllw ymm7, ymm7, BYTE_BIT + vpor ymm1, ymm1, ymm7 ; ymm1=Cr + vmovdqu YMMWORD [rdx], ymm1 ; Save Cr + + sub rcx, byte SIZEOF_YMMWORD + add rsi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; inptr + add rdi, byte SIZEOF_YMMWORD ; outptr0 + add rbx, byte SIZEOF_YMMWORD ; outptr1 + add rdx, byte SIZEOF_YMMWORD ; outptr2 + cmp rcx, byte SIZEOF_YMMWORD + jae near .columnloop + test rcx, rcx + jnz near .column_ld1 + + pop rcx ; col + pop rsi + pop rdi + pop rbx + pop rdx + + add rsi, byte SIZEOF_JSAMPROW ; input_buf + add rdi, byte SIZEOF_JSAMPROW + add rbx, byte SIZEOF_JSAMPROW + add rdx, byte SIZEOF_JSAMPROW + dec rax ; num_rows + jg near .rowloop + +.return: + pop rbx + vzeroupper + uncollect_args 5 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jccolext-sse2.asm b/simd/x86_64/jccolext-sse2.asm new file mode 100644 index 0000000..b1486c0 --- /dev/null +++ b/simd/x86_64/jccolext-sse2.asm @@ -0,0 +1,485 @@ +; +; jccolext.asm - colorspace conversion (64-bit SSE2) +; +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. 
+; +; GLOBAL(void) +; jsimd_rgb_ycc_convert_sse2(JDIMENSION img_width, JSAMPARRAY input_buf, +; JSAMPIMAGE output_buf, JDIMENSION output_row, +; int num_rows); +; + +; r10d = JDIMENSION img_width +; r11 = JSAMPARRAY input_buf +; r12 = JSAMPIMAGE output_buf +; r13d = JDIMENSION output_row +; r14d = int num_rows + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 8 + + align 32 + GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2) + +EXTN(jsimd_rgb_ycc_convert_sse2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 5 + push rbx + + mov ecx, r10d + test rcx, rcx + jz near .return + + push rcx + + mov rsi, r12 + mov ecx, r13d + mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY] + lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] + lea rbx, [rbx+rcx*SIZEOF_JSAMPROW] + lea rdx, [rdx+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rsi, r11 + mov eax, r14d + test rax, rax + jle near .return +.rowloop: + push rdx + push rbx + push rdi + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr0 + mov rbx, JSAMPROW [rbx] ; outptr1 + mov rdx, JSAMPROW [rdx] ; outptr2 + + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push rax + push rdx + lea rcx, [rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub rcx, byte SIZEOF_BYTE + movzx rax, BYTE [rsi+rcx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub rcx, byte SIZEOF_WORD + movzx rdx, WORD [rsi+rcx] + shl rax, WORD_BIT + or rax, rdx +.column_ld4: + movd xmmA, eax + pop rdx + pop rax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub rcx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [rsi+rcx] + pslldq xmmA, SIZEOF_DWORD + 
por xmmA, xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub rcx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [rsi+rcx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA, xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa xmmF, xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + mov rcx, SIZEOF_XMMWORD + jmp short .rgb_ycc_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov rcx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmB, xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + +.columnloop: + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG, 8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF, 8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG, xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF, xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD, 8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG, 8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD, xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG, xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 
04 08 0C 10 14 18 1C) + psrldq xmmE, 8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD, 8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + + punpcklbw xmmE, xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD, xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH, xmmH + + movdqa xmmC, xmmA + punpcklbw xmmA, xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC, xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB, xmmE + punpcklbw xmmE, xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB, xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF, xmmD + punpcklbw xmmD, xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF, xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub rcx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub rcx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA, xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub rcx, byte SIZEOF_XMMWORD/4 + movdqa xmmE, xmmA + movdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov rcx, SIZEOF_XMMWORD + jz short .rgb_ycc_cnv + movdqa xmmF, xmmA + movdqa xmmH, xmmE + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_ycc_cnv + +.columnloop: + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD] + +.rgb_ycc_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 
29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD, xmmA + punpcklbw xmmA, xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD, xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37) + + movdqa xmmC, xmmF + punpcklbw xmmF, xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC, xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB, xmmA + punpcklwd xmmA, xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB, xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG, xmmD + punpcklwd xmmD, xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG, xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE, xmmA + punpcklbw xmmA, xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE, xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH, xmmB + punpcklbw xmmB, xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH, xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF, xmmF + + movdqa xmmC, xmmA + punpcklbw xmmA, xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC, xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD, xmmB + punpcklbw xmmB, xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD, xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG, xmmE + punpcklbw xmmE, xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG, xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF, xmmH + punpckhbw xmmH, xmmH + psrlw xmmF, BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH, BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 
0.11400 * B + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE + ; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE + movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO + + movdqa xmm6, xmm1 + punpcklwd xmm1, xmm3 + punpckhwd xmm6, xmm3 + movdqa xmm7, xmm1 + movdqa xmm4, xmm6 + pmaddwd xmm1, [rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd xmm6, [rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + pmaddwd xmm7, [rel PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331) + pmaddwd xmm4, [rel PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331) + + movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337) + movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337) + + pxor xmm1, xmm1 + pxor xmm6, xmm6 + punpcklwd xmm1, xmm5 ; xmm1=BOL + punpckhwd xmm6, xmm5 ; xmm6=BOH + psrld xmm1, 1 ; xmm1=BOL*FIX(0.500) + psrld xmm6, 1 ; xmm6=BOH*FIX(0.500) + + movdqa xmm5, [rel PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ] + + paddd xmm7, xmm1 + paddd xmm4, xmm6 + paddd xmm7, xmm5 + paddd xmm4, xmm5 + psrld xmm7, SCALEBITS ; xmm7=CbOL + psrld xmm4, SCALEBITS ; xmm4=CbOH + packssdw xmm7, xmm4 ; xmm7=CbO + + movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE + + movdqa xmm6, xmm0 + punpcklwd xmm0, xmm2 + punpckhwd xmm6, xmm2 + movdqa xmm5, xmm0 + movdqa xmm4, xmm6 + pmaddwd xmm0, [rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6, [rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + pmaddwd xmm5, [rel PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331) + pmaddwd xmm4, [rel PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331) + + movdqa XMMWORD [wk(6)], xmm0 ; 
wk(6)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337) + + pxor xmm0, xmm0 + pxor xmm6, xmm6 + punpcklwd xmm0, xmm1 ; xmm0=BEL + punpckhwd xmm6, xmm1 ; xmm6=BEH + psrld xmm0, 1 ; xmm0=BEL*FIX(0.500) + psrld xmm6, 1 ; xmm6=BEH*FIX(0.500) + + movdqa xmm1, [rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm5, xmm0 + paddd xmm4, xmm6 + paddd xmm5, xmm1 + paddd xmm4, xmm1 + psrld xmm5, SCALEBITS ; xmm5=CbEL + psrld xmm4, SCALEBITS ; xmm4=CbEH + packssdw xmm5, xmm4 ; xmm5=CbE + + psllw xmm7, BYTE_BIT + por xmm5, xmm7 ; xmm5=Cb + movdqa XMMWORD [rbx], xmm5 ; Save Cb + + movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO + + movdqa xmm4, xmm0 + punpcklwd xmm0, xmm3 + punpckhwd xmm4, xmm3 + movdqa xmm7, xmm0 + movdqa xmm5, xmm4 + pmaddwd xmm0, [rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4, [rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + pmaddwd xmm7, [rel PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418) + pmaddwd xmm5, [rel PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418) + + movdqa xmm3, [rel PD_ONEHALF] ; xmm3=[PD_ONEHALF] + + paddd xmm0, XMMWORD [wk(4)] + paddd xmm4, XMMWORD [wk(5)] + paddd xmm0, xmm3 + paddd xmm4, xmm3 + psrld xmm0, SCALEBITS ; xmm0=YOL + psrld xmm4, SCALEBITS ; xmm4=YOH + packssdw xmm0, xmm4 ; xmm0=YO + + pxor xmm3, xmm3 + pxor xmm4, xmm4 + punpcklwd xmm3, xmm1 ; xmm3=ROL + punpckhwd xmm4, xmm1 ; xmm4=ROH + psrld xmm3, 1 ; xmm3=ROL*FIX(0.500) + psrld xmm4, 1 ; xmm4=ROH*FIX(0.500) + + movdqa xmm1, [rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ] + + paddd xmm7, xmm3 + paddd xmm5, xmm4 + paddd xmm7, xmm1 + paddd xmm5, xmm1 + psrld xmm7, SCALEBITS ; xmm7=CrOL + psrld xmm5, SCALEBITS ; xmm5=CrOH + packssdw xmm7, xmm5 ; xmm7=CrO + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE + + movdqa xmm4, xmm6 + punpcklwd xmm6, xmm2 + punpckhwd xmm4, xmm2 + movdqa xmm1, xmm6 + movdqa xmm5, xmm4 + 
pmaddwd xmm6, [rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4, [rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + pmaddwd xmm1, [rel PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418) + pmaddwd xmm5, [rel PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418) + + movdqa xmm2, [rel PD_ONEHALF] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(6)] + paddd xmm4, XMMWORD [wk(7)] + paddd xmm6, xmm2 + paddd xmm4, xmm2 + psrld xmm6, SCALEBITS ; xmm6=YEL + psrld xmm4, SCALEBITS ; xmm4=YEH + packssdw xmm6, xmm4 ; xmm6=YE + + psllw xmm0, BYTE_BIT + por xmm6, xmm0 ; xmm6=Y + movdqa XMMWORD [rdi], xmm6 ; Save Y + + pxor xmm2, xmm2 + pxor xmm4, xmm4 + punpcklwd xmm2, xmm3 ; xmm2=REL + punpckhwd xmm4, xmm3 ; xmm4=REH + psrld xmm2, 1 ; xmm2=REL*FIX(0.500) + psrld xmm4, 1 ; xmm4=REH*FIX(0.500) + + movdqa xmm0, [rel PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ] + + paddd xmm1, xmm2 + paddd xmm5, xmm4 + paddd xmm1, xmm0 + paddd xmm5, xmm0 + psrld xmm1, SCALEBITS ; xmm1=CrEL + psrld xmm5, SCALEBITS ; xmm5=CrEH + packssdw xmm1, xmm5 ; xmm1=CrE + + psllw xmm7, BYTE_BIT + por xmm1, xmm7 ; xmm1=Cr + movdqa XMMWORD [rdx], xmm1 ; Save Cr + + sub rcx, byte SIZEOF_XMMWORD + add rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add rdi, byte SIZEOF_XMMWORD ; outptr0 + add rbx, byte SIZEOF_XMMWORD ; outptr1 + add rdx, byte SIZEOF_XMMWORD ; outptr2 + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop + test rcx, rcx + jnz near .column_ld1 + + pop rcx ; col + pop rsi + pop rdi + pop rbx + pop rdx + + add rsi, byte SIZEOF_JSAMPROW ; input_buf + add rdi, byte SIZEOF_JSAMPROW + add rbx, byte SIZEOF_JSAMPROW + add rdx, byte SIZEOF_JSAMPROW + dec rax ; num_rows + jg near .rowloop + +.return: + pop rbx + uncollect_args 5 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. 
+ align 32 diff --git a/simd/x86_64/jccolor-avx2.asm b/simd/x86_64/jccolor-avx2.asm new file mode 100644 index 0000000..f9f4be0 --- /dev/null +++ b/simd/x86_64/jccolor-avx2.asm @@ -0,0 +1,123 @@ +; +; jccolor.asm - colorspace conversion (64-bit AVX2) +; +; Copyright (C) 2009, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_rgb_ycc_convert_avx2) + +EXTN(jconst_rgb_ycc_convert_avx2): + +PW_F0299_F0337 times 8 dw F_0_299, F_0_337 +PW_F0114_F0250 times 8 dw F_0_114, F_0_250 +PW_MF016_MF033 times 8 dw -F_0_168, -F_0_331 +PW_MF008_MF041 times 8 dw -F_0_081, -F_0_418 +PD_ONEHALFM1_CJ times 8 dd (1 << (SCALEBITS - 1)) - 1 + \ + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1)) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jccolext-avx2.asm" + +%undef RGB_RED 
+%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extrgb_ycc_convert_avx2 +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extrgbx_ycc_convert_avx2 +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extbgr_ycc_convert_avx2 +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extbgrx_ycc_convert_avx2 +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extxbgr_ycc_convert_avx2 +%include "jccolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_avx2 jsimd_extxrgb_ycc_convert_avx2 +%include "jccolext-avx2.asm" diff --git a/simd/x86_64/jccolor-sse2.asm b/simd/x86_64/jccolor-sse2.asm new file mode 100644 index 0000000..3e46601 --- /dev/null +++ b/simd/x86_64/jccolor-sse2.asm 
@@ -0,0 +1,122 @@ +; +; jccolor.asm - colorspace conversion (64-bit SSE2) +; +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_081 equ 5329 ; FIX(0.08131) +F_0_114 equ 7471 ; FIX(0.11400) +F_0_168 equ 11059 ; FIX(0.16874) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_331 equ 21709 ; FIX(0.33126) +F_0_418 equ 27439 ; FIX(0.41869) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_rgb_ycc_convert_sse2) + +EXTN(jconst_rgb_ycc_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PW_MF016_MF033 times 4 dw -F_0_168, -F_0_331 +PW_MF008_MF041 times 4 dw -F_0_081, -F_0_418 +PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS - 1)) - 1 + \ + (CENTERJSAMPLE << SCALEBITS) +PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1)) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define 
jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2 +%include "jccolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2 +%include "jccolext-sse2.asm" diff --git a/simd/x86_64/jcgray-avx2.asm b/simd/x86_64/jcgray-avx2.asm new file mode 100644 index 0000000..0ec2410 --- /dev/null +++ b/simd/x86_64/jcgray-avx2.asm @@ -0,0 +1,115 @@ +; +; jcgray.asm - grayscale colorspace conversion (64-bit AVX2) +; +; Copyright (C) 2011, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. 
+; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_rgb_gray_convert_avx2) + +EXTN(jconst_rgb_gray_convert_avx2): + +PW_F0299_F0337 times 8 dw F_0_299, F_0_337 +PW_F0114_F0250 times 8 dw F_0_114, F_0_250 +PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1)) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extrgb_gray_convert_avx2 +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extrgbx_gray_convert_avx2 +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE 
+%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extbgr_gray_convert_avx2 +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extbgrx_gray_convert_avx2 +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extxbgr_gray_convert_avx2 +%include "jcgryext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_avx2 jsimd_extxrgb_gray_convert_avx2 +%include "jcgryext-avx2.asm" diff --git a/simd/x86_64/jcgray-sse2.asm b/simd/x86_64/jcgray-sse2.asm new file mode 100644 index 0000000..edf9222 --- /dev/null +++ b/simd/x86_64/jcgray-sse2.asm @@ -0,0 +1,114 @@ +; +; jcgray.asm - grayscale colorspace conversion (64-bit SSE2) +; +; Copyright (C) 2011, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_114 equ 7471 ; FIX(0.11400) +F_0_250 equ 16384 ; FIX(0.25000) +F_0_299 equ 19595 ; FIX(0.29900) +F_0_587 equ 38470 ; FIX(0.58700) +F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_rgb_gray_convert_sse2) + +EXTN(jconst_rgb_gray_convert_sse2): + +PW_F0299_F0337 times 4 dw F_0_299, F_0_337 +PW_F0114_F0250 times 4 dw F_0_114, F_0_250 +PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1)) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define 
RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2 +%include "jcgryext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2 +%include "jcgryext-sse2.asm" diff --git a/simd/x86_64/jcgryext-avx2.asm b/simd/x86_64/jcgryext-avx2.asm new file mode 100644 index 0000000..79e2aa0 --- /dev/null +++ b/simd/x86_64/jcgryext-avx2.asm @@ -0,0 +1,439 @@ +; +; jcgryext.asm - grayscale colorspace conversion (64-bit AVX2) +; +; Copyright (C) 2011, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. 
+; +; GLOBAL(void) +; jsimd_rgb_gray_convert_avx2(JDIMENSION img_width, JSAMPARRAY input_buf, +; JSAMPIMAGE output_buf, JDIMENSION output_row, +; int num_rows); +; + +; r10d = JDIMENSION img_width +; r11 = JSAMPARRAY input_buf +; r12 = JSAMPIMAGE output_buf +; r13d = JDIMENSION output_row +; r14d = int num_rows + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2) + +EXTN(jsimd_rgb_gray_convert_avx2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 5 + push rbx + + mov ecx, r10d + test rcx, rcx + jz near .return + + push rcx + + mov rsi, r12 + mov ecx, r13d + mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] + lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rsi, r11 + mov eax, r14d + test rax, rax + jle near .return +.rowloop: + push rdi + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr0 + + cmp rcx, byte SIZEOF_YMMWORD + jae near .columnloop + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: + push rax + push rdx + lea rcx, [rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub rcx, byte SIZEOF_BYTE + movzx rax, BYTE [rsi+rcx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub rcx, byte SIZEOF_WORD + movzx rdx, WORD [rsi+rcx] + shl rax, WORD_BIT + or rax, rdx +.column_ld4: + vmovd xmmA, eax + pop rdx + pop rax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub rcx, byte SIZEOF_DWORD + vmovd xmmF, XMM_DWORD [rsi+rcx] + vpslldq xmmA, xmmA, SIZEOF_DWORD + vpor xmmA, xmmA, xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub rcx, byte SIZEOF_MMWORD + vmovq xmmB, XMM_MMWORD [rsi+rcx] + vpslldq xmmA, xmmA, SIZEOF_MMWORD + vpor xmmA, xmmA, xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + 
jz short .column_ld32 + sub rcx, byte SIZEOF_XMMWORD + vmovdqu xmmB, XMM_MMWORD [rsi+rcx] + vperm2i128 ymmA, ymmA, ymmA, 1 + vpor ymmA, ymmB +.column_ld32: + test cl, SIZEOF_YMMWORD + jz short .column_ld64 + sub rcx, byte SIZEOF_YMMWORD + vmovdqa ymmF, ymmA + vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD] +.column_ld64: + test cl, 2*SIZEOF_YMMWORD + mov rcx, SIZEOF_YMMWORD + jz short .rgb_gray_cnv + vmovdqa ymmB, ymmA + vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD] + jmp short .rgb_gray_cnv + +.columnloop: + vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD] + vmovdqu ymmB, YMMWORD [rsi+2*SIZEOF_YMMWORD] + +.rgb_gray_cnv: + ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + ; ymmB=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + + vmovdqu ymmC, ymmA + vinserti128 ymmA, ymmF, xmmA, 0 ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + vinserti128 ymmC, ymmC, xmmB, 0 ; ymmC=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + vinserti128 ymmB, ymmB, xmmF, 0 ; ymmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + vperm2i128 ymmF, ymmC, ymmC, 1 ; ymmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A + ; 1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q) + + vmovdqa ymmG, ymmA + vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12 + ; 22 03 13 23 04 14 24 05 0G 1G 2G 0H 1H 2H 0I 1I) + vpsrldq ymmG, ymmG, 8 ; ymmG=(22 03 13 23 04 14 24 05 0G 1G 2G 0H 1H 2H 0I 1I + ; 2I 0J 1J 2J 0K 1K 2K 0L -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmF ; ymmA=(00 08 10 18 20 28 01 09 11 
19 21 29 02 0A 12 1A + ; 0G 0O 1G 1O 2G 2O 0H 0P 1H 1P 2H 2P 0I 0Q 1I 1Q) + vpslldq ymmF, ymmF, 8 ; ymmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27 + ; 08 18 28 09 19 29 0A 1A 1L 2L 0M 1M 2M 0N 1N 2N) + + vpunpcklbw ymmG, ymmG, ymmB ; ymmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D + ; 2I 2Q 0J 0R 1J 1R 2J 2R 0K 0S 1K 1S 2K 2S 0L 0T) + vpunpckhbw ymmF, ymmF, ymmB ; ymmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F + ; 1L 1T 2L 2T 0M 0U 1M 1U 2M 2U 0N 0V 1N 1V 2N 2V) + + vmovdqa ymmD, ymmA + vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09 + ; 11 19 21 29 02 0A 12 1A 0G 0O 1G 1O 2G 2O 0H 0P) + vpsrldq ymmD, ymmD, 8 ; ymmD=(11 19 21 29 02 0A 12 1A 0G 0O 1G 1O 2G 2O 0H 0P + ; 1H 1P 2H 2P 0I 0Q 1I 1Q -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmG ; ymmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D + ; 0G 0K 0O 0S 1G 1K 1O 1S 2G 2K 2O 2S 0H 0L 0P 0T) + vpslldq ymmG, ymmG, 8 ; ymmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B + ; 04 0C 14 1C 24 2C 05 0D 2I 2Q 0J 0R 1J 1R 2J 2R) + + vpunpcklbw ymmD, ymmD, ymmF ; ymmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E + ; 1H 1L 1P 1T 2H 2L 2P 2T 0I 0M 0Q 0U 1I 1M 1Q 1U) + vpunpckhbw ymmG, ymmG, ymmF ; ymmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F + ; 2I 2M 2Q 2U 0J 0N 0R 0V 1J 1N 1R 1V 2J 2N 2R 2V) + + vmovdqa ymmE, ymmA + vpslldq ymmA, ymmA, 8 ; ymmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C + ; 20 24 28 2C 01 05 09 0D 0G 0K 0O 0S 1G 1K 1O 1S) + vpsrldq ymmE, ymmE, 8 ; ymmE=(20 24 28 2C 01 05 09 0D 0G 0K 0O 0S 1G 1K 1O 1S + ; 2G 2K 2O 2S 0H 0L 0P 0T -- -- -- -- -- -- -- --) + + vpunpckhbw ymmA, ymmA, ymmD ; ymmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E + ; 0G 0I 0K 0M 0O 0Q 0S 0U 1G 1I 1K 1M 1O 1Q 1S 1U) + vpslldq ymmD, ymmD, 8 ; ymmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D + ; 02 06 0A 0E 12 16 1A 1E 1H 1L 1P 1T 2H 2L 2P 2T) + + vpunpcklbw ymmE, ymmE, ymmG ; ymmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F + ; 2G 2I 2K 2M 2O 
2Q 2S 2U 0H 0J 0L 0N 0P 0R 0T 0V) + vpunpckhbw ymmD, ymmD, ymmG ; ymmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F + ; 1H 1J 1L 1N 1P 1R 1T 1V 2H 2J 2L 2N 2P 2R 2T 2V) + + vpxor ymmH, ymmH, ymmH + + vmovdqa ymmC, ymmA + vpunpcklbw ymmA, ymmA, ymmH ; ymmA=(00 02 04 06 08 0A 0C 0E 0G 0I 0K 0M 0O 0Q 0S 0U) + vpunpckhbw ymmC, ymmC, ymmH ; ymmC=(10 12 14 16 18 1A 1C 1E 1G 1I 1K 1M 1O 1Q 1S 1U) + + vmovdqa ymmB, ymmE + vpunpcklbw ymmE, ymmE, ymmH ; ymmE=(20 22 24 26 28 2A 2C 2E 2G 2I 2K 2M 2O 2Q 2S 2U) + vpunpckhbw ymmB, ymmB, ymmH ; ymmB=(01 03 05 07 09 0B 0D 0F 0H 0J 0L 0N 0P 0R 0T 0V) + + vmovdqa ymmF, ymmD + vpunpcklbw ymmD, ymmD, ymmH ; ymmD=(11 13 15 17 19 1B 1D 1F 1H 1J 1L 1N 1P 1R 1T 1V) + vpunpckhbw ymmF, ymmF, ymmH ; ymmF=(21 23 25 27 29 2B 2D 2F 2H 2J 2L 2N 2P 2R 2T 2V) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub rcx, byte SIZEOF_XMMWORD/16 + vmovd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub rcx, byte SIZEOF_XMMWORD/8 + vmovq xmmF, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE] + vpslldq xmmA, xmmA, SIZEOF_MMWORD + vpor xmmA, xmmA, xmmF +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub rcx, byte SIZEOF_XMMWORD/4 + vmovdqa xmmF, xmmA + vperm2i128 ymmF, ymmF, ymmF, 1 + vmovdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE] + vpor ymmA, ymmA, ymmF +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + jz short .column_ld16 + sub rcx, byte SIZEOF_XMMWORD/2 + vmovdqa ymmF, ymmA + vmovdqu ymmA, YMMWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld16: + test cl, SIZEOF_XMMWORD + mov rcx, SIZEOF_YMMWORD + jz short .rgb_gray_cnv + vmovdqa ymmE, ymmA + vmovdqa ymmH, ymmF + vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD] + jmp short .rgb_gray_cnv + +.columnloop: + vmovdqu ymmA, YMMWORD [rsi+0*SIZEOF_YMMWORD] + vmovdqu ymmF, YMMWORD [rsi+1*SIZEOF_YMMWORD] + vmovdqu ymmE, YMMWORD [rsi+2*SIZEOF_YMMWORD] + 
vmovdqu ymmH, YMMWORD [rsi+3*SIZEOF_YMMWORD] + +.rgb_gray_cnv: + ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; ymmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + ; ymmE=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + vmovdqa ymmB, ymmA + vinserti128 ymmA, ymmA, xmmE, 1 ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J) + vperm2i128 ymmE, ymmB, ymmE, 0x31 ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + + vmovdqa ymmB, ymmF + vinserti128 ymmF, ymmF, xmmH, 1 ; ymmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R) + vperm2i128 ymmH, ymmB, ymmH, 0x31 ; ymmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + vmovdqa ymmD, ymmA + vpunpcklbw ymmA, ymmA, ymmE ; ymmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35 + ; 0G 0K 1G 1K 2G 2K 3G 3K 0H 0L 1H 1L 2H 2L 3H 3L) + vpunpckhbw ymmD, ymmD, ymmE ; ymmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37 + ; 0I 0M 1I 1M 2I 2M 3I 3M 0J 0N 1J 1N 2J 2N 3J 3N) + + vmovdqa ymmC, ymmF + vpunpcklbw ymmF, ymmF, ymmH ; ymmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D + ; 0O 0S 1O 1S 2O 2S 3O 3S 0P 0T 1P 1T 2P 2T 3P 3T) + vpunpckhbw ymmC, ymmC, ymmH ; ymmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F + ; 0Q 0U 1Q 1U 2Q 2U 3Q 3U 0R 0V 1R 1V 2R 2V 3R 3V) + + vmovdqa ymmB, ymmA + vpunpcklwd ymmA, ymmA, ymmF ; ymmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C + ; 0G 0K 0O 0S 1G 1K 1O 1S 2G 2K 2O 2S 3G 3K 3O 3S) + vpunpckhwd ymmB, ymmB, ymmF ; ymmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D + ; 0H 0L 0P 0T 1H 
1L 1P 1T 2H 2L 2P 2T 3H 3L 3P 3T) + + vmovdqa ymmG, ymmD + vpunpcklwd ymmD, ymmD, ymmC ; ymmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E + ; 0I 0M 0Q 0U 1I 1M 1Q 1U 2I 2M 2Q 2U 3I 3M 3Q 3U) + vpunpckhwd ymmG, ymmG, ymmC ; ymmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F + ; 0J 0N 0R 0V 1J 1N 1R 1V 2J 2N 2R 2V 3J 3N 3R 3V) + + vmovdqa ymmE, ymmA + vpunpcklbw ymmA, ymmA, ymmD ; ymmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E + ; 0G 0I 0K 0M 0O 0Q 0S 0U 1G 1I 1K 1M 1O 1Q 1S 1U) + vpunpckhbw ymmE, ymmE, ymmD ; ymmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E + ; 2G 2I 2K 2M 2O 2Q 2S 2U 3G 3I 3K 3M 3O 3Q 3S 3U) + + vmovdqa ymmH, ymmB + vpunpcklbw ymmB, ymmB, ymmG ; ymmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F + ; 0H 0J 0L 0N 0P 0R 0T 0V 1H 1J 1L 1N 1P 1R 1T 1V) + vpunpckhbw ymmH, ymmH, ymmG ; ymmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F + ; 2H 2J 2L 2N 2P 2R 2T 2V 3H 3J 3L 3N 3P 3R 3T 3V) + + vpxor ymmF, ymmF, ymmF + + vmovdqa ymmC, ymmA + vpunpcklbw ymmA, ymmA, ymmF ; ymmA=(00 02 04 06 08 0A 0C 0E 0G 0I 0K 0M 0O 0Q 0S 0U) + vpunpckhbw ymmC, ymmC, ymmF ; ymmC=(10 12 14 16 18 1A 1C 1E 1G 1I 1K 1M 1O 1Q 1S 1U) + + vmovdqa ymmD, ymmB + vpunpcklbw ymmB, ymmB, ymmF ; ymmB=(01 03 05 07 09 0B 0D 0F 0H 0J 0L 0N 0P 0R 0T 0V) + vpunpckhbw ymmD, ymmD, ymmF ; ymmD=(11 13 15 17 19 1B 1D 1F 1H 1J 1L 1N 1P 1R 1T 1V) + + vmovdqa ymmG, ymmE + vpunpcklbw ymmE, ymmE, ymmF ; ymmE=(20 22 24 26 28 2A 2C 2E 2G 2I 2K 2M 2O 2Q 2S 2U) + vpunpckhbw ymmG, ymmG, ymmF ; ymmG=(30 32 34 36 38 3A 3C 3E 3G 3I 3K 3M 3O 3Q 3S 3U) + + vpunpcklbw ymmF, ymmF, ymmH + vpunpckhbw ymmH, ymmH, ymmH + vpsrlw ymmF, ymmF, BYTE_BIT ; ymmF=(21 23 25 27 29 2B 2D 2F 2H 2J 2L 2N 2P 2R 2T 2V) + vpsrlw ymmH, ymmH, BYTE_BIT ; ymmH=(31 33 35 37 39 3B 3D 3F 3H 3J 3L 3N 3P 3R 3T 3V) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; ymm0=R(02468ACEGIKMOQSU)=RE, ymm2=G(02468ACEGIKMOQSU)=GE, ymm4=B(02468ACEGIKMOQSU)=BE + ; ymm1=R(13579BDFHJLNPRTV)=RO, ymm3=G(13579BDFHJLNPRTV)=GO, 
ymm5=B(13579BDFHJLNPRTV)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + vmovdqa ymm6, ymm1 + vpunpcklwd ymm1, ymm1, ymm3 + vpunpckhwd ymm6, ymm6, ymm3 + vpmaddwd ymm1, ymm1, [rel PW_F0299_F0337] ; ymm1=ROL*FIX(0.299)+GOL*FIX(0.337) + vpmaddwd ymm6, ymm6, [rel PW_F0299_F0337] ; ymm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + vmovdqa ymm7, ymm6 ; ymm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + vmovdqa ymm6, ymm0 + vpunpcklwd ymm0, ymm0, ymm2 + vpunpckhwd ymm6, ymm6, ymm2 + vpmaddwd ymm0, ymm0, [rel PW_F0299_F0337] ; ymm0=REL*FIX(0.299)+GEL*FIX(0.337) + vpmaddwd ymm6, ymm6, [rel PW_F0299_F0337] ; ymm6=REH*FIX(0.299)+GEH*FIX(0.337) + + vmovdqa YMMWORD [wk(0)], ymm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + vmovdqa YMMWORD [wk(1)], ymm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + vmovdqa ymm0, ymm5 ; ymm0=BO + vmovdqa ymm6, ymm4 ; ymm6=BE + + vmovdqa ymm4, ymm0 + vpunpcklwd ymm0, ymm0, ymm3 + vpunpckhwd ymm4, ymm4, ymm3 + vpmaddwd ymm0, ymm0, [rel PW_F0114_F0250] ; ymm0=BOL*FIX(0.114)+GOL*FIX(0.250) + vpmaddwd ymm4, ymm4, [rel PW_F0114_F0250] ; ymm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + vmovdqa ymm3, [rel PD_ONEHALF] ; ymm3=[PD_ONEHALF] + + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm4, ymm4, ymm7 + vpaddd ymm0, ymm0, ymm3 + vpaddd ymm4, ymm4, ymm3 + vpsrld ymm0, ymm0, SCALEBITS ; ymm0=YOL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=YOH + vpackssdw ymm0, ymm0, ymm4 ; ymm0=YO + + vmovdqa ymm4, ymm6 + vpunpcklwd ymm6, ymm6, ymm2 + vpunpckhwd ymm4, ymm4, ymm2 + vpmaddwd ymm6, ymm6, [rel PW_F0114_F0250] ; ymm6=BEL*FIX(0.114)+GEL*FIX(0.250) + vpmaddwd ymm4, ymm4, [rel PW_F0114_F0250] ; ymm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + vmovdqa ymm2, [rel PD_ONEHALF] ; ymm2=[PD_ONEHALF] + + vpaddd ymm6, ymm6, YMMWORD [wk(0)] + vpaddd ymm4, ymm4, YMMWORD [wk(1)] + vpaddd ymm6, ymm6, ymm2 + vpaddd ymm4, ymm4, ymm2 + vpsrld ymm6, ymm6, SCALEBITS ; ymm6=YEL + vpsrld ymm4, ymm4, SCALEBITS ; ymm4=YEH + vpackssdw ymm6, 
ymm6, ymm4 ; ymm6=YE + + vpsllw ymm0, ymm0, BYTE_BIT + vpor ymm6, ymm6, ymm0 ; ymm6=Y + vmovdqu YMMWORD [rdi], ymm6 ; Save Y + + sub rcx, byte SIZEOF_YMMWORD + add rsi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; inptr + add rdi, byte SIZEOF_YMMWORD ; outptr0 + cmp rcx, byte SIZEOF_YMMWORD + jae near .columnloop + test rcx, rcx + jnz near .column_ld1 + + pop rcx ; col + pop rsi + pop rdi + + add rsi, byte SIZEOF_JSAMPROW ; input_buf + add rdi, byte SIZEOF_JSAMPROW + dec rax ; num_rows + jg near .rowloop + +.return: + pop rbx + vzeroupper + uncollect_args 5 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jcgryext-sse2.asm b/simd/x86_64/jcgryext-sse2.asm new file mode 100644 index 0000000..9c3ae5e --- /dev/null +++ b/simd/x86_64/jcgryext-sse2.asm @@ -0,0 +1,364 @@ +; +; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2) +; +; Copyright (C) 2011, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. 
+; +; GLOBAL(void) +; jsimd_rgb_gray_convert_sse2(JDIMENSION img_width, JSAMPARRAY input_buf, +; JSAMPIMAGE output_buf, JDIMENSION output_row, +; int num_rows); +; Only output_buf[0] is written: one Y byte per input pixel, starting at row output_row. + +; r10d = JDIMENSION img_width +; r11 = JSAMPARRAY input_buf +; r12 = JSAMPIMAGE output_buf +; r13d = JDIMENSION output_row +; r14d = int num_rows + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2) + +EXTN(jsimd_rgb_gray_convert_sse2): + push rbp + mov rax, rsp ; rax = original rsp (points at the saved rbp) + sub rsp, byte 4 ; make sure the aligned slot lies below the saved rbp + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax ; stash the unaligned rsp for the epilogue's "pop rsp" + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] ; reserve wk[0..WK_NUM-1] below rbp + collect_args 5 + push rbx + + mov ecx, r10d ; rcx = img_width + test rcx, rcx + jz near .return ; zero width: nothing to convert + + push rcx + + mov rsi, r12 ; rsi = output_buf + mov ecx, r13d ; rcx = output_row + mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] ; rdi = output_buf[0] + lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] ; rdi = &output_buf[0][output_row] + + pop rcx ; rcx = img_width again + + mov rsi, r11 ; rsi = input_buf + mov eax, r14d ; rax = num_rows (zero-extended) + test rax, rax + jle near .return ; no rows to convert +.rowloop: + push rdi + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr0 + + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop ; at least one full group of columns + +%if RGB_PIXELSIZE == 3 ; --------------- + +.column_ld1: ; partial group: gather the remaining col*3 bytes piecewise + push rax + push rdx + lea rcx, [rcx+rcx*2] ; imul ecx,RGB_PIXELSIZE + test cl, SIZEOF_BYTE + jz short .column_ld2 + sub rcx, byte SIZEOF_BYTE + movzx rax, BYTE [rsi+rcx] +.column_ld2: + test cl, SIZEOF_WORD + jz short .column_ld4 + sub rcx, byte SIZEOF_WORD + movzx rdx, WORD [rsi+rcx] + shl rax, WORD_BIT + or rax, rdx +.column_ld4: + movd xmmA, eax + pop rdx + pop rax + test cl, SIZEOF_DWORD + jz short .column_ld8 + sub rcx, byte SIZEOF_DWORD + movd xmmF, XMM_DWORD [rsi+rcx] + pslldq xmmA, SIZEOF_DWORD + por xmmA, xmmF +.column_ld8: + test cl, SIZEOF_MMWORD + jz short .column_ld16 + sub rcx, byte SIZEOF_MMWORD + movq xmmB, XMM_MMWORD [rsi+rcx] + pslldq xmmA, SIZEOF_MMWORD + por xmmA, xmmB +.column_ld16: + test cl, SIZEOF_XMMWORD + jz short .column_ld32 + movdqa
xmmF, xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + mov rcx, SIZEOF_XMMWORD ; count the partial tail as one final full group + jmp short .rgb_gray_cnv +.column_ld32: + test cl, 2*SIZEOF_XMMWORD + mov rcx, SIZEOF_XMMWORD + jz short .rgb_gray_cnv + movdqa xmmB, xmmA + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + +.columnloop: ; full group: three xmmwords of packed 3-byte pixels + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + movdqa xmmG, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12) + psrldq xmmG, 8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A) + pslldq xmmF, 8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27) + + punpcklbw xmmG, xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D) + punpckhbw xmmF, xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F) + + movdqa xmmD, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09) + psrldq xmmD, 8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D) + pslldq xmmG, 8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B) + + punpcklbw xmmD, xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E) + punpckhbw xmmG, xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F) + + movdqa xmmE, xmmA + pslldq xmmA, 8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C) + psrldq xmmE, 8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --) + + punpckhbw xmmA, xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + pslldq xmmD, 8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D) + +
punpcklbw xmmE, xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD, xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F) + + pxor xmmH, xmmH ; xmmH = 0, used below to zero-extend bytes to words + + movdqa xmmC, xmmA + punpcklbw xmmA, xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC, xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmB, xmmE + punpcklbw xmmE, xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmB, xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F) + + movdqa xmmF, xmmD + punpcklbw xmmD, xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F) + punpckhbw xmmF, xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F) + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +.column_ld1: ; partial group: gather the remaining 4-byte pixels piecewise + test cl, SIZEOF_XMMWORD/16 + jz short .column_ld2 + sub rcx, byte SIZEOF_XMMWORD/16 + movd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld2: + test cl, SIZEOF_XMMWORD/8 + jz short .column_ld4 + sub rcx, byte SIZEOF_XMMWORD/8 + movq xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE] + pslldq xmmA, SIZEOF_MMWORD + por xmmA, xmmE +.column_ld4: + test cl, SIZEOF_XMMWORD/4 + jz short .column_ld8 + sub rcx, byte SIZEOF_XMMWORD/4 + movdqa xmmE, xmmA + movdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE] +.column_ld8: + test cl, SIZEOF_XMMWORD/2 + mov rcx, SIZEOF_XMMWORD ; count the partial tail as one final full group + jz short .rgb_gray_cnv + movdqa xmmF, xmmA + movdqa xmmH, xmmE + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + jmp short .rgb_gray_cnv + +.columnloop: ; full group: four xmmwords of packed 4-byte pixels + movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD] + movdqu xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD] + movdqu xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD] + +.rgb_gray_cnv: + ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + movdqa xmmD, xmmA + punpcklbw xmmA, xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35) + punpckhbw xmmD, xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07
13 17 23 27 33 37) + + movdqa xmmC, xmmF + punpcklbw xmmF, xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D) + punpckhbw xmmC, xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F) + + movdqa xmmB, xmmA + punpcklwd xmmA, xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C) + punpckhwd xmmB, xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D) + + movdqa xmmG, xmmD + punpcklwd xmmD, xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E) + punpckhwd xmmG, xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F) + + movdqa xmmE, xmmA + punpcklbw xmmA, xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E) + punpckhbw xmmE, xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E) + + movdqa xmmH, xmmB + punpcklbw xmmB, xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F) + punpckhbw xmmH, xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F) + + pxor xmmF, xmmF ; xmmF = 0, used below to zero-extend bytes to words + + movdqa xmmC, xmmA + punpcklbw xmmA, xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E) + punpckhbw xmmC, xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E) + + movdqa xmmD, xmmB + punpcklbw xmmB, xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F) + punpckhbw xmmD, xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F) + + movdqa xmmG, xmmE + punpcklbw xmmE, xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E) + punpckhbw xmmG, xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E) + + punpcklbw xmmF, xmmH ; xmmF is zero here, so each xmmH byte lands in the high half of a word + punpckhbw xmmH, xmmH ; each byte duplicated into both halves of its word + psrlw xmmF, BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F) + psrlw xmmH, BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F) + +%endif ; RGB_PIXELSIZE ; --------------- + + ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE + ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO + + ; (Original) + ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B + ; + ; (This implementation) + ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G + + movdqa xmm6, xmm1 + punpcklwd xmm1, xmm3 + punpckhwd xmm6, xmm3 + pmaddwd xmm1, [rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337) + pmaddwd
xmm6, [rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm7, xmm6 ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337) + + movdqa xmm6, xmm0 + punpcklwd xmm0, xmm2 + punpckhwd xmm6, xmm2 + pmaddwd xmm0, [rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337) + pmaddwd xmm6, [rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337) + + movdqa xmm0, xmm5 ; xmm0=BO + movdqa xmm6, xmm4 ; xmm6=BE + + movdqa xmm4, xmm0 + punpcklwd xmm0, xmm3 + punpckhwd xmm4, xmm3 + pmaddwd xmm0, [rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250) + pmaddwd xmm4, [rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250) + + movdqa xmm3, [rel PD_ONEHALF] ; xmm3=[PD_ONEHALF] + + paddd xmm0, xmm1 ; add the R/G partial sums for the odd columns + paddd xmm4, xmm7 + paddd xmm0, xmm3 ; add PD_ONEHALF before the shift (rounding) + paddd xmm4, xmm3 + psrld xmm0, SCALEBITS ; xmm0=YOL + psrld xmm4, SCALEBITS ; xmm4=YOH + packssdw xmm0, xmm4 ; xmm0=YO + + movdqa xmm4, xmm6 + punpcklwd xmm6, xmm2 + punpckhwd xmm4, xmm2 + pmaddwd xmm6, [rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250) + pmaddwd xmm4, [rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250) + + movdqa xmm2, [rel PD_ONEHALF] ; xmm2=[PD_ONEHALF] + + paddd xmm6, XMMWORD [wk(0)] ; add the R/G partial sums saved in wk[] + paddd xmm4, XMMWORD [wk(1)] + paddd xmm6, xmm2 + paddd xmm4, xmm2 + psrld xmm6, SCALEBITS ; xmm6=YEL + psrld xmm4, SCALEBITS ; xmm4=YEH + packssdw xmm6, xmm4 ; xmm6=YE + + psllw xmm0, BYTE_BIT ; odd-column Y moves to the high byte of each word + por xmm6, xmm0 ; xmm6=Y + movdqa XMMWORD [rdi], xmm6 ; Save Y (movdqa: outptr0 must be 16-byte aligned) + + sub rcx, byte SIZEOF_XMMWORD + add rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr + add rdi, byte SIZEOF_XMMWORD ; outptr0 + cmp rcx, byte SIZEOF_XMMWORD + jae near .columnloop ; another full group remains + test rcx, rcx + jnz near .column_ld1 ; leftover columns: take the partial-load path + + pop rcx ; col + pop rsi + pop rdi + + add rsi, byte SIZEOF_JSAMPROW ; input_buf + add rdi, byte SIZEOF_JSAMPROW + dec rax ; num_rows + jg near .rowloop + +.return: + pop rbx + uncollect_args 5 + mov rsp, rbp ; rsp <-
aligned rbp + pop rsp ; rsp <- original rsp (value stashed by the prologue) + pop rbp ; restore the caller's rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jchuff-sse2.asm b/simd/x86_64/jchuff-sse2.asm new file mode 100644 index 0000000..1b091ad --- /dev/null +++ b/simd/x86_64/jchuff-sse2.asm @@ -0,0 +1,348 @@ +; +; jchuff-sse2.asm - Huffman entropy encoding (64-bit SSE2) +; +; Copyright (C) 2009-2011, 2014-2016, D. R. Commander. +; Copyright (C) 2015, Matthieu Darbois. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains an SSE2 implementation for Huffman coding of one block. +; The following code is based directly on jchuff.c; see jchuff.c for more +; details. +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_huff_encode_one_block) + +EXTN(jconst_huff_encode_one_block): + +%include "jpeg_nbits_table.inc" + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +; These macros perform the same task as the emit_bits() function in the +; original libjpeg code. In addition to reducing overhead by explicitly +; inlining the code, additional performance is achieved by taking into +; account the size of the bit buffer and waiting until it is almost full +; before emptying it.
This mostly benefits 64-bit platforms, since 6 +; bytes can be stored in a 64-bit bit buffer before it has to be emptied. + +%macro EMIT_BYTE 0 ; write the top pending byte of put_buffer to [buffer]; clobbers rcx, rdx + sub put_bits, 8 ; put_bits -= 8; + mov rdx, put_buffer + mov ecx, put_bits + shr rdx, cl ; c = (JOCTET)GETJOCTET(put_buffer >> put_bits); + mov byte [buffer], dl ; *buffer++ = c; + add buffer, 1 + cmp dl, 0xFF ; need to stuff a zero byte? + jne %%.EMIT_BYTE_END + mov byte [buffer], 0 ; *buffer++ = 0; + add buffer, 1 +%%.EMIT_BYTE_END: +%endmacro + +%macro PUT_BITS 1 ; (code) -- the bit count must already be in ecx/cl + add put_bits, ecx ; put_bits += size; + shl put_buffer, cl ; put_buffer = (put_buffer << size); + or put_buffer, %1 ; put_buffer |= code; +%endmacro + +%macro CHECKBUF31 0 ; flush 4 bytes once at least 32 bits are pending + cmp put_bits, 32 ; if (put_bits > 31) { + jl %%.CHECKBUF31_END + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE +%%.CHECKBUF31_END: +%endmacro + +%macro CHECKBUF47 0 ; flush 6 bytes once at least 48 bits are pending + cmp put_bits, 48 ; if (put_bits > 47) { + jl %%.CHECKBUF47_END + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE + EMIT_BYTE +%%.CHECKBUF47_END: +%endmacro + +%macro EMIT_BITS 2 ; (code, size) + CHECKBUF47 + mov ecx, %2 ; ecx = size, as PUT_BITS expects + PUT_BITS %1 +%endmacro + +%macro kloop_prepare 37 ;(ko, jno0, ..., jno31, xmm0, xmm1, xmm2, xmm3) -- gathers 32 coefficients block[jnoN] (block = r12) into the four xmm args, then stores their absolute values to t1[ko..] and value-or-complement to t2[ko..]; clobbers xmm8-xmm11 + pxor xmm8, xmm8 ; __m128i neg = _mm_setzero_si128(); + pxor xmm9, xmm9 ; __m128i neg = _mm_setzero_si128(); + pxor xmm10, xmm10 ; __m128i neg = _mm_setzero_si128(); + pxor xmm11, xmm11 ; __m128i neg = _mm_setzero_si128(); + pinsrw %34, word [r12 + %2 * SIZEOF_WORD], 0 ; xmm_shadow[0] = block[jno0]; + pinsrw %35, word [r12 + %10 * SIZEOF_WORD], 0 ; xmm_shadow[8] = block[jno8]; + pinsrw %36, word [r12 + %18 * SIZEOF_WORD], 0 ; xmm_shadow[16] = block[jno16]; + pinsrw %37, word [r12 + %26 * SIZEOF_WORD], 0 ; xmm_shadow[24] = block[jno24]; + pinsrw %34, word [r12 + %3 * SIZEOF_WORD], 1 ; xmm_shadow[1] = block[jno1]; + pinsrw %35, word [r12 + %11 * SIZEOF_WORD], 1 ; xmm_shadow[9] = block[jno9]; + pinsrw %36, word [r12 + %19 * SIZEOF_WORD], 1 ; xmm_shadow[17] = block[jno17]; + pinsrw %37, word [r12 + %27 * SIZEOF_WORD], 1 ; xmm_shadow[25] =
block[jno25]; + pinsrw %34, word [r12 + %4 * SIZEOF_WORD], 2 ; xmm_shadow[2] = block[jno2]; + pinsrw %35, word [r12 + %12 * SIZEOF_WORD], 2 ; xmm_shadow[10] = block[jno10]; + pinsrw %36, word [r12 + %20 * SIZEOF_WORD], 2 ; xmm_shadow[18] = block[jno18]; + pinsrw %37, word [r12 + %28 * SIZEOF_WORD], 2 ; xmm_shadow[26] = block[jno26]; + pinsrw %34, word [r12 + %5 * SIZEOF_WORD], 3 ; xmm_shadow[3] = block[jno3]; + pinsrw %35, word [r12 + %13 * SIZEOF_WORD], 3 ; xmm_shadow[11] = block[jno11]; + pinsrw %36, word [r12 + %21 * SIZEOF_WORD], 3 ; xmm_shadow[19] = block[jno19]; + pinsrw %37, word [r12 + %29 * SIZEOF_WORD], 3 ; xmm_shadow[27] = block[jno27]; + pinsrw %34, word [r12 + %6 * SIZEOF_WORD], 4 ; xmm_shadow[4] = block[jno4]; + pinsrw %35, word [r12 + %14 * SIZEOF_WORD], 4 ; xmm_shadow[12] = block[jno12]; + pinsrw %36, word [r12 + %22 * SIZEOF_WORD], 4 ; xmm_shadow[20] = block[jno20]; + pinsrw %37, word [r12 + %30 * SIZEOF_WORD], 4 ; xmm_shadow[28] = block[jno28]; + pinsrw %34, word [r12 + %7 * SIZEOF_WORD], 5 ; xmm_shadow[5] = block[jno5]; + pinsrw %35, word [r12 + %15 * SIZEOF_WORD], 5 ; xmm_shadow[13] = block[jno13]; + pinsrw %36, word [r12 + %23 * SIZEOF_WORD], 5 ; xmm_shadow[21] = block[jno21]; + pinsrw %37, word [r12 + %31 * SIZEOF_WORD], 5 ; xmm_shadow[29] = block[jno29]; + pinsrw %34, word [r12 + %8 * SIZEOF_WORD], 6 ; xmm_shadow[6] = block[jno6]; + pinsrw %35, word [r12 + %16 * SIZEOF_WORD], 6 ; xmm_shadow[14] = block[jno14]; + pinsrw %36, word [r12 + %24 * SIZEOF_WORD], 6 ; xmm_shadow[22] = block[jno22]; + pinsrw %37, word [r12 + %32 * SIZEOF_WORD], 6 ; xmm_shadow[30] = block[jno30]; + pinsrw %34, word [r12 + %9 * SIZEOF_WORD], 7 ; xmm_shadow[7] = block[jno7]; + pinsrw %35, word [r12 + %17 * SIZEOF_WORD], 7 ; xmm_shadow[15] = block[jno15]; + pinsrw %36, word [r12 + %25 * SIZEOF_WORD], 7 ; xmm_shadow[23] = block[jno23]; +%if %1 != 32 + pinsrw %37, word [r12 + %33 * SIZEOF_WORD], 7 ; xmm_shadow[31] = block[jno31]; +%else + pinsrw %37, ebx, 7 ; xmm_shadow[31]
= block[jno31]; NOTE(review): for ko == 32 the low word of ebx is inserted instead of a coefficient -- presumably zeroed by the caller; confirm +%endif + pcmpgtw xmm8, %34 ; neg = _mm_cmpgt_epi16(neg, x1); (all-ones in the negative lanes) + pcmpgtw xmm9, %35 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm10, %36 ; neg = _mm_cmpgt_epi16(neg, x1); + pcmpgtw xmm11, %37 ; neg = _mm_cmpgt_epi16(neg, x1); + paddw %34, xmm8 ; x1 = _mm_add_epi16(x1, neg); + paddw %35, xmm9 ; x1 = _mm_add_epi16(x1, neg); + paddw %36, xmm10 ; x1 = _mm_add_epi16(x1, neg); + paddw %37, xmm11 ; x1 = _mm_add_epi16(x1, neg); + pxor %34, xmm8 ; x1 = _mm_xor_si128(x1, neg); (x1 is now the absolute value) + pxor %35, xmm9 ; x1 = _mm_xor_si128(x1, neg); + pxor %36, xmm10 ; x1 = _mm_xor_si128(x1, neg); + pxor %37, xmm11 ; x1 = _mm_xor_si128(x1, neg); + pxor xmm8, %34 ; neg = _mm_xor_si128(neg, x1); (complement of the magnitude in negative lanes) + pxor xmm9, %35 ; neg = _mm_xor_si128(neg, x1); + pxor xmm10, %36 ; neg = _mm_xor_si128(neg, x1); + pxor xmm11, %37 ; neg = _mm_xor_si128(neg, x1); + movdqa XMMWORD [t1 + %1 * SIZEOF_WORD], %34 ; _mm_storeu_si128((__m128i *)(t1 + ko), x1); (the asm uses an aligned store) + movdqa XMMWORD [t1 + (%1 + 8) * SIZEOF_WORD], %35 ; _mm_storeu_si128((__m128i *)(t1 + ko + 8), x1); + movdqa XMMWORD [t1 + (%1 + 16) * SIZEOF_WORD], %36 ; _mm_storeu_si128((__m128i *)(t1 + ko + 16), x1); + movdqa XMMWORD [t1 + (%1 + 24) * SIZEOF_WORD], %37 ; _mm_storeu_si128((__m128i *)(t1 + ko + 24), x1); + movdqa XMMWORD [t2 + %1 * SIZEOF_WORD], xmm8 ; _mm_storeu_si128((__m128i *)(t2 + ko), neg); + movdqa XMMWORD [t2 + (%1 + 8) * SIZEOF_WORD], xmm9 ; _mm_storeu_si128((__m128i *)(t2 + ko + 8), neg); + movdqa XMMWORD [t2 + (%1 + 16) * SIZEOF_WORD], xmm10 ; _mm_storeu_si128((__m128i *)(t2 + ko + 16), neg); + movdqa XMMWORD [t2 + (%1 + 24) * SIZEOF_WORD], xmm11 ; _mm_storeu_si128((__m128i *)(t2 + ko + 24), neg); +%endmacro + +; +; Encode a single block's worth of coefficients.
+; +; GLOBAL(JOCTET *) +; jsimd_huff_encode_one_block_sse2(working_state *state, JOCTET *buffer, +; JCOEFPTR block, int last_dc_val, +; c_derived_tbl *dctbl, c_derived_tbl *actbl) +; Returns (in rax) the output pointer advanced past the bytes written. + +; r10 = working_state *state +; r11 = JOCTET *buffer +; r12 = JCOEFPTR block +; r13d = int last_dc_val +; r14 = c_derived_tbl *dctbl +; r15 = c_derived_tbl *actbl + +%define t1 rbp - (DCTSIZE2 * SIZEOF_WORD) +%define t2 t1 - (DCTSIZE2 * SIZEOF_WORD) +%define put_buffer r8 +%define put_bits r9d +%define buffer rax + + align 32 + GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2) + +EXTN(jsimd_huff_encode_one_block_sse2): + push rbp + mov rax, rsp ; rax = original rsp (points at the saved rbp) + sub rsp, byte 4 ; make sure the aligned slot lies below the saved rbp + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax ; stash the unaligned rsp for the epilogue's "pop rsp" + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [t2] ; reserve t1[] and t2[] (DCTSIZE2 words each) below rbp + push_xmm 4 + collect_args 6 + push rbx + + mov buffer, r11 ; r11 is now scratch + + mov put_buffer, MMWORD [r10+16] ; put_buffer = state->cur.put_buffer; + mov put_bits, DWORD [r10+24] ; put_bits = state->cur.put_bits; + push r10 ; r10 is now scratch + + ; Encode the DC coefficient difference per section F.1.2.1 + movsx edi, word [r12] ; temp = temp2 = block[0] - last_dc_val; + sub edi, r13d ; r13 is not used anymore + mov ebx, edi + + ; This is a well-known technique for obtaining the absolute value + ; without a branch. It is derived from an assembly language technique + ; presented in "How to Optimize for the Pentium Processors", + ; Copyright (c) 1996, 1997 by Agner Fog.
+ mov esi, edi + sar esi, 31 ; temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); + xor edi, esi ; temp ^= temp3; + sub edi, esi ; temp -= temp3; + + ; For a negative input, want temp2 = bitwise complement of abs(input) + ; This code assumes we are on a two's complement machine + add ebx, esi ; temp2 += temp3; + + ; Find the number of bits needed for the magnitude of the coefficient + lea r11, [rel jpeg_nbits_table] + movzx rdi, byte [r11 + rdi] ; nbits = JPEG_NBITS(temp); + ; Emit the Huffman-coded symbol for the number of bits + mov r11d, INT [r14 + rdi * 4] ; code = dctbl->ehufco[nbits]; + movzx esi, byte [r14 + rdi + 1024] ; size = dctbl->ehufsi[nbits]; + EMIT_BITS r11, esi ; EMIT_BITS(code, size) + + ; Mask off any extra bits in code + mov esi, 1 + mov ecx, edi + shl esi, cl + dec esi + and ebx, esi ; temp2 &= (((JLONG)1) << nbits) - 1; NOTE(review): the source text between this mask and the size_0xf0 load below is missing from this copy (only the comment tail "ehufco[0xf0];" survived); r13 (code_0xf0), r11 (index) and the t1/t2 arrays are consumed below without being set in view -- restore from the upstream file + movzx r14d, byte [r15 + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0]; + lea rsi, [t1] +.BLOOP: + bsf r12, r11 ; r = __builtin_ctzl(index); + jz .ELOOP + mov rcx, r12 + lea rsi, [rsi+r12*2] ; k += r; + shr r11, cl ; index >>= r; + movzx rdi, word [rsi] ; temp = t1[k]; + lea rbx, [rel jpeg_nbits_table] + movzx rdi, byte [rbx + rdi] ; nbits = JPEG_NBITS(temp); +.BRLOOP: + cmp r12, 16 ; while (r > 15) { + jl .ERLOOP + EMIT_BITS r13, r14d ; EMIT_BITS(code_0xf0, size_0xf0) + sub r12, 16 ; r -= 16; + jmp .BRLOOP +.ERLOOP: + ; Emit Huffman symbol for run length / number of bits + CHECKBUF31 ; uses rcx, rdx + + shl r12, 4 ; temp3 = (r << 4) + nbits; + add r12, rdi + mov ebx, INT [r15 + r12 * 4] ; code = actbl->ehufco[temp3]; + movzx ecx, byte [r15 + r12 + 1024] ; size = actbl->ehufsi[temp3]; + PUT_BITS rbx + + ;EMIT_CODE(code, size) + + movsx ebx, word [rsi-DCTSIZE2*2] ; temp2 = t2[k]; + ; Mask off any extra bits in code + mov rcx, rdi + mov rdx, 1 + shl rdx, cl + dec rdx + and rbx, rdx ; temp2 &= (((JLONG)1) << nbits) - 1; NOTE(review): source text is missing here as well (surviving tail: ">= 1;", presumably from an "index >>= 1" step) -- restore from the upstream file + add rsi, 2 ; ++k; + jmp .BLOOP +.ELOOP: + ; If the last coef(s) were zero, emit an end-of-block code + lea rdi, [t1 +
(DCTSIZE2-1) * 2] ; r = DCTSIZE2-1-k; + cmp rdi, rsi ; if (r > 0) { + je .EFN + mov ebx, INT [r15] ; code = actbl->ehufco[0]; + movzx r12d, byte [r15 + 1024] ; size = actbl->ehufsi[0]; + EMIT_BITS rbx, r12d +.EFN: + pop r10 ; r10 = state again (pushed after the initial loads) + ; Save put_buffer & put_bits + mov MMWORD [r10+16], put_buffer ; state->cur.put_buffer = put_buffer; + mov DWORD [r10+24], put_bits ; state->cur.put_bits = put_bits; + + pop rbx + uncollect_args 6 + pop_xmm 4 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rsp (value stashed by the prologue) + pop rbp ; restore the caller's rbp + ret ; rax = buffer (advanced output pointer) + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jcphuff-sse2.asm b/simd/x86_64/jcphuff-sse2.asm new file mode 100644 index 0000000..b17488a --- /dev/null +++ b/simd/x86_64/jcphuff-sse2.asm @@ -0,0 +1,637 @@ +; +; jcphuff-sse2.asm - prepare data for progressive Huffman encoding +; (64-bit SSE2) +; +; Copyright (C) 2016, 2018, Matthieu Darbois +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains an SSE2 implementation of data preparation for progressive +; Huffman encoding. See jcphuff.c for more details.
+; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +; -------------------------------------------------------------------------- +; Macros to load data for jsimd_encode_mcu_AC_first_prepare_sse2() and +; jsimd_encode_mcu_AC_refine_prepare_sse2() + +%macro LOAD16 0 ; X0/X1 = the 16 coefficients BLOCK[LUT[0..15]]; N0/N1 cleared; clobbers T0, T1 + pxor N0, N0 + pxor N1, N1 + + mov T0d, INT [LUT + 0*SIZEOF_INT] + mov T1d, INT [LUT + 8*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 0 + pinsrw X1, word [BLOCK + T1 * 2], 0 + + mov T0d, INT [LUT + 1*SIZEOF_INT] + mov T1d, INT [LUT + 9*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 1 + pinsrw X1, word [BLOCK + T1 * 2], 1 + + mov T0d, INT [LUT + 2*SIZEOF_INT] + mov T1d, INT [LUT + 10*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 2 + pinsrw X1, word [BLOCK + T1 * 2], 2 + + mov T0d, INT [LUT + 3*SIZEOF_INT] + mov T1d, INT [LUT + 11*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 3 + pinsrw X1, word [BLOCK + T1 * 2], 3 + + mov T0d, INT [LUT + 4*SIZEOF_INT] + mov T1d, INT [LUT + 12*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 4 + pinsrw X1, word [BLOCK + T1 * 2], 4 + + mov T0d, INT [LUT + 5*SIZEOF_INT] + mov T1d, INT [LUT + 13*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 5 + pinsrw X1, word [BLOCK + T1 * 2], 5 + + mov T0d, INT [LUT + 6*SIZEOF_INT] + mov T1d, INT [LUT + 14*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 6 + pinsrw X1, word [BLOCK + T1 * 2], 6 + + mov T0d, INT [LUT + 7*SIZEOF_INT] + mov T1d, INT [LUT + 15*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 7 + pinsrw X1, word [BLOCK + T1 * 2], 7 +%endmacro + +%macro LOAD15 0 ; X0 = BLOCK[LUT[0..7]]; X1 = lane 0 (always) plus lanes 1..LENEND-1, remaining lanes zero + pxor N0, N0 + pxor N1, N1 + pxor X1, X1 + + mov T0d, INT [LUT + 0*SIZEOF_INT] + mov T1d, INT [LUT + 8*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 0 + pinsrw X1, word [BLOCK + T1 * 2], 0 + + mov T0d, INT [LUT + 1*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 1 + + mov T0d, INT [LUT + 2*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 2 + + mov T0d, INT [LUT +
3*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 3 + + mov T0d, INT [LUT + 4*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 4 + + mov T0d, INT [LUT + 5*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 5 + + mov T0d, INT [LUT + 6*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 6 + + mov T0d, INT [LUT + 7*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 7 + + cmp LENEND, 2 ; stop as soon as the lane index reaches LENEND + jl %%.ELOAD15 + mov T1d, INT [LUT + 9*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 1 + + cmp LENEND, 3 + jl %%.ELOAD15 + mov T1d, INT [LUT + 10*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 2 + + cmp LENEND, 4 + jl %%.ELOAD15 + mov T1d, INT [LUT + 11*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 3 + + cmp LENEND, 5 + jl %%.ELOAD15 + mov T1d, INT [LUT + 12*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 4 + + cmp LENEND, 6 + jl %%.ELOAD15 + mov T1d, INT [LUT + 13*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 5 + + cmp LENEND, 7 + jl %%.ELOAD15 + mov T1d, INT [LUT + 14*SIZEOF_INT] + pinsrw X1, word [BLOCK + T1 * 2], 6 +%%.ELOAD15: +%endmacro + +%macro LOAD8 0 ; X0 = the 8 coefficients BLOCK[LUT[0..7]]; N0 cleared; clobbers T0 + pxor N0, N0 + + mov T0d, INT [LUT + 0*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 0 + + mov T0d, INT [LUT + 1*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 1 + + mov T0d, INT [LUT + 2*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 2 + + mov T0d, INT [LUT + 3*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 3 + + mov T0d, INT [LUT + 4*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 4 + + mov T0d, INT [LUT + 5*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 5 + + mov T0d, INT [LUT + 6*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 6 + + mov T0d, INT [LUT + 7*SIZEOF_INT] + pinsrw X0, word [BLOCK + T0 * 2], 7 +%endmacro + +%macro LOAD7 0 ; X0 = lane 0 (loaded unconditionally, even when LENEND is 0) plus lanes 1..LENEND-1, remaining lanes zero + pxor N0, N0 + pxor X0, X0 + + mov T1d, INT [LUT + 0*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 0 + + cmp LENEND, 2 ; stop as soon as the lane index reaches LENEND + jl %%.ELOAD7 + mov T1d, INT [LUT + 1*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 1 + + cmp LENEND, 3 + jl %%.ELOAD7 + mov T1d, INT [LUT + 2*SIZEOF_INT] + pinsrw X0, word
[BLOCK + T1 * 2], 2 + + cmp LENEND, 4 + jl %%.ELOAD7 + mov T1d, INT [LUT + 3*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 3 + + cmp LENEND, 5 + jl %%.ELOAD7 + mov T1d, INT [LUT + 4*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 4 + + cmp LENEND, 6 + jl %%.ELOAD7 + mov T1d, INT [LUT + 5*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 5 + + cmp LENEND, 7 + jl %%.ELOAD7 + mov T1d, INT [LUT + 6*SIZEOF_INT] + pinsrw X0, word [BLOCK + T1 * 2], 6 +%%.ELOAD7: +%endmacro + +%macro REDUCE0 0 ; stores through r15 a 64-bit mask whose bit k is set when word k of VALUES is nonzero; clobbers xmm0-xmm7, rax, rcx, rdx, rsi. NOTE(review): xmm6/xmm7 are callee-saved under the Win64 ABI -- confirm the callers preserve them + movdqa xmm0, XMMWORD [VALUES + ( 0*2)] + movdqa xmm1, XMMWORD [VALUES + ( 8*2)] + movdqa xmm2, XMMWORD [VALUES + (16*2)] + movdqa xmm3, XMMWORD [VALUES + (24*2)] + movdqa xmm4, XMMWORD [VALUES + (32*2)] + movdqa xmm5, XMMWORD [VALUES + (40*2)] + movdqa xmm6, XMMWORD [VALUES + (48*2)] + movdqa xmm7, XMMWORD [VALUES + (56*2)] + + pcmpeqw xmm0, ZERO ; all-ones in every lane that equals zero + pcmpeqw xmm1, ZERO + pcmpeqw xmm2, ZERO + pcmpeqw xmm3, ZERO + pcmpeqw xmm4, ZERO + pcmpeqw xmm5, ZERO + pcmpeqw xmm6, ZERO + pcmpeqw xmm7, ZERO + + packsswb xmm0, xmm1 ; narrow the word masks to byte masks, 16 lanes per register + packsswb xmm2, xmm3 + packsswb xmm4, xmm5 + packsswb xmm6, xmm7 + + pmovmskb eax, xmm0 ; 16 mask bits per register + pmovmskb ecx, xmm2 + pmovmskb edx, xmm4 + pmovmskb esi, xmm6 + + shl rcx, 16 + shl rdx, 32 + shl rsi, 48 + + or rax, rcx + or rdx, rsi + or rax, rdx ; rax = 64-bit "is zero" mask + + not rax ; invert: bit set where the value is nonzero + + mov MMWORD [r15], rax +%endmacro + +; +; Prepare data for jsimd_encode_mcu_AC_first().
+; +; GLOBAL(void) +; jsimd_encode_mcu_AC_first_prepare_sse2(const JCOEF *block, +; const int *jpeg_natural_order_start, +; int Sl, int Al, JCOEF *values, +; size_t *zerobits) +; Writes abs(coef) >> Al for the Sl selected coefficients to values[0..], zero-pads values[] up to DCTSIZE2, writes the value-or-complement words to values[DCTSIZE2..], and stores the nonzero mask through zerobits. +; r10 = const JCOEF *block +; r11 = const int *jpeg_natural_order_start +; r12 = int Sl +; r13 = int Al +; r14 = JCOEF *values +; r15 = size_t *zerobits + +%define ZERO xmm9 +%define X0 xmm0 +%define X1 xmm1 +%define N0 xmm2 +%define N1 xmm3 +%define AL xmm4 +%define K eax +%define LUT r11 +%define T0 rcx +%define T0d ecx +%define T1 rdx +%define T1d edx +%define BLOCK r10 +%define VALUES r14 +%define LEN r12d +%define LENEND r13d + + align 32 + GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2) + +EXTN(jsimd_encode_mcu_AC_first_prepare_sse2): + push rbp + mov rax, rsp ; rax = original rsp (points at the saved rbp) + sub rsp, byte 4 ; make sure the aligned slot lies below the saved rbp + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax ; stash the unaligned rsp for the epilogue's "pop rsp" + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [rbp - 16] ; reserve one xmmword for saving ZERO + collect_args 6 + + movdqa XMMWORD [rbp - 16], ZERO ; preserve the caller's xmm9 + + movd AL, r13d ; AL = Al (read before r13d is reused as LENEND) + pxor ZERO, ZERO + mov K, LEN + mov LENEND, LEN + and K, -16 + and LENEND, 7 ; LENEND = Sl % 8 + shr K, 4 ; K = Sl / 16 full groups + jz .ELOOP16 +.BLOOP16: + LOAD16 + pcmpgtw N0, X0 ; N = all-ones in the negative lanes + pcmpgtw N1, X1 + paddw X0, N0 + paddw X1, N1 + pxor X0, N0 ; X = abs(coef) + pxor X1, N1 + psrlw X0, AL ; X = abs(coef) >> Al + psrlw X1, AL + pxor N0, X0 ; N = X, complemented in the negative lanes + pxor N1, X1 + movdqa XMMWORD [VALUES + (0) * 2], X0 + movdqa XMMWORD [VALUES + (8) * 2], X1 + movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0 + movdqa XMMWORD [VALUES + (8 + DCTSIZE2) * 2], N1 + add VALUES, 16*2 + add LUT, 16*SIZEOF_INT + dec K + jnz .BLOOP16 +.ELOOP16: + test LEN, 15 ; Sl a multiple of 16: there is no tail group, and LOAD7 + je .PADDING ; would fetch one coefficient past Sl and shift VALUES + test LEN, 8 + jz .TRY7 ; 1..7 coefficients left + test LEN, 7 + jz .TRY8 ; exactly 8 left + + LOAD15 ; 9..15 coefficients left + pcmpgtw N0, X0 + pcmpgtw N1, X1 + paddw X0, N0 + paddw X1, N1 + pxor X0, N0 + pxor X1, N1 + psrlw X0, AL + psrlw X1, AL + pxor N0, X0 + pxor N1, X1 + movdqa XMMWORD [VALUES + (0) * 2], X0 + movdqa XMMWORD [VALUES + (8) * 2], X1 + movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0 + movdqa XMMWORD [VALUES + (8 + DCTSIZE2) * 2], N1 + add VALUES, 16*2 + jmp .PADDING +.TRY8: + LOAD8 + pcmpgtw N0, X0 + paddw
X0, N0 + pxor X0, N0 + psrlw X0, AL + pxor N0, X0 + movdqa XMMWORD [VALUES + (0) * 2], X0 + movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0 + add VALUES, 8*2 + jmp .PADDING +.TRY7: + LOAD7 + pcmpgtw N0, X0 + paddw X0, N0 + pxor X0, N0 + psrlw X0, AL + pxor N0, X0 + movdqa XMMWORD [VALUES + (0) * 2], X0 + movdqa XMMWORD [VALUES + (0 + DCTSIZE2) * 2], N0 + add VALUES, 8*2 +.PADDING: + mov K, LEN + add K, 7 + and K, -8 + shr K, 3 ; K = number of 8-word groups already written + sub K, DCTSIZE2/8 ; K = -(groups still to be zeroed) + jz .EPADDING + align 16 +.ZEROLOOP: + movdqa XMMWORD [VALUES + 0], ZERO + add VALUES, 8*2 + inc K + jnz .ZEROLOOP +.EPADDING: + sub VALUES, DCTSIZE2*2 ; VALUES is at the end of values[]; rewind to its start + + REDUCE0 + + movdqa ZERO, XMMWORD [rbp - 16] ; restore the caller's xmm9 + uncollect_args 6 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rsp (value stashed by the prologue) + pop rbp ; restore the caller's rbp + ret + +%undef ZERO +%undef X0 +%undef X1 +%undef N0 +%undef N1 +%undef AL +%undef K +%undef LUT +%undef T0 +%undef T0d +%undef T1 +%undef T1d +%undef BLOCK +%undef VALUES +%undef LEN +%undef LENEND + +; +; Prepare data for jsimd_encode_mcu_AC_refine().
+;
+; GLOBAL(int)
+; jsimd_encode_mcu_AC_refine_prepare_sse2(const JCOEF *block,
+;                                         const int *jpeg_natural_order_start,
+;                                         int Sl, int Al, JCOEF *absvalues,
+;                                         size_t *bits)
+; Stores the shifted words at absvalues[0..DCTSIZE2-1] (zero-padded), writes the inverted sign bits to [r15+SIZEOF_MMWORD], runs REDUCE0, and returns EOB in eax.
+; r10 = const JCOEF *block
+; r11 = const int *jpeg_natural_order_start
+; r12 = int Sl
+; r13 = int Al
+; r14 = JCOEF *absvalues
+; r15 = size_t *bits
+
+%define ZERO    xmm9
+%define ONE     xmm5
+%define X0      xmm0
+%define X1      xmm1
+%define N0      xmm2
+%define N1      xmm3
+%define AL      xmm4
+%define K       eax
+%define KK      r9d
+%define EOB     r8d
+%define SIGN    rdi
+%define LUT     r11
+%define T0      rcx
+%define T0d     ecx
+%define T1      rdx
+%define T1d     edx
+%define BLOCK   r10
+%define VALUES  r14
+%define LEN     r12d
+%define LENEND  r13d
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
+
+EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
+    push        rbp
+    mov         rax, rsp                     ; rax = original rbp
+    sub         rsp, byte 4
+    and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
+    mov         [rsp], rax
+    mov         rbp, rsp                     ; rbp = aligned rbp
+    lea         rsp, [rbp - 16]
+    collect_args 6
+
+    movdqa      XMMWORD [rbp - 16], ZERO     ; spill caller's xmm9; reloaded before ret
+    xor         SIGN, SIGN                   ; 64-bit accumulator, one sign bit per coefficient
+    xor         EOB, EOB                     ; return value, stays 0 if no word ever equals 1
+    xor         KK, KK                       ; index of the current group's first coefficient
+    movd        AL, r13d                     ; AL = Al, the psrlw shift count
+    pxor        ZERO, ZERO
+    pcmpeqw     ONE, ONE
+    psrlw       ONE, 15                      ; ONE = 0x0001 in every word
+    mov         K, LEN
+    mov         LENEND, LEN                  ; r13d is free again: Al already lives in AL
+    and         K, -16
+    and         LENEND, 7                    ; LENEND = Sl % 8 (presumably read by LOAD15/LOAD7 -- confirm in the macros)
+    shr         K, 4                         ; K = Sl / 16 = number of full 16-coefficient groups
+    jz          .ELOOPR16
+.BLOOPR16:
+    LOAD16                                   ; macro defined earlier in this file; presumably gathers 16 coefs into X0/X1 and clears N0/N1 -- confirm
+    pcmpgtw     N0, X0                       ; N = (N > X) per word, i.e. the sign mask of X if N was zero
+    pcmpgtw     N1, X1
+    paddw       X0, N0                       ; (X + N) ^ N: |X| when N is the sign mask
+    paddw       X1, N1
+    pxor        X0, N0
+    pxor        X1, N1
+    psrlw       X0, AL                       ; X >>= Al
+    psrlw       X1, AL
+    movdqa      XMMWORD [VALUES + (0) * 2], X0
+    movdqa      XMMWORD [VALUES + (8) * 2], X1
+    pcmpeqw     X0, ONE                      ; word mask: shifted value == 1
+    pcmpeqw     X1, ONE
+    packsswb    N0, N1                       ; narrow word masks to bytes so pmovmskb gives 1 bit per coefficient
+    packsswb    X0, X1
+    pmovmskb    T0d, N0                      ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
+    pmovmskb    T1d, X0                      ; idx = _mm_movemask_epi8(x1);
+    shr         SIGN, 16                     ; make room for sizebits
+    shl         T0, 48
+    or          SIGN, T0                     ; newest 16 sign bits enter at the top of SIGN
+    bsr         T1d, T1d                     ; idx = 16 - (__builtin_clz(idx)>>1);
+    jz          .CONTINUER16                 ; if (idx) {
+    mov         EOB, KK
+    add         EOB, T1d                     ; EOB = k + idx;
+.CONTINUER16:
+    add         VALUES, 16*2
+    add         LUT, 16*SIZEOF_INT
+    add         KK, 16
+    dec         K
+    jnz         .BLOOPR16
+.ELOOPR16:
+    test        LEN, 15                      ; Sl % 16 == 0: no tail left.  Falling into .TRYR7 here would add an
+    jz          .PADDINGR                    ; extra "shr SIGN, 8" (dropping 8 sign bits) and leave VALUES 8 words high
+    test        LEN, 8                       ; bit 3 clear: 1..7 coefficients remain
+    jz          .TRYR7
+    test        LEN, 7                       ; bit 3 set, low bits clear: exactly 8 remain
+    jz          .TRYR8
+    LOAD15                                   ; 9..15 coefficients remain
+    pcmpgtw     N0, X0
+    pcmpgtw     N1, X1
+    paddw       X0, N0
+    paddw       X1, N1
+    pxor        X0, N0
+    pxor        X1, N1
+    psrlw       X0, AL
+    psrlw       X1, AL
+    movdqa      XMMWORD [VALUES + (0) * 2], X0
+    movdqa      XMMWORD [VALUES + (8) * 2], X1
+    pcmpeqw     X0, ONE
+    pcmpeqw     X1, ONE
+    packsswb    N0, N1
+    packsswb    X0, X1
+    pmovmskb    T0d, N0                      ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
+    pmovmskb    T1d, X0                      ; idx = _mm_movemask_epi8(x1);
+    shr         SIGN, 16                     ; make room for sizebits
+    shl         T0, 48
+    or          SIGN, T0
+    bsr         T1d, T1d                     ; idx = 16 - (__builtin_clz(idx)>>1);
+    jz          .CONTINUER15                 ; if (idx) {
+    mov         EOB, KK
+    add         EOB, T1d                     ; EOB = k + idx;
+.CONTINUER15:
+    add         VALUES, 16*2
+    jmp         .PADDINGR
+.TRYR8:
+    LOAD8
+
+    pcmpgtw     N0, X0
+    paddw       X0, N0
+    pxor        X0, N0
+    psrlw       X0, AL
+    movdqa      XMMWORD [VALUES + (0) * 2], X0
+    pcmpeqw     X0, ONE
+    packsswb    N0, ZERO                     ; upper 8 bytes come from ZERO: only 8 mask bits can be set
+    packsswb    X0, ZERO
+    pmovmskb    T0d, N0                      ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
+    pmovmskb    T1d, X0                      ; idx = _mm_movemask_epi8(x1);
+    shr         SIGN, 8                      ; make room for sizebits
+    shl         T0, 56
+    or          SIGN, T0
+    bsr         T1d, T1d                     ; idx = 16 - (__builtin_clz(idx)>>1);
+    jz          .CONTINUER8                  ; if (idx) {
+    mov         EOB, KK
+    add         EOB, T1d                     ; EOB = k + idx;
+.CONTINUER8:
+    add         VALUES, 8*2
+    jmp         .PADDINGR
+.TRYR7:
+    LOAD7
+
+    pcmpgtw     N0, X0
+    paddw       X0, N0
+    pxor        X0, N0
+    psrlw       X0, AL
+    movdqa      XMMWORD [VALUES + (0) * 2], X0
+    pcmpeqw     X0, ONE
+    packsswb    N0, ZERO
+    packsswb    X0, ZERO
+    pmovmskb    T0d, N0                      ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
+    pmovmskb    T1d, X0                      ; idx = _mm_movemask_epi8(x1);
+    shr         SIGN, 8                      ; make room for sizebits
+    shl         T0, 56
+    or          SIGN, T0
+    bsr         T1d, T1d                     ; idx = 16 - (__builtin_clz(idx)>>1);
+    jz          .CONTINUER7                  ; if (idx) {
+    mov         EOB, KK
+    add         EOB, T1d                     ; EOB = k + idx;
+.CONTINUER7:
+    add         VALUES, 8*2
+.PADDINGR:
+    mov         K, LEN
+    add         K, 7
+    and         K, -8
+    shr         K, 3                         ; K = ceil(Sl / 8) = 8-word groups already written
+    sub         K, DCTSIZE2/8                ; K = -(groups still to zero); 0 means the buffer is full
+    jz          .EPADDINGR
+    align       16
+.ZEROLOOPR:
+    movdqa      XMMWORD [VALUES + 0], ZERO
+    shr         SIGN, 8                      ; each padded group moves the sign bits 8 places toward bit 0
+    add         VALUES, 8*2
+    inc         K
+    jnz         .ZEROLOOPR
+.EPADDINGR:
+    not         SIGN
+    sub         VALUES, DCTSIZE2*2           ; rewind VALUES to the start of the buffer
+    mov         MMWORD [r15+SIZEOF_MMWORD], SIGN  ; second quadword of bits[] receives the inverted sign bits
+
+    REDUCE0                                  ; macro defined earlier in this file; presumably fills the first word of bits[] -- confirm
+
+    mov         eax, EOB                     ; int return value
+    movdqa      ZERO, XMMWORD [rbp - 16]     ; restore caller's xmm9
+    uncollect_args 6
+    mov         rsp, rbp                     ; rsp <- aligned rbp
+    pop         rsp                          ; rsp <- original rbp
+    pop         rbp
+    ret
+
+%undef ZERO
+%undef ONE
+%undef X0
+%undef X1
+%undef N0
+%undef N1
+%undef AL
+%undef K
+%undef KK
+%undef EOB
+%undef SIGN
+%undef LUT
+%undef T0
+%undef T0d
+%undef T1
+%undef T1d
+%undef BLOCK
+%undef VALUES
+%undef LEN
+%undef LENEND
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+    align       32
diff --git a/simd/x86_64/jcsample-avx2.asm b/simd/x86_64/jcsample-avx2.asm
new file mode 100644
index 0000000..9d5a861
--- /dev/null
+++ b/simd/x86_64/jcsample-avx2.asm
@@ -0,0 +1,368 @@
+;
+; jcsample.asm - downsampling (64-bit AVX2)
+;
+; Copyright 2009 Pierre Ossman for Cendio AB
+; Copyright (C) 2009, 2016, D. R. Commander.
+; Copyright (C) 2015, Intel Corporation.
+;
+; Based on the x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+    SECTION     SEG_TEXT
+    BITS        64
+;
+; Downsample pixel values of a single component.
+; This version handles the common case of 2:1 horizontal and 1:1 vertical,
+; without smoothing.
+;
+; GLOBAL(void)
+; jsimd_h2v1_downsample_avx2(JDIMENSION image_width, int max_v_samp_factor,
+;                            JDIMENSION v_samp_factor,
+;                            JDIMENSION width_in_blocks, JSAMPARRAY input_data,
+;                            JSAMPARRAY output_data);
+; Each output byte is (even + odd + bias) >> 1 of one horizontal input pair; the bias alternates 0, 1 across output columns.
+
+; r10d = JDIMENSION image_width
+; r11 = int max_v_samp_factor
+; r12d = JDIMENSION v_samp_factor
+; r13d = JDIMENSION width_in_blocks
+; r14 = JSAMPARRAY input_data
+; r15 = JSAMPARRAY output_data
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)
+
+EXTN(jsimd_h2v1_downsample_avx2):
+    push        rbp
+    mov         rax, rsp
+    mov         rbp, rsp
+    collect_args 6
+
+    mov         ecx, r13d
+    shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
+    jz          near .return
+
+    mov         edx, r10d               ; rdx = image_width (zero-extended)
+
+    ; -- expand_right_edge
+
+    push        rcx                     ; keep output_cols; popped at .expand_end
+    shl         rcx, 1                  ; output_cols * 2
+    sub         rcx, rdx                ; rcx = bytes to append to each input row
+    jle         short .expand_end
+
+    mov         rax, r11                ; row counter = max_v_samp_factor
+    test        rax, rax
+    jle         short .expand_end
+
+    cld                                 ; rep stosb must walk forward
+    mov         rsi, r14                ; input_data
+.expandloop:
+    push        rax                     ; al and rcx are consumed by rep stosb below
+    push        rcx
+
+    mov         rdi, JSAMPROW [rsi]
+    add         rdi, rdx                ; rdi = one past the last real sample of this row
+    mov         al, JSAMPLE [rdi-1]     ; al = last real sample
+
+    rep stosb                           ; replicate it rcx times
+
+    pop         rcx
+    pop         rax
+
+    add         rsi, byte SIZEOF_JSAMPROW
+    dec         rax
+    jg          short .expandloop
+
+.expand_end:
+    pop         rcx                     ; output_cols
+
+    ; -- h2v1_downsample
+
+    mov         eax, r12d               ; rowctr
+    test        eax, eax
+    jle         near .return
+
+    mov         rdx, 0x00010000         ; bias pattern
+    vmovd       xmm7, edx
+    vpshufd     xmm7, xmm7, 0x00        ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
+    vperm2i128  ymm7, ymm7, ymm7, 0     ; ymm7={xmm7, xmm7}
+    vpcmpeqw    ymm6, ymm6, ymm6
+    vpsrlw      ymm6, ymm6, BYTE_BIT    ; ymm6={0xFF 0x00 0xFF 0x00 ..}
+
+    mov         rsi, r14                ; input_data
+    mov         rdi, r15                ; output_data
+.rowloop:
+    push        rcx
+    push        rdi
+    push        rsi
+
+    mov         rsi, JSAMPROW [rsi]     ; inptr
+    mov         rdi, JSAMPROW [rdi]     ; outptr
+
+    cmp         rcx, byte SIZEOF_YMMWORD
+    jae         short .columnloop
+
+.columnloop_r24:
+    ; rcx can possibly be 8, 16, 24
+    cmp         rcx, 24
+    jne         .columnloop_r16
+    vmovdqu     ymm0, YMMWORD [rsi+0*SIZEOF_YMMWORD]
+    vmovdqu     xmm1, XMMWORD [rsi+1*SIZEOF_YMMWORD]
+    mov         rcx, SIZEOF_YMMWORD     ; so the "sub rcx" after the store ends the column loop
+    jmp         short .downsample
+
+.columnloop_r16:
+    cmp         rcx, 16
+    jne         .columnloop_r8
+    vmovdqu     ymm0, YMMWORD [rsi+0*SIZEOF_YMMWORD]
+    vpxor       ymm1, ymm1, ymm1        ; second input vector is not read; use zeros
+    mov         rcx, SIZEOF_YMMWORD
+    jmp         short .downsample
+
+.columnloop_r8:
+    vmovdqu     xmm0, XMMWORD[rsi+0*SIZEOF_YMMWORD]
+    vpxor       ymm1, ymm1, ymm1
+    mov         rcx, SIZEOF_YMMWORD
+    jmp         short .downsample
+
+.columnloop:
+    vmovdqu     ymm0, YMMWORD [rsi+0*SIZEOF_YMMWORD]
+    vmovdqu     ymm1, YMMWORD [rsi+1*SIZEOF_YMMWORD]
+
+.downsample:
+    vpsrlw      ymm2, ymm0, BYTE_BIT    ; ymm2 = odd-position bytes as words
+    vpand       ymm0, ymm0, ymm6        ; ymm0 = even-position bytes as words
+    vpsrlw      ymm3, ymm1, BYTE_BIT
+    vpand       ymm1, ymm1, ymm6
+
+    vpaddw      ymm0, ymm0, ymm2        ; even + odd
+    vpaddw      ymm1, ymm1, ymm3
+    vpaddw      ymm0, ymm0, ymm7        ; + bias
+    vpaddw      ymm1, ymm1, ymm7
+    vpsrlw      ymm0, ymm0, 1           ; / 2
+    vpsrlw      ymm1, ymm1, 1
+
+    vpackuswb   ymm0, ymm0, ymm1        ; packs within each 128-bit lane
+    vpermq      ymm0, ymm0, 0xd8        ; reorder quadwords 0,2,1,3 to undo the lane interleave
+
+    vmovdqu     YMMWORD [rdi+0*SIZEOF_YMMWORD], ymm0
+
+    sub         rcx, byte SIZEOF_YMMWORD    ; outcol
+    add         rsi, byte 2*SIZEOF_YMMWORD  ; inptr
+    add         rdi, byte 1*SIZEOF_YMMWORD  ; outptr
+    cmp         rcx, byte SIZEOF_YMMWORD
+    jae         short .columnloop
+    test        rcx, rcx
+    jnz         near .columnloop_r24
+
+    pop         rsi
+    pop         rdi
+    pop         rcx
+
+    add         rsi, byte SIZEOF_JSAMPROW  ; input_data
+    add         rdi, byte SIZEOF_JSAMPROW  ; output_data
+    dec         rax                        ; rowctr
+    jg          near .rowloop
+
+.return:
+    vzeroupper
+    uncollect_args 6
+    pop         rbp
+    ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample pixel values of a single component.
+; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+; without smoothing.
+;
+; GLOBAL(void)
+; jsimd_h2v2_downsample_avx2(JDIMENSION image_width, int max_v_samp_factor,
+;                            JDIMENSION v_samp_factor,
+;                            JDIMENSION width_in_blocks, JSAMPARRAY input_data,
+;                            JSAMPARRAY output_data);
+; Each output byte is (sum of a 2x2 input block + bias) >> 2; the bias alternates 1, 2 across output columns.
+
+; r10d = JDIMENSION image_width
+; r11 = int max_v_samp_factor
+; r12d = JDIMENSION v_samp_factor
+; r13d = JDIMENSION width_in_blocks
+; r14 = JSAMPARRAY input_data
+; r15 = JSAMPARRAY output_data
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)
+
+EXTN(jsimd_h2v2_downsample_avx2):
+    push        rbp
+    mov         rax, rsp
+    mov         rbp, rsp
+    collect_args 6
+
+    mov         ecx, r13d
+    shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
+    jz          near .return
+
+    mov         edx, r10d               ; rdx = image_width (zero-extended)
+
+    ; -- expand_right_edge
+
+    push        rcx                     ; keep output_cols; popped at .expand_end
+    shl         rcx, 1                  ; output_cols * 2
+    sub         rcx, rdx                ; rcx = bytes to append to each input row
+    jle         short .expand_end
+
+    mov         rax, r11                ; row counter = max_v_samp_factor
+    test        rax, rax
+    jle         short .expand_end
+
+    cld                                 ; rep stosb must walk forward
+    mov         rsi, r14                ; input_data
+.expandloop:
+    push        rax                     ; al and rcx are consumed by rep stosb below
+    push        rcx
+
+    mov         rdi, JSAMPROW [rsi]
+    add         rdi, rdx                ; rdi = one past the last real sample of this row
+    mov         al, JSAMPLE [rdi-1]     ; al = last real sample
+
+    rep stosb                           ; replicate it rcx times
+
+    pop         rcx
+    pop         rax
+
+    add         rsi, byte SIZEOF_JSAMPROW
+    dec         rax
+    jg          short .expandloop
+
+.expand_end:
+    pop         rcx                     ; output_cols
+
+    ; -- h2v2_downsample
+
+    mov         eax, r12d               ; rowctr
+    test        rax, rax
+    jle         near .return
+
+    mov         rdx, 0x00020001         ; bias pattern
+    vmovd       xmm7, edx
+    vpcmpeqw    ymm6, ymm6, ymm6
+    vpshufd     xmm7, xmm7, 0x00        ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
+    vperm2i128  ymm7, ymm7, ymm7, 0     ; ymm7={xmm7, xmm7}
+    vpsrlw      ymm6, ymm6, BYTE_BIT    ; ymm6={0xFF 0x00 0xFF 0x00 ..}
+
+    mov         rsi, r14                ; input_data
+    mov         rdi, r15                ; output_data
+.rowloop:
+    push        rcx
+    push        rdi
+    push        rsi
+
+    mov         rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]  ; inptr0
+    mov         rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]  ; inptr1
+    mov         rdi, JSAMPROW [rdi]                    ; outptr
+
+    cmp         rcx, byte SIZEOF_YMMWORD
+    jae         short .columnloop
+
+.columnloop_r24:
+    cmp         rcx, 24                 ; tail: fewer than SIZEOF_YMMWORD output columns remain
+    jne         .columnloop_r16
+    vmovdqu     ymm0, YMMWORD [rdx+0*SIZEOF_YMMWORD]
+    vmovdqu     ymm1, YMMWORD [rsi+0*SIZEOF_YMMWORD]
+    vmovdqu     xmm2, XMMWORD [rdx+1*SIZEOF_YMMWORD]
+    vmovdqu     xmm3, XMMWORD [rsi+1*SIZEOF_YMMWORD]
+    mov         rcx, SIZEOF_YMMWORD     ; so the "sub rcx" after the store ends the column loop
+    jmp         short .downsample
+
+.columnloop_r16:
+    cmp         rcx, 16
+    jne         .columnloop_r8
+    vmovdqu     ymm0, YMMWORD [rdx+0*SIZEOF_YMMWORD]
+    vmovdqu     ymm1, YMMWORD [rsi+0*SIZEOF_YMMWORD]
+    vpxor       ymm2, ymm2, ymm2        ; second input vectors are not read; use zeros
+    vpxor       ymm3, ymm3, ymm3
+    mov         rcx, SIZEOF_YMMWORD
+    jmp         short .downsample
+
+.columnloop_r8:
+    vmovdqu     xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
+    vmovdqu     xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+    vpxor       ymm2, ymm2, ymm2
+    vpxor       ymm3, ymm3, ymm3
+    mov         rcx, SIZEOF_YMMWORD
+    jmp         short .downsample
+
+.columnloop:
+    vmovdqu     ymm0, YMMWORD [rdx+0*SIZEOF_YMMWORD]
+    vmovdqu     ymm1, YMMWORD [rsi+0*SIZEOF_YMMWORD]
+    vmovdqu     ymm2, YMMWORD [rdx+1*SIZEOF_YMMWORD]
+    vmovdqu     ymm3, YMMWORD [rsi+1*SIZEOF_YMMWORD]
+
+.downsample:
+    vpand       ymm4, ymm0, ymm6        ; even-position bytes of row 0
+    vpsrlw      ymm0, ymm0, BYTE_BIT    ; odd-position bytes of row 0
+    vpand       ymm5, ymm1, ymm6        ; same split for row 1
+    vpsrlw      ymm1, ymm1, BYTE_BIT
+    vpaddw      ymm0, ymm0, ymm4        ; horizontal pair sums, row 0
+    vpaddw      ymm1, ymm1, ymm5        ; horizontal pair sums, row 1
+
+    vpand       ymm4, ymm2, ymm6
+    vpsrlw      ymm2, ymm2, BYTE_BIT
+    vpand       ymm5, ymm3, ymm6
+    vpsrlw      ymm3, ymm3, BYTE_BIT
+    vpaddw      ymm2, ymm2, ymm4
+    vpaddw      ymm3, ymm3, ymm5
+
+    vpaddw      ymm0, ymm0, ymm1        ; row 0 + row 1 = 2x2 block sums
+    vpaddw      ymm2, ymm2, ymm3
+    vpaddw      ymm0, ymm0, ymm7        ; + bias
+    vpaddw      ymm2, ymm2, ymm7
+    vpsrlw      ymm0, ymm0, 2           ; / 4
+    vpsrlw      ymm2, ymm2, 2
+
+    vpackuswb   ymm0, ymm0, ymm2        ; packs within each 128-bit lane
+    vpermq      ymm0, ymm0, 0xd8        ; reorder quadwords 0,2,1,3 to undo the lane interleave
+
+    vmovdqu     YMMWORD [rdi+0*SIZEOF_YMMWORD], ymm0
+
+    sub         rcx, byte SIZEOF_YMMWORD    ; outcol
+    add         rdx, byte 2*SIZEOF_YMMWORD  ; inptr0
+    add         rsi, byte 2*SIZEOF_YMMWORD  ; inptr1
+    add         rdi, byte 1*SIZEOF_YMMWORD  ; outptr
+    cmp         rcx, byte SIZEOF_YMMWORD
+    jae         near .columnloop
+    test        rcx, rcx
+    jnz         near .columnloop_r24
+
+    pop         rsi
+    pop         rdi
+    pop         rcx
+
+    add         rsi, byte 2*SIZEOF_JSAMPROW  ; input_data
+    add         rdi, byte 1*SIZEOF_JSAMPROW  ; output_data
+    dec         rax                          ; rowctr
+    jg          near .rowloop
+
+.return:
+    vzeroupper
+    uncollect_args 6
+    pop         rbp
+    ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+ align 32 diff --git a/simd/x86_64/jcsample-sse2.asm b/simd/x86_64/jcsample-sse2.asm new file mode 100644 index 0000000..1b31536 --- /dev/null +++ b/simd/x86_64/jcsample-sse2.asm @@ -0,0 +1,331 @@ +; +; jcsample.asm - downsampling (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Downsample pixel values of a single component. +; This version handles the common case of 2:1 horizontal and 1:1 vertical, +; without smoothing. 
+;
+; GLOBAL(void)
+; jsimd_h2v1_downsample_sse2(JDIMENSION image_width, int max_v_samp_factor,
+;                            JDIMENSION v_samp_factor,
+;                            JDIMENSION width_in_blocks, JSAMPARRAY input_data,
+;                            JSAMPARRAY output_data);
+; Each output byte is (even + odd + bias) >> 1 of one horizontal input pair; the bias alternates 0, 1 across output columns.
+
+; r10d = JDIMENSION image_width
+; r11 = int max_v_samp_factor
+; r12d = JDIMENSION v_samp_factor
+; r13d = JDIMENSION width_in_blocks
+; r14 = JSAMPARRAY input_data
+; r15 = JSAMPARRAY output_data
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
+
+EXTN(jsimd_h2v1_downsample_sse2):
+    push        rbp
+    mov         rax, rsp
+    mov         rbp, rsp
+    collect_args 6
+
+    mov         ecx, r13d
+    shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
+    jz          near .return
+
+    mov         edx, r10d               ; rdx = image_width (zero-extended)
+
+    ; -- expand_right_edge
+
+    push        rcx                     ; keep output_cols; popped at .expand_end
+    shl         rcx, 1                  ; output_cols * 2
+    sub         rcx, rdx                ; rcx = bytes to append to each input row
+    jle         short .expand_end
+
+    mov         rax, r11                ; row counter = max_v_samp_factor
+    test        rax, rax
+    jle         short .expand_end
+
+    cld                                 ; rep stosb must walk forward
+    mov         rsi, r14                ; input_data
+.expandloop:
+    push        rax                     ; al and rcx are consumed by rep stosb below
+    push        rcx
+
+    mov         rdi, JSAMPROW [rsi]
+    add         rdi, rdx                ; rdi = one past the last real sample of this row
+    mov         al, JSAMPLE [rdi-1]     ; al = last real sample
+
+    rep stosb                           ; replicate it rcx times
+
+    pop         rcx
+    pop         rax
+
+    add         rsi, byte SIZEOF_JSAMPROW
+    dec         rax
+    jg          short .expandloop
+
+.expand_end:
+    pop         rcx                     ; output_cols
+
+    ; -- h2v1_downsample
+
+    mov         eax, r12d               ; rowctr
+    test        eax, eax
+    jle         near .return
+
+    mov         rdx, 0x00010000         ; bias pattern
+    movd        xmm7, edx
+    pcmpeqw     xmm6, xmm6
+    pshufd      xmm7, xmm7, 0x00        ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
+    psrlw       xmm6, BYTE_BIT          ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+    mov         rsi, r14                ; input_data
+    mov         rdi, r15                ; output_data
+.rowloop:
+    push        rcx
+    push        rdi
+    push        rsi
+
+    mov         rsi, JSAMPROW [rsi]     ; inptr
+    mov         rdi, JSAMPROW [rdi]     ; outptr
+
+    cmp         rcx, byte SIZEOF_XMMWORD
+    jae         short .columnloop
+
+.columnloop_r8:
+    movdqa      xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]  ; tail: fewer than SIZEOF_XMMWORD output columns remain
+    pxor        xmm1, xmm1              ; second input vector is not read; use zeros
+    mov         rcx, SIZEOF_XMMWORD     ; so the "sub rcx" after the store ends the column loop
+    jmp         short .downsample
+
+.columnloop:
+    movdqa      xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+    movdqa      xmm1, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+
+.downsample:
+    movdqa      xmm2, xmm0
+    movdqa      xmm3, xmm1
+
+    pand        xmm0, xmm6              ; even-position bytes as words
+    psrlw       xmm2, BYTE_BIT          ; odd-position bytes as words
+    pand        xmm1, xmm6
+    psrlw       xmm3, BYTE_BIT
+
+    paddw       xmm0, xmm2              ; even + odd
+    paddw       xmm1, xmm3
+    paddw       xmm0, xmm7              ; + bias
+    paddw       xmm1, xmm7
+    psrlw       xmm0, 1                 ; / 2
+    psrlw       xmm1, 1
+
+    packuswb    xmm0, xmm1
+
+    movdqa      XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
+
+    sub         rcx, byte SIZEOF_XMMWORD    ; outcol
+    add         rsi, byte 2*SIZEOF_XMMWORD  ; inptr
+    add         rdi, byte 1*SIZEOF_XMMWORD  ; outptr
+    cmp         rcx, byte SIZEOF_XMMWORD
+    jae         short .columnloop
+    test        rcx, rcx
+    jnz         short .columnloop_r8
+
+    pop         rsi
+    pop         rdi
+    pop         rcx
+
+    add         rsi, byte SIZEOF_JSAMPROW  ; input_data
+    add         rdi, byte SIZEOF_JSAMPROW  ; output_data
+    dec         rax                        ; rowctr
+    jg          near .rowloop
+
+.return:
+    uncollect_args 6
+    pop         rbp
+    ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample pixel values of a single component.
+; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+; without smoothing.
+;
+; GLOBAL(void)
+; jsimd_h2v2_downsample_sse2(JDIMENSION image_width, int max_v_samp_factor,
+;                            JDIMENSION v_samp_factor,
+;                            JDIMENSION width_in_blocks, JSAMPARRAY input_data,
+;                            JSAMPARRAY output_data);
+; Each output byte is (sum of a 2x2 input block + bias) >> 2; the bias alternates 1, 2 across output columns.
+
+; r10d = JDIMENSION image_width
+; r11 = int max_v_samp_factor
+; r12d = JDIMENSION v_samp_factor
+; r13d = JDIMENSION width_in_blocks
+; r14 = JSAMPARRAY input_data
+; r15 = JSAMPARRAY output_data
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
+
+EXTN(jsimd_h2v2_downsample_sse2):
+    push        rbp
+    mov         rax, rsp
+    mov         rbp, rsp
+    collect_args 6
+
+    mov         ecx, r13d
+    shl         rcx, 3                  ; imul rcx,DCTSIZE (rcx = output_cols)
+    jz          near .return
+
+    mov         edx, r10d               ; rdx = image_width (zero-extended)
+
+    ; -- expand_right_edge
+
+    push        rcx                     ; keep output_cols; popped at .expand_end
+    shl         rcx, 1                  ; output_cols * 2
+    sub         rcx, rdx                ; rcx = bytes to append to each input row
+    jle         short .expand_end
+
+    mov         rax, r11                ; row counter = max_v_samp_factor
+    test        rax, rax
+    jle         short .expand_end
+
+    cld                                 ; rep stosb must walk forward
+    mov         rsi, r14                ; input_data
+.expandloop:
+    push        rax                     ; al and rcx are consumed by rep stosb below
+    push        rcx
+
+    mov         rdi, JSAMPROW [rsi]
+    add         rdi, rdx                ; rdi = one past the last real sample of this row
+    mov         al, JSAMPLE [rdi-1]     ; al = last real sample
+
+    rep stosb                           ; replicate it rcx times
+
+    pop         rcx
+    pop         rax
+
+    add         rsi, byte SIZEOF_JSAMPROW
+    dec         rax
+    jg          short .expandloop
+
+.expand_end:
+    pop         rcx                     ; output_cols
+
+    ; -- h2v2_downsample
+
+    mov         eax, r12d               ; rowctr
+    test        rax, rax
+    jle         near .return
+
+    mov         rdx, 0x00020001         ; bias pattern
+    movd        xmm7, edx
+    pcmpeqw     xmm6, xmm6
+    pshufd      xmm7, xmm7, 0x00        ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
+    psrlw       xmm6, BYTE_BIT          ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+    mov         rsi, r14                ; input_data
+    mov         rdi, r15                ; output_data
+.rowloop:
+    push        rcx
+    push        rdi
+    push        rsi
+
+    mov         rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]  ; inptr0
+    mov         rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]  ; inptr1
+    mov         rdi, JSAMPROW [rdi]                    ; outptr
+
+    cmp         rcx, byte SIZEOF_XMMWORD
+    jae         short .columnloop
+
+.columnloop_r8:
+    movdqa      xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]  ; tail: fewer than SIZEOF_XMMWORD output columns remain
+    movdqa      xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+    pxor        xmm2, xmm2              ; second input vectors are not read; use zeros
+    pxor        xmm3, xmm3
+    mov         rcx, SIZEOF_XMMWORD     ; so the "sub rcx" after the store ends the column loop
+    jmp         short .downsample
+
+.columnloop:
+    movdqa      xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
+    movdqa      xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+    movdqa      xmm2, XMMWORD [rdx+1*SIZEOF_XMMWORD]
+    movdqa      xmm3, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+
+.downsample:
+    movdqa      xmm4, xmm0
+    movdqa      xmm5, xmm1
+    pand        xmm0, xmm6              ; even-position bytes of row 0
+    psrlw       xmm4, BYTE_BIT          ; odd-position bytes of row 0
+    pand        xmm1, xmm6              ; same split for row 1
+    psrlw       xmm5, BYTE_BIT
+    paddw       xmm0, xmm4              ; horizontal pair sums, row 0
+    paddw       xmm1, xmm5              ; horizontal pair sums, row 1
+
+    movdqa      xmm4, xmm2
+    movdqa      xmm5, xmm3
+    pand        xmm2, xmm6
+    psrlw       xmm4, BYTE_BIT
+    pand        xmm3, xmm6
+    psrlw       xmm5, BYTE_BIT
+    paddw       xmm2, xmm4
+    paddw       xmm3, xmm5
+
+    paddw       xmm0, xmm1              ; row 0 + row 1 = 2x2 block sums
+    paddw       xmm2, xmm3
+    paddw       xmm0, xmm7              ; + bias
+    paddw       xmm2, xmm7
+    psrlw       xmm0, 2                 ; / 4
+    psrlw       xmm2, 2
+
+    packuswb    xmm0, xmm2
+
+    movdqa      XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
+
+    sub         rcx, byte SIZEOF_XMMWORD    ; outcol
+    add         rdx, byte 2*SIZEOF_XMMWORD  ; inptr0
+    add         rsi, byte 2*SIZEOF_XMMWORD  ; inptr1
+    add         rdi, byte 1*SIZEOF_XMMWORD  ; outptr
+    cmp         rcx, byte SIZEOF_XMMWORD
+    jae         near .columnloop
+    test        rcx, rcx
+    jnz         near .columnloop_r8
+
+    pop         rsi
+    pop         rdi
+    pop         rcx
+
+    add         rsi, byte 2*SIZEOF_JSAMPROW  ; input_data
+    add         rdi, byte 1*SIZEOF_JSAMPROW  ; output_data
+    dec         rax                          ; rowctr
+    jg          near .rowloop
+
+.return:
+    uncollect_args 6
+    pop         rbp
+    ret
+
+; For some reason, the OS X
linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jdcolext-avx2.asm b/simd/x86_64/jdcolext-avx2.asm new file mode 100644 index 0000000..e2b96c7 --- /dev/null +++ b/simd/x86_64/jdcolext-avx2.asm @@ -0,0 +1,497 @@ +; +; jdcolext.asm - colorspace conversion (64-bit AVX2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2012, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. 
+; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_avx2(JDIMENSION out_width, JSAMPIMAGE input_buf, +; JDIMENSION input_row, JSAMPARRAY output_buf, +; int num_rows) +; + +; r10d = JDIMENSION out_width +; r11 = JSAMPIMAGE input_buf +; r12d = JDIMENSION input_row +; r13 = JSAMPARRAY output_buf +; r14d = int num_rows + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2) + +EXTN(jsimd_ycc_rgb_convert_avx2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 5 + push rbx + + mov ecx, r10d ; num_cols + test rcx, rcx + jz near .return + + push rcx + + mov rdi, r11 + mov ecx, r12d + mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] + lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] + lea rbx, [rbx+rcx*SIZEOF_JSAMPROW] + lea rdx, [rdx+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rdi, r13 + mov eax, r14d + test rax, rax + jle near .return +.rowloop: + push rax + push rdi + push rdx + push rbx + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr0 + mov rbx, JSAMPROW [rbx] ; inptr1 + mov rdx, JSAMPROW [rdx] ; inptr2 + mov rdi, JSAMPROW [rdi] ; outptr +.columnloop: + + vmovdqu ymm5, YMMWORD [rbx] ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV) + vmovdqu ymm1, YMMWORD [rdx] ; ymm1=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV) + + vpcmpeqw ymm0, ymm0, ymm0 + vpcmpeqw ymm7, ymm7, ymm7 + vpsrlw ymm0, ymm0, BYTE_BIT ; ymm0={0xFF 0x00 0xFF 0x00 ..} + vpsllw ymm7, ymm7, 7 ; ymm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + vpand ymm4, ymm0, ymm5 ; ymm4=Cb(02468ACEGIKMOQSU)=CbE + vpsrlw ymm5, ymm5, BYTE_BIT ; ymm5=Cb(13579BDFHJLNPRTV)=CbO + vpand ymm0, ymm0, ymm1 ; ymm0=Cr(02468ACEGIKMOQSU)=CrE + vpsrlw ymm1, ymm1, BYTE_BIT ; ymm1=Cr(13579BDFHJLNPRTV)=CrO + + vpaddw ymm2, ymm4, 
ymm7 + vpaddw ymm3, ymm5, ymm7 + vpaddw ymm6, ymm0, ymm7 + vpaddw ymm7, ymm1, ymm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + vpaddw ymm4, ymm2, ymm2 ; ymm4=2*CbE + vpaddw ymm5, ymm3, ymm3 ; ymm5=2*CbO + vpaddw ymm0, ymm6, ymm6 ; ymm0=2*CrE + vpaddw ymm1, ymm7, ymm7 ; ymm1=2*CrO + + vpmulhw ymm4, ymm4, [rel PW_MF0228] ; ymm4=(2*CbE * -FIX(0.22800)) + vpmulhw ymm5, ymm5, [rel PW_MF0228] ; ymm5=(2*CbO * -FIX(0.22800)) + vpmulhw ymm0, ymm0, [rel PW_F0402] ; ymm0=(2*CrE * FIX(0.40200)) + vpmulhw ymm1, ymm1, [rel PW_F0402] ; ymm1=(2*CrO * FIX(0.40200)) + + vpaddw ymm4, ymm4, [rel PW_ONE] + vpaddw ymm5, ymm5, [rel PW_ONE] + vpsraw ymm4, ymm4, 1 ; ymm4=(CbE * -FIX(0.22800)) + vpsraw ymm5, ymm5, 1 ; ymm5=(CbO * -FIX(0.22800)) + vpaddw ymm0, ymm0, [rel PW_ONE] + vpaddw ymm1, ymm1, [rel PW_ONE] + vpsraw ymm0, ymm0, 1 ; ymm0=(CrE * FIX(0.40200)) + vpsraw ymm1, ymm1, 1 ; ymm1=(CrO * FIX(0.40200)) + + vpaddw ymm4, ymm4, ymm2 + vpaddw ymm5, ymm5, ymm3 + vpaddw ymm4, ymm4, ymm2 ; ymm4=(CbE * FIX(1.77200))=(B-Y)E + vpaddw ymm5, ymm5, ymm3 ; ymm5=(CbO * FIX(1.77200))=(B-Y)O + vpaddw ymm0, ymm0, ymm6 ; ymm0=(CrE * FIX(1.40200))=(R-Y)E + vpaddw ymm1, ymm1, ymm7 ; ymm1=(CrO * FIX(1.40200))=(R-Y)O + + vmovdqa YMMWORD [wk(0)], ymm4 ; wk(0)=(B-Y)E + vmovdqa YMMWORD [wk(1)], ymm5 ; wk(1)=(B-Y)O + + vpunpckhwd ymm4, ymm2, ymm6 + vpunpcklwd ymm2, ymm2, ymm6 + vpmaddwd ymm2, ymm2, [rel PW_MF0344_F0285] + vpmaddwd ymm4, ymm4, [rel PW_MF0344_F0285] + vpunpckhwd ymm5, ymm3, ymm7 + vpunpcklwd ymm3, ymm3, ymm7 + vpmaddwd ymm3, ymm3, [rel PW_MF0344_F0285] + vpmaddwd ymm5, ymm5, [rel PW_MF0344_F0285] + + vpaddd ymm2, ymm2, [rel PD_ONEHALF] + vpaddd ymm4, ymm4, [rel PD_ONEHALF] + vpsrad ymm2, ymm2, SCALEBITS + vpsrad ymm4, ymm4, SCALEBITS + vpaddd ymm3, ymm3, [rel PD_ONEHALF] + vpaddd ymm5, 
ymm5, [rel PD_ONEHALF] + vpsrad ymm3, ymm3, SCALEBITS + vpsrad ymm5, ymm5, SCALEBITS + + vpackssdw ymm2, ymm2, ymm4 ; ymm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + vpackssdw ymm3, ymm3, ymm5 ; ymm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + vpsubw ymm2, ymm2, ymm6 ; ymm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + vpsubw ymm3, ymm3, ymm7 ; ymm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + vmovdqu ymm5, YMMWORD [rsi] ; ymm5=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV) + + vpcmpeqw ymm4, ymm4, ymm4 + vpsrlw ymm4, ymm4, BYTE_BIT ; ymm4={0xFF 0x00 0xFF 0x00 ..} + vpand ymm4, ymm4, ymm5 ; ymm4=Y(02468ACEGIKMOQSU)=YE + vpsrlw ymm5, ymm5, BYTE_BIT ; ymm5=Y(13579BDFHJLNPRTV)=YO + + vpaddw ymm0, ymm0, ymm4 ; ymm0=((R-Y)E+YE)=RE=R(02468ACEGIKMOQSU) + vpaddw ymm1, ymm1, ymm5 ; ymm1=((R-Y)O+YO)=RO=R(13579BDFHJLNPRTV) + vpackuswb ymm0, ymm0, ymm0 ; ymm0=R(02468ACE********GIKMOQSU********) + vpackuswb ymm1, ymm1, ymm1 ; ymm1=R(13579BDF********HJLNPRTV********) + + vpaddw ymm2, ymm2, ymm4 ; ymm2=((G-Y)E+YE)=GE=G(02468ACEGIKMOQSU) + vpaddw ymm3, ymm3, ymm5 ; ymm3=((G-Y)O+YO)=GO=G(13579BDFHJLNPRTV) + vpackuswb ymm2, ymm2, ymm2 ; ymm2=G(02468ACE********GIKMOQSU********) + vpackuswb ymm3, ymm3, ymm3 ; ymm3=G(13579BDF********HJLNPRTV********) + + vpaddw ymm4, ymm4, YMMWORD [wk(0)] ; ymm4=(YE+(B-Y)E)=BE=B(02468ACEGIKMOQSU) + vpaddw ymm5, ymm5, YMMWORD [wk(1)] ; ymm5=(YO+(B-Y)O)=BO=B(13579BDFHJLNPRTV) + vpackuswb ymm4, ymm4, ymm4 ; ymm4=B(02468ACE********GIKMOQSU********) + vpackuswb ymm5, ymm5, ymm5 ; ymm5=B(13579BDF********HJLNPRTV********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **) + ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **) + ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **) + ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **) + ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **) + ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **) + ; ymmG=(** ** ** ** ** ** ** ** ** ** ** ** ** 
** ** ** ** **) + ; ymmH=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **) + + vpunpcklbw ymmA, ymmA, ymmC ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E + ; 0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U) + vpunpcklbw ymmE, ymmE, ymmB ; ymmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F + ; 2G 0H 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V) + vpunpcklbw ymmD, ymmD, ymmF ; ymmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F + ; 1H 2H 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V) + + vpsrldq ymmH, ymmA, 2 ; ymmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E 0G 1G + ; 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U -- --) + vpunpckhwd ymmG, ymmA, ymmE ; ymmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F + ; 0O 1O 2O 0P 0Q 1Q 2Q 0R 0S 1S 2S 0T 0U 1U 2U 0V) + vpunpcklwd ymmA, ymmA, ymmE ; ymmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07 + ; 0G 1G 2G 0H 0I 1I 2I 0J 0K 1K 2K 0L 0M 1M 2M 0N) + + vpsrldq ymmE, ymmE, 2 ; ymmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F 2G 0H + ; 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V -- --) + + vpsrldq ymmB, ymmD, 2 ; ymmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F 1H 2H + ; 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V -- --) + vpunpckhwd ymmC, ymmD, ymmH ; ymmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F 0G 1G + ; 1P 2P 0Q 1Q 1R 2R 0S 1S 1T 2T 0U 1U 1V 2V -- --) + vpunpcklwd ymmD, ymmD, ymmH ; ymmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18 + ; 1H 2H 0I 1I 1J 2J 0K 1K 1L 2L 0M 1M 1N 2N 0O 1O) + + vpunpckhwd ymmF, ymmE, ymmB ; ymmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F 2G 0H 1H 2H + ; 2Q 0R 1R 2R 2S 0T 1T 2T 2U 0V 1V 2V -- -- -- --) + vpunpcklwd ymmE, ymmE, ymmB ; ymmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29 + ; 2I 0J 1J 2J 2K 0L 1L 2L 2M 0N 1N 2N 2O 0P 1P 2P) + + vpshufd ymmH, ymmA, 0x4E ; ymmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03 + ; 0K 1K 2K 0L 0M 1M 2M 0N 0G 1G 2G 0H 0I 1I 2I 0J) + vpunpckldq ymmA, ymmA, ymmD ; ymmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14 + ; 0G 1G 2G 0H 1H 2H 0I 1I 0I 1I 
2I 0J 1J 2J 0K 1K) + vpunpckhdq ymmD, ymmD, ymmE ; ymmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29 + ; 1L 2L 0M 1M 2M 0N 1N 2N 1N 2N 0O 1O 2O 0P 1P 2P) + vpunpckldq ymmE, ymmE, ymmH ; ymmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07 + ; 2I 0J 1J 2J 0K 1K 2K 0L 2K 0L 1L 2L 0M 1M 2M 0N) + + vpshufd ymmH, ymmG, 0x4E ; ymmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B + ; 0S 1S 2S 0T 0U 1U 2U 0V 0O 1O 2O 0P 0Q 1Q 2Q 0R) + vpunpckldq ymmG, ymmG, ymmC ; ymmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C + ; 0O 1O 2O 0P 1P 2P 0Q 1Q 0Q 1Q 2Q 0R 1R 2R 0S 1S) + vpunpckhdq ymmC, ymmC, ymmF ; ymmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F 0G 1G 2G 0H 1H 2H + ; 1T 2T 0U 1U 2U 0V 1V 2V 1V 2V -- -- -- -- -- --) + vpunpckldq ymmF, ymmF, ymmH ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F + ; 2Q 0R 1R 2R 0S 1S 2S 0T 2S 0T 1T 2T 0U 1U 2U 0V) + + vpunpcklqdq ymmH, ymmA, ymmE ; ymmH=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + vpunpcklqdq ymmG, ymmD, ymmG ; ymmG=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A + ; 1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q) + vpunpcklqdq ymmC, ymmF, ymmC ; ymmC=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + + vperm2i128 ymmA, ymmH, ymmG, 0x20 ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05 + ; 15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + vperm2i128 ymmD, ymmC, ymmH, 0x30 ; ymmD=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F + ; 0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L) + vperm2i128 ymmF, ymmG, ymmC, 0x31 ; ymmF=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q + ; 2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V) + + cmp rcx, byte SIZEOF_YMMWORD + jb short .column_st64 + + test rdi, SIZEOF_YMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + vmovntdq YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA + vmovntdq YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD + vmovntdq YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmF + 
jmp short .out0 +.out1: ; --(unaligned)----------------- + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD + vmovdqu YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmF +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr + sub rcx, byte SIZEOF_YMMWORD + jz near .nextrow + + add rsi, byte SIZEOF_YMMWORD ; inptr0 + add rbx, byte SIZEOF_YMMWORD ; inptr1 + add rdx, byte SIZEOF_YMMWORD ; inptr2 + jmp near .columnloop + +.column_st64: + lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE + cmp rcx, byte 2*SIZEOF_YMMWORD + jb short .column_st32 + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD + add rdi, byte 2*SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmF + sub rcx, byte 2*SIZEOF_YMMWORD + jmp short .column_st31 +.column_st32: + cmp rcx, byte SIZEOF_YMMWORD + jb short .column_st31 + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA + add rdi, byte SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmD + sub rcx, byte SIZEOF_YMMWORD + jmp short .column_st31 +.column_st31: + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st15 + vmovdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + vperm2i128 ymmA, ymmA, ymmA, 1 + sub rcx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + vmovq XMM_MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + vpsrldq xmmA, xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + vmovd XMM_DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + vpsrldq xmmA, xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. 
+ vmovd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + mov BYTE [rdi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + vpcmpeqb ymm6, ymm6, ymm6 ; ymm6=XE=X(02468ACE********GIKMOQSU********) + vpcmpeqb ymm7, ymm7, ymm7 ; ymm7=XO=X(13579BDF********HJLNPRTV********) +%else + vpxor ymm6, ymm6, ymm6 ; ymm6=XE=X(02468ACE********GIKMOQSU********) + vpxor ymm7, ymm7, ymm7 ; ymm7=XO=X(13579BDF********HJLNPRTV********) +%endif + ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **) + ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **) + ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **) + ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **) + ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **) + ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **) + ; ymmG=(30 32 34 36 38 3A 3C 3E ** 3G 3I 3K 3M 3O 3Q 3S 3U **) + ; ymmH=(31 33 35 37 39 3B 3D 3F ** 3H 3J 3L 3N 3P 3R 3T 3V **) + + vpunpcklbw ymmA, ymmA, ymmC ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E + ; 0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U) + vpunpcklbw ymmE, ymmE, ymmG ; ymmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E + ; 2G 3G 2I 3I 2K 3K 2M 3M 2O 3O 2Q 3Q 2S 3S 2U 3U) + vpunpcklbw ymmB, ymmB, ymmD ; ymmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F + ; 0H 1H 0J 1J 0L 1L 0N 1N 0P 1P 0R 1R 0T 1T 0V 1V) + vpunpcklbw ymmF, ymmF, ymmH ; ymmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F + ; 2H 3H 2J 3J 2L 3L 2N 3N 2P 3P 2R 3R 2T 3T 2V 3V) + + vpunpckhwd ymmC, ymmA, ymmE ; ymmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E + ; 0O 1O 2O 3O 0Q 1Q 2Q 3Q 0S 1S 2S 3S 0U 1U 2U 3U) + vpunpcklwd ymmA, ymmA, ymmE ; ymmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36 + ; 0G 1G 
2G 3G 0I 1I 2I 3I 0K 1K 2K 3K 0M 1M 2M 3M) + vpunpckhwd ymmG, ymmB, ymmF ; ymmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F + ; 0P 1P 2P 3P 0R 1R 2R 3R 0T 1T 2T 3T 0V 1V 2V 3V) + vpunpcklwd ymmB, ymmB, ymmF ; ymmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37 + ; 0H 1H 2H 3H 0J 1J 2J 3J 0L 1L 2L 3L 0N 1N 2N 3N) + + vpunpckhdq ymmE, ymmA, ymmB ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + vpunpckldq ymmB, ymmA, ymmB ; ymmB=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J) + vpunpckhdq ymmF, ymmC, ymmG ; ymmF=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + vpunpckldq ymmG, ymmC, ymmG ; ymmG=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R) + + vperm2i128 ymmA, ymmB, ymmE, 0x20 ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 + ; 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + vperm2i128 ymmD, ymmG, ymmF, 0x20 ; ymmD=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B + ; 0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + vperm2i128 ymmC, ymmB, ymmE, 0x31 ; ymmC=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J + ; 0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N) + vperm2i128 ymmH, ymmG, ymmF, 0x31 ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R + ; 0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V) + + cmp rcx, byte SIZEOF_YMMWORD + jb short .column_st64 + + test rdi, SIZEOF_YMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + vmovntdq YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA + vmovntdq YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD + vmovntdq YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmC + vmovntdq YMMWORD [rdi+3*SIZEOF_YMMWORD], ymmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD + vmovdqu YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmC + 
vmovdqu YMMWORD [rdi+3*SIZEOF_YMMWORD], ymmH +.out0: + add rdi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr + sub rcx, byte SIZEOF_YMMWORD + jz near .nextrow + + add rsi, byte SIZEOF_YMMWORD ; inptr0 + add rbx, byte SIZEOF_YMMWORD ; inptr1 + add rdx, byte SIZEOF_YMMWORD ; inptr2 + jmp near .columnloop + +.column_st64: + cmp rcx, byte SIZEOF_YMMWORD/2 + jb short .column_st32 + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA + vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD + add rdi, byte 2*SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmC + vmovdqa ymmD, ymmH + sub rcx, byte SIZEOF_YMMWORD/2 +.column_st32: + cmp rcx, byte SIZEOF_YMMWORD/4 + jb short .column_st16 + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA + add rdi, byte SIZEOF_YMMWORD ; outptr + vmovdqa ymmA, ymmD + sub rcx, byte SIZEOF_YMMWORD/4 +.column_st16: + cmp rcx, byte SIZEOF_YMMWORD/8 + jb short .column_st15 + vmovdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + vperm2i128 ymmA, ymmA, ymmA, 1 + add rdi, byte SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_YMMWORD/8 +.column_st15: + ; Store two pixels (8 bytes) of ymmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_YMMWORD/16 + jb short .column_st7 + vmovq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_YMMWORD/16*4 + sub rcx, byte SIZEOF_YMMWORD/16 + vpsrldq xmmA, SIZEOF_YMMWORD/16*4 +.column_st7: + ; Store one pixel (4 bytes) of ymmA to the output when it has enough + ; space. 
+ test rcx, rcx + jz short .nextrow + vmovd XMM_DWORD [rdi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.nextrow: + pop rcx + pop rsi + pop rbx + pop rdx + pop rdi + pop rax + + add rsi, byte SIZEOF_JSAMPROW + add rbx, byte SIZEOF_JSAMPROW + add rdx, byte SIZEOF_JSAMPROW + add rdi, byte SIZEOF_JSAMPROW ; output_buf + dec rax ; num_rows + jg near .rowloop + + sfence ; flush the write buffer + +.return: + pop rbx + vzeroupper + uncollect_args 5 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jdcolext-sse2.asm b/simd/x86_64/jdcolext-sse2.asm new file mode 100644 index 0000000..a94954b --- /dev/null +++ b/simd/x86_64/jdcolext-sse2.asm @@ -0,0 +1,440 @@ +; +; jdcolext.asm - colorspace conversion (64-bit SSE2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2012, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Convert some rows of samples to the output colorspace. 
+; +; GLOBAL(void) +; jsimd_ycc_rgb_convert_sse2(JDIMENSION out_width, JSAMPIMAGE input_buf, +; JDIMENSION input_row, JSAMPARRAY output_buf, +; int num_rows) +; + +; r10d = JDIMENSION out_width +; r11 = JSAMPIMAGE input_buf +; r12d = JDIMENSION input_row +; r13 = JSAMPARRAY output_buf +; r14d = int num_rows + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2) + +EXTN(jsimd_ycc_rgb_convert_sse2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 5 + push rbx + + mov ecx, r10d ; num_cols + test rcx, rcx + jz near .return + + push rcx + + mov rdi, r11 + mov ecx, r12d + mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] + lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] + lea rbx, [rbx+rcx*SIZEOF_JSAMPROW] + lea rdx, [rdx+rcx*SIZEOF_JSAMPROW] + + pop rcx + + mov rdi, r13 + mov eax, r14d + test rax, rax + jle near .return +.rowloop: + push rax + push rdi + push rdx + push rbx + push rsi + push rcx ; col + + mov rsi, JSAMPROW [rsi] ; inptr0 + mov rbx, JSAMPROW [rbx] ; inptr1 + mov rdx, JSAMPROW [rdx] ; inptr2 + mov rdi, JSAMPROW [rdi] ; outptr +.columnloop: + + movdqa xmm5, XMMWORD [rbx] ; xmm5=Cb(0123456789ABCDEF) + movdqa xmm1, XMMWORD [rdx] ; xmm1=Cr(0123456789ABCDEF) + + pcmpeqw xmm4, xmm4 + pcmpeqw xmm7, xmm7 + psrlw xmm4, BYTE_BIT + psllw xmm7, 7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + movdqa xmm0, xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..} + + pand xmm4, xmm5 ; xmm4=Cb(02468ACE)=CbE + psrlw xmm5, BYTE_BIT ; xmm5=Cb(13579BDF)=CbO + pand xmm0, xmm1 ; xmm0=Cr(02468ACE)=CrE + psrlw xmm1, BYTE_BIT ; xmm1=Cr(13579BDF)=CrO + + paddw xmm4, xmm7 + paddw xmm5, xmm7 + paddw xmm0, xmm7 + paddw xmm1, xmm7 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; 
G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + Cb + + movdqa xmm2, xmm4 ; xmm2=CbE + movdqa xmm3, xmm5 ; xmm3=CbO + paddw xmm4, xmm4 ; xmm4=2*CbE + paddw xmm5, xmm5 ; xmm5=2*CbO + movdqa xmm6, xmm0 ; xmm6=CrE + movdqa xmm7, xmm1 ; xmm7=CrO + paddw xmm0, xmm0 ; xmm0=2*CrE + paddw xmm1, xmm1 ; xmm1=2*CrO + + pmulhw xmm4, [rel PW_MF0228] ; xmm4=(2*CbE * -FIX(0.22800)) + pmulhw xmm5, [rel PW_MF0228] ; xmm5=(2*CbO * -FIX(0.22800)) + pmulhw xmm0, [rel PW_F0402] ; xmm0=(2*CrE * FIX(0.40200)) + pmulhw xmm1, [rel PW_F0402] ; xmm1=(2*CrO * FIX(0.40200)) + + paddw xmm4, [rel PW_ONE] + paddw xmm5, [rel PW_ONE] + psraw xmm4, 1 ; xmm4=(CbE * -FIX(0.22800)) + psraw xmm5, 1 ; xmm5=(CbO * -FIX(0.22800)) + paddw xmm0, [rel PW_ONE] + paddw xmm1, [rel PW_ONE] + psraw xmm0, 1 ; xmm0=(CrE * FIX(0.40200)) + psraw xmm1, 1 ; xmm1=(CrO * FIX(0.40200)) + + paddw xmm4, xmm2 + paddw xmm5, xmm3 + paddw xmm4, xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E + paddw xmm5, xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O + paddw xmm0, xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E + paddw xmm1, xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O + + movdqa xmm4, xmm2 + movdqa xmm5, xmm3 + punpcklwd xmm2, xmm6 + punpckhwd xmm4, xmm6 + pmaddwd xmm2, [rel PW_MF0344_F0285] + pmaddwd xmm4, [rel PW_MF0344_F0285] + punpcklwd xmm3, xmm7 + punpckhwd xmm5, xmm7 + pmaddwd xmm3, [rel PW_MF0344_F0285] + pmaddwd xmm5, [rel PW_MF0344_F0285] + + paddd xmm2, [rel PD_ONEHALF] + paddd xmm4, [rel PD_ONEHALF] + psrad xmm2, SCALEBITS + psrad xmm4, SCALEBITS + paddd xmm3, [rel PD_ONEHALF] + paddd xmm5, [rel PD_ONEHALF] + psrad xmm3, SCALEBITS + psrad xmm5, SCALEBITS + + packssdw xmm2, xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285) + packssdw xmm3, xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285) + psubw xmm2, xmm6 ; 
xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E + psubw xmm3, xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O + + movdqa xmm5, XMMWORD [rsi] ; xmm5=Y(0123456789ABCDEF) + + pcmpeqw xmm4, xmm4 + psrlw xmm4, BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..} + pand xmm4, xmm5 ; xmm4=Y(02468ACE)=YE + psrlw xmm5, BYTE_BIT ; xmm5=Y(13579BDF)=YO + + paddw xmm0, xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE) + paddw xmm1, xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF) + packuswb xmm0, xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1, xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2, xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE) + paddw xmm3, xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF) + packuswb xmm2, xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3, xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE) + paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF) + packuswb xmm4, xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5, xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA, xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE, xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD, xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG, xmmA + movdqa xmmH, xmmA + punpcklwd xmmA, xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG, xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH, 2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE, 2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC, xmmD + movdqa xmmB, xmmD + punpcklwd xmmD, xmmH ; xmmD=(11 21 02 12 13 23 04 14 
15 25 06 16 17 27 08 18) + punpckhwd xmmC, xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB, 2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF, xmmE + punpcklwd xmmE, xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF, xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH, xmmA, 0x4E ; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB, xmmE + punpckldq xmmA, xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE, xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD, xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH, xmmG, 0x4E ; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB, xmmF + punpckldq xmmG, xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF, xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC, xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA, xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD, xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF, xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .nextrow + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte 
SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE + cmp rcx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmF + sub rcx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmD + sub rcx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. 
+ test rcx, rcx + jz short .nextrow + mov BYTE [rdi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6, xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7, xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6, xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7, xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA, xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE, xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB, xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF, xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC, xmmA + punpcklwd xmmA, xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC, xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG, xmmB + punpcklwd xmmB, xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG, xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD, xmmA + punpckldq xmmA, xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD, xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH, xmmC + punpckldq xmmC, xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH, xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD 
[rdi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .nextrow + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + cmp rcx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmC + movdqa xmmD, xmmH + sub rcx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmD + sub rcx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. 
+ test rcx, rcx + jz short .nextrow + movd XMM_DWORD [rdi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.nextrow: + pop rcx + pop rsi + pop rbx + pop rdx + pop rdi + pop rax + + add rsi, byte SIZEOF_JSAMPROW + add rbx, byte SIZEOF_JSAMPROW + add rdx, byte SIZEOF_JSAMPROW + add rdi, byte SIZEOF_JSAMPROW ; output_buf + dec rax ; num_rows + jg near .rowloop + + sfence ; flush the write buffer + +.return: + pop rbx + uncollect_args 5 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jdcolor-avx2.asm b/simd/x86_64/jdcolor-avx2.asm new file mode 100644 index 0000000..abad176 --- /dev/null +++ b/simd/x86_64/jdcolor-avx2.asm @@ -0,0 +1,120 @@ +; +; jdcolor.asm - colorspace conversion (64-bit AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_ycc_rgb_convert_avx2) + +EXTN(jconst_ycc_rgb_convert_avx2): + +PW_F0402 times 16 dw F_0_402 +PW_MF0228 times 16 dw -F_0_228 +PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285 +PW_ONE times 16 dw 1 +PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extrgb_convert_avx2 +%include "jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extrgbx_convert_avx2 +%include "jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extbgr_convert_avx2 +%include 
"jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extbgrx_convert_avx2 +%include "jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extxbgr_convert_avx2 +%include "jdcolext-avx2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_avx2 jsimd_ycc_extxrgb_convert_avx2 +%include "jdcolext-avx2.asm" diff --git a/simd/x86_64/jdcolor-sse2.asm b/simd/x86_64/jdcolor-sse2.asm new file mode 100644 index 0000000..e7079f6 --- /dev/null +++ b/simd/x86_64/jdcolor-sse2.asm @@ -0,0 +1,119 @@ +; +; jdcolor.asm - colorspace conversion (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + +%define SCALEBITS 16 + +F_0_344 equ 22554 ; FIX(0.34414) +F_0_714 equ 46802 ; FIX(0.71414) +F_1_402 equ 91881 ; FIX(1.40200) +F_1_772 equ 116130 ; FIX(1.77200) +F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1) +F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414) +F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200) + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_ycc_rgb_convert_sse2) + +EXTN(jconst_ycc_rgb_convert_sse2): + +PW_F0402 times 8 dw F_0_402 +PW_MF0228 times 8 dw -F_0_228 +PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285 +PW_ONE times 8 dw 1 +PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 + +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGB_RED +%define RGB_GREEN EXT_RGB_GREEN +%define RGB_BLUE EXT_RGB_BLUE +%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_RGBX_RED +%define RGB_GREEN EXT_RGBX_GREEN +%define RGB_BLUE EXT_RGBX_BLUE +%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGR_RED +%define RGB_GREEN EXT_BGR_GREEN +%define RGB_BLUE EXT_BGR_BLUE +%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2 +%include 
"jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_BGRX_RED +%define RGB_GREEN EXT_BGRX_GREEN +%define RGB_BLUE EXT_BGRX_BLUE +%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XBGR_RED +%define RGB_GREEN EXT_XBGR_GREEN +%define RGB_BLUE EXT_XBGR_BLUE +%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2 +%include "jdcolext-sse2.asm" + +%undef RGB_RED +%undef RGB_GREEN +%undef RGB_BLUE +%undef RGB_PIXELSIZE +%define RGB_RED EXT_XRGB_RED +%define RGB_GREEN EXT_XRGB_GREEN +%define RGB_BLUE EXT_XRGB_BLUE +%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE +%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2 +%include "jdcolext-sse2.asm" diff --git a/simd/x86_64/jdmerge-avx2.asm b/simd/x86_64/jdmerge-avx2.asm new file mode 100644 index 0000000..ca3f063 --- /dev/null +++ b/simd/x86_64/jdmerge-avx2.asm @@ -0,0 +1,138 @@ +; +; jdmerge.asm - merged upsampling/color conversion (64-bit AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+; Fixed point: FIX(x) = x * 2^SCALEBITS rounded to an integer (FIX(1) = 65536).
+%define SCALEBITS  16
+
+F_0_344 equ 22554              ; FIX(0.34414)
+F_0_714 equ 46802              ; FIX(0.71414)
+F_1_402 equ 91881              ; FIX(1.40200)
+F_1_772 equ 116130             ; FIX(1.77200)
+F_0_402 equ (F_1_402 - 65536)  ; FIX(1.40200) - FIX(1)
+F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
+F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+    SECTION     SEG_CONST
+
+    alignz      32
+    GLOBAL_DATA(jconst_merged_upsample_avx2)
+
+EXTN(jconst_merged_upsample_avx2):
+
+PW_F0402        times 16 dw  F_0_402             ; 16 words (one ymmword): vpmulhw factor for Cr
+PW_MF0228       times 16 dw -F_0_228             ; 16 words: vpmulhw factor for Cb
+PW_MF0344_F0285 times 8  dw -F_0_344, F_0_285    ; 8 (Cb,Cr) coefficient pairs for vpmaddwd
+PW_ONE          times 16 dw  1                   ; rounding bias added before "vpsraw ..., 1"
+PD_ONEHALF      times 8  dd  1 << (SCALEBITS - 1) ; rounding bias added before "vpsrad ..., SCALEBITS"
+
+    alignz      32
+
+; --------------------------------------------------------------------------
+    SECTION     SEG_TEXT
+    BITS        64
+; Base instantiation: nothing is renamed in this file before this include.
+%include "jdmrgext-avx2.asm"
+; ---- EXT_RGB: emits jsimd_h2v{1,2}_extrgb_merged_upsample_avx2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_RGB_RED
+%define RGB_GREEN  EXT_RGB_GREEN
+%define RGB_BLUE  EXT_RGB_BLUE
+%define RGB_PIXELSIZE  EXT_RGB_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_avx2 \
+  jsimd_h2v1_extrgb_merged_upsample_avx2
+%define jsimd_h2v2_merged_upsample_avx2 \
+  jsimd_h2v2_extrgb_merged_upsample_avx2
+%include "jdmrgext-avx2.asm"
+; ---- EXT_RGBX: emits jsimd_h2v{1,2}_extrgbx_merged_upsample_avx2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_RGBX_RED
+%define RGB_GREEN  EXT_RGBX_GREEN
+%define RGB_BLUE  EXT_RGBX_BLUE
+%define RGB_PIXELSIZE  EXT_RGBX_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_avx2 \
+  jsimd_h2v1_extrgbx_merged_upsample_avx2
+%define jsimd_h2v2_merged_upsample_avx2 \
+  jsimd_h2v2_extrgbx_merged_upsample_avx2
+%include "jdmrgext-avx2.asm"
+; ---- EXT_BGR: emits jsimd_h2v{1,2}_extbgr_merged_upsample_avx2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define
RGB_RED  EXT_BGR_RED
+%define RGB_GREEN  EXT_BGR_GREEN
+%define RGB_BLUE  EXT_BGR_BLUE
+%define RGB_PIXELSIZE  EXT_BGR_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_avx2 \
+  jsimd_h2v1_extbgr_merged_upsample_avx2
+%define jsimd_h2v2_merged_upsample_avx2 \
+  jsimd_h2v2_extbgr_merged_upsample_avx2
+%include "jdmrgext-avx2.asm"
+; ---- EXT_BGRX: emits jsimd_h2v{1,2}_extbgrx_merged_upsample_avx2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_BGRX_RED
+%define RGB_GREEN  EXT_BGRX_GREEN
+%define RGB_BLUE  EXT_BGRX_BLUE
+%define RGB_PIXELSIZE  EXT_BGRX_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_avx2 \
+  jsimd_h2v1_extbgrx_merged_upsample_avx2
+%define jsimd_h2v2_merged_upsample_avx2 \
+  jsimd_h2v2_extbgrx_merged_upsample_avx2
+%include "jdmrgext-avx2.asm"
+; ---- EXT_XBGR: emits jsimd_h2v{1,2}_extxbgr_merged_upsample_avx2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_XBGR_RED
+%define RGB_GREEN  EXT_XBGR_GREEN
+%define RGB_BLUE  EXT_XBGR_BLUE
+%define RGB_PIXELSIZE  EXT_XBGR_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_avx2 \
+  jsimd_h2v1_extxbgr_merged_upsample_avx2
+%define jsimd_h2v2_merged_upsample_avx2 \
+  jsimd_h2v2_extxbgr_merged_upsample_avx2
+%include "jdmrgext-avx2.asm"
+; ---- EXT_XRGB: emits jsimd_h2v{1,2}_extxrgb_merged_upsample_avx2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_XRGB_RED
+%define RGB_GREEN  EXT_XRGB_GREEN
+%define RGB_BLUE  EXT_XRGB_BLUE
+%define RGB_PIXELSIZE  EXT_XRGB_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_avx2 \
+  jsimd_h2v1_extxrgb_merged_upsample_avx2
+%define jsimd_h2v2_merged_upsample_avx2 \
+  jsimd_h2v2_extxrgb_merged_upsample_avx2
+%include "jdmrgext-avx2.asm"
diff --git a/simd/x86_64/jdmerge-sse2.asm b/simd/x86_64/jdmerge-sse2.asm
new file mode 100644
index 0000000..f3e09fa
--- /dev/null
+++ b/simd/x86_64/jdmerge-sse2.asm
@@ -0,0 +1,137 @@
+;
+; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman for Cendio AB
+; Copyright (C) 2009, 2016, D. R. Commander.
+;
+; Based on the x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+; Fixed point: FIX(x) = x * 2^SCALEBITS rounded to an integer (FIX(1) = 65536).
+%define SCALEBITS  16
+
+F_0_344 equ 22554              ; FIX(0.34414)
+F_0_714 equ 46802              ; FIX(0.71414)
+F_1_402 equ 91881              ; FIX(1.40200)
+F_1_772 equ 116130             ; FIX(1.77200)
+F_0_402 equ (F_1_402 - 65536)  ; FIX(1.40200) - FIX(1)
+F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
+F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+    SECTION     SEG_CONST
+
+    alignz      32
+    GLOBAL_DATA(jconst_merged_upsample_sse2)
+
+EXTN(jconst_merged_upsample_sse2):
+
+PW_F0402        times 8 dw  F_0_402              ; 8 words (one xmmword): pmulhw factor for Cr
+PW_MF0228       times 8 dw -F_0_228              ; 8 words: pmulhw factor for Cb
+PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285     ; 4 (Cb,Cr) coefficient pairs for pmaddwd
+PW_ONE          times 8 dw  1                    ; rounding bias added before "psraw ..., 1"
+PD_ONEHALF      times 4 dd  1 << (SCALEBITS - 1) ; rounding bias added before "psrad ..., SCALEBITS"
+
+    alignz      32
+
+; --------------------------------------------------------------------------
+    SECTION     SEG_TEXT
+    BITS        64
+; Base instantiation: nothing is renamed in this file before this include.
+%include "jdmrgext-sse2.asm"
+; ---- EXT_RGB: emits jsimd_h2v{1,2}_extrgb_merged_upsample_sse2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_RGB_RED
+%define RGB_GREEN  EXT_RGB_GREEN
+%define RGB_BLUE  EXT_RGB_BLUE
+%define RGB_PIXELSIZE  EXT_RGB_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 \
+  jsimd_h2v1_extrgb_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 \
+  jsimd_h2v2_extrgb_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
+; ---- EXT_RGBX: emits jsimd_h2v{1,2}_extrgbx_merged_upsample_sse2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_RGBX_RED
+%define RGB_GREEN
EXT_RGBX_GREEN
+%define RGB_BLUE  EXT_RGBX_BLUE
+%define RGB_PIXELSIZE  EXT_RGBX_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 \
+  jsimd_h2v1_extrgbx_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 \
+  jsimd_h2v2_extrgbx_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
+; ---- EXT_BGR: emits jsimd_h2v{1,2}_extbgr_merged_upsample_sse2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_BGR_RED
+%define RGB_GREEN  EXT_BGR_GREEN
+%define RGB_BLUE  EXT_BGR_BLUE
+%define RGB_PIXELSIZE  EXT_BGR_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 \
+  jsimd_h2v1_extbgr_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 \
+  jsimd_h2v2_extbgr_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
+; ---- EXT_BGRX: emits jsimd_h2v{1,2}_extbgrx_merged_upsample_sse2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_BGRX_RED
+%define RGB_GREEN  EXT_BGRX_GREEN
+%define RGB_BLUE  EXT_BGRX_BLUE
+%define RGB_PIXELSIZE  EXT_BGRX_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 \
+  jsimd_h2v1_extbgrx_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 \
+  jsimd_h2v2_extbgrx_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
+; ---- EXT_XBGR: emits jsimd_h2v{1,2}_extxbgr_merged_upsample_sse2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_XBGR_RED
+%define RGB_GREEN  EXT_XBGR_GREEN
+%define RGB_BLUE  EXT_XBGR_BLUE
+%define RGB_PIXELSIZE  EXT_XBGR_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 \
+  jsimd_h2v1_extxbgr_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 \
+  jsimd_h2v2_extxbgr_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
+; ---- EXT_XRGB: emits jsimd_h2v{1,2}_extxrgb_merged_upsample_sse2 ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED  EXT_XRGB_RED
+%define RGB_GREEN  EXT_XRGB_GREEN
+%define RGB_BLUE  EXT_XRGB_BLUE
+%define RGB_PIXELSIZE  EXT_XRGB_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 \
+  jsimd_h2v1_extxrgb_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 \
+  jsimd_h2v2_extxrgb_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
diff --git a/simd/x86_64/jdmrgext-avx2.asm
b/simd/x86_64/jdmrgext-avx2.asm new file mode 100644 index 0000000..04e8a94 --- /dev/null +++ b/simd/x86_64/jdmrgext-avx2.asm @@ -0,0 +1,595 @@ +; +; jdmrgext.asm - merged upsampling/color conversion (64-bit AVX2) +; +; Copyright 2009, 2012 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2012, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jcolsamp.inc" + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. 
+;
+; GLOBAL(void)
+; jsimd_h2v1_merged_upsample_avx2(JDIMENSION output_width,
+;                                 JSAMPIMAGE input_buf,
+;                                 JDIMENSION in_row_group_ctr,
+;                                 JSAMPARRAY output_buf);
+; Each pass of .columnloop turns 32 Cb/Cr samples plus 64 Y samples into 64 pixels (two 32-pixel Y passes).
+
+; r10d = JDIMENSION output_width
+; r11 = JSAMPIMAGE input_buf
+; r12d = JDIMENSION in_row_group_ctr
+; r13 = JSAMPARRAY output_buf
+
+%define wk(i)   rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD  ; ymmword wk[WK_NUM]
+%define WK_NUM  3
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
+
+EXTN(jsimd_h2v1_merged_upsample_avx2):
+    push        rbp
+    mov         rax, rsp                     ; rax = rsp after the push (address of the saved rbp)
+    sub         rsp, byte 4                  ; move below the saved rbp before aligning down
+    and         rsp, byte (-SIZEOF_YMMWORD)  ; align to 256 bits
+    mov         [rsp], rax                   ; keep the pre-alignment rsp for the epilogue's "pop rsp"
+    mov         rbp, rsp                     ; rbp = aligned rbp
+    lea         rsp, [wk(0)]                 ; reserve WK_NUM ymmwords of scratch below rbp
+    collect_args 4                           ; macro defined elsewhere; presumably sets up r10-r13 as listed above
+    push        rbx
+
+    mov         ecx, r10d               ; col
+    test        rcx, rcx
+    jz          near .return            ; zero width: nothing to convert
+
+    push        rcx                     ; save col while rcx serves as the row index
+
+    mov         rdi, r11                ; rdi = input_buf
+    mov         ecx, r12d               ; rcx = in_row_group_ctr
+    mov         rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]  ; rsi = input_buf[0]
+    mov         rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]  ; rbx = input_buf[1]
+    mov         rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]  ; rdx = input_buf[2]
+    mov         rdi, r13                ; rdi = output_buf
+    mov         rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW]  ; inptr0
+    mov         rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW]  ; inptr1
+    mov         rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW]  ; inptr2
+    mov         rdi, JSAMPROW [rdi]     ; outptr
+
+    pop         rcx                     ; col
+
+.columnloop:
+
+    vmovdqu     ymm6, YMMWORD [rbx]     ; ymm6=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
+    vmovdqu     ymm7, YMMWORD [rdx]     ; ymm7=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV)
+
+    vpxor       ymm1, ymm1, ymm1        ; ymm1=(all 0's)
+    vpcmpeqw    ymm3, ymm3, ymm3        ; ymm3=(all 1's)
+    vpsllw      ymm3, ymm3, 7           ; ymm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+
+    vpermq      ymm6, ymm6, 0xd8        ; ymm6=Cb(01234567GHIJKLMN89ABCDEFOPQRSTUV)
+    vpermq      ymm7, ymm7, 0xd8        ; ymm7=Cr(01234567GHIJKLMN89ABCDEFOPQRSTUV)
+    vpunpcklbw  ymm4, ymm6, ymm1        ; ymm4=Cb(0123456789ABCDEF)=CbL
+    vpunpckhbw  ymm6, ymm6, ymm1        ; ymm6=Cb(GHIJKLMNOPQRSTUV)=CbH
+    vpunpcklbw  ymm0, ymm7, ymm1        ; ymm0=Cr(0123456789ABCDEF)=CrL
+    vpunpckhbw  ymm7, ymm7, ymm1        ; ymm7=Cr(GHIJKLMNOPQRSTUV)=CrH
+
+    vpaddw      ymm5, ymm6, ymm3        ; ymm5=CbH-128 (0xFF80 is -128 as a signed word)
+    vpaddw      ymm2, ymm4, ymm3        ; ymm2=CbL-128
+    vpaddw      ymm1, ymm7, ymm3        ; ymm1=CrH-128
+    vpaddw      ymm3, ymm0, ymm3        ; ymm3=CrL-128; "Cb"/"Cr" below mean these centered values
+
+    ; (Original)
+    ; R = Y                + 1.40200 * Cr
+    ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+    ; B = Y + 1.77200 * Cb
+    ;
+    ; (This implementation)
+    ; R = Y                + 0.40200 * Cr + Cr
+    ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+    ; B = Y - 0.22800 * Cb + Cb + Cb
+
+    vpaddw      ymm6, ymm5, ymm5        ; ymm6=2*CbH
+    vpaddw      ymm4, ymm2, ymm2        ; ymm4=2*CbL
+    vpaddw      ymm7, ymm1, ymm1        ; ymm7=2*CrH
+    vpaddw      ymm0, ymm3, ymm3        ; ymm0=2*CrL
+
+    vpmulhw     ymm6, ymm6, [rel PW_MF0228]  ; ymm6=(2*CbH * -FIX(0.22800))
+    vpmulhw     ymm4, ymm4, [rel PW_MF0228]  ; ymm4=(2*CbL * -FIX(0.22800))
+    vpmulhw     ymm7, ymm7, [rel PW_F0402]   ; ymm7=(2*CrH * FIX(0.40200))
+    vpmulhw     ymm0, ymm0, [rel PW_F0402]   ; ymm0=(2*CrL * FIX(0.40200))
+
+    vpaddw      ymm6, ymm6, [rel PW_ONE]     ; +1, then >>1 below: halve with rounding
+    vpaddw      ymm4, ymm4, [rel PW_ONE]
+    vpsraw      ymm6, ymm6, 1           ; ymm6=(CbH * -FIX(0.22800))
+    vpsraw      ymm4, ymm4, 1           ; ymm4=(CbL * -FIX(0.22800))
+    vpaddw      ymm7, ymm7, [rel PW_ONE]
+    vpaddw      ymm0, ymm0, [rel PW_ONE]
+    vpsraw      ymm7, ymm7, 1           ; ymm7=(CrH * FIX(0.40200))
+    vpsraw      ymm0, ymm0, 1           ; ymm0=(CrL * FIX(0.40200))
+
+    vpaddw      ymm6, ymm6, ymm5        ; first "+ Cb" of "-0.228*Cb + Cb + Cb"
+    vpaddw      ymm4, ymm4, ymm2
+    vpaddw      ymm6, ymm6, ymm5        ; ymm6=(CbH * FIX(1.77200))=(B-Y)H
+    vpaddw      ymm4, ymm4, ymm2        ; ymm4=(CbL * FIX(1.77200))=(B-Y)L
+    vpaddw      ymm7, ymm7, ymm1        ; ymm7=(CrH * FIX(1.40200))=(R-Y)H
+    vpaddw      ymm0, ymm0, ymm3        ; ymm0=(CrL * FIX(1.40200))=(R-Y)L
+
+    vmovdqa     YMMWORD [wk(0)], ymm6   ; wk(0)=(B-Y)H
+    vmovdqa     YMMWORD [wk(1)], ymm7   ; wk(1)=(R-Y)H
+
+    vpunpckhwd  ymm6, ymm5, ymm1        ; interleave CbH,CrH words into (Cb,Cr) pairs for vpmaddwd
+    vpunpcklwd  ymm5, ymm5, ymm1
+    vpmaddwd    ymm5, ymm5, [rel PW_MF0344_F0285]  ; each dword = Cb*-FIX(0.34414) + Cr*FIX(0.28586)
+    vpmaddwd    ymm6, ymm6, [rel PW_MF0344_F0285]
+    vpunpckhwd  ymm7, ymm2, ymm3        ; same pairing for CbL,CrL
+    vpunpcklwd  ymm2, ymm2, ymm3
+    vpmaddwd    ymm2, ymm2, [rel PW_MF0344_F0285]
+    vpmaddwd    ymm7, ymm7, [rel PW_MF0344_F0285]
+
+    vpaddd      ymm5, ymm5, [rel PD_ONEHALF]  ; add 0.5 in fixed point, then descale by SCALEBITS
+    vpaddd      ymm6, ymm6, [rel PD_ONEHALF]
+    vpsrad      ymm5, ymm5, SCALEBITS
+    vpsrad      ymm6, ymm6, SCALEBITS
+    vpaddd      ymm2, ymm2, [rel PD_ONEHALF]
+    vpaddd      ymm7, ymm7, [rel PD_ONEHALF]
+    vpsrad      ymm2, ymm2, SCALEBITS
+    vpsrad      ymm7, ymm7, SCALEBITS
+
+    vpackssdw   ymm5, ymm5, ymm6        ; ymm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+    vpackssdw   ymm2, ymm2, ymm7        ; ymm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+    vpsubw      ymm5, ymm5, ymm1        ; ymm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+    vpsubw      ymm2, ymm2, ymm3        ; ymm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+
+    vmovdqa     YMMWORD [wk(2)], ymm5   ; wk(2)=(G-Y)H
+
+    mov         al, 2                   ; Yctr
+    jmp         short .Yloop_1st        ; first pass uses the L halves still in ymm0/ymm2/ymm4
+
+.Yloop_2nd:
+    vmovdqa     ymm0, YMMWORD [wk(1)]   ; ymm0=(R-Y)H
+    vmovdqa     ymm2, YMMWORD [wk(2)]   ; ymm2=(G-Y)H
+    vmovdqa     ymm4, YMMWORD [wk(0)]   ; ymm4=(B-Y)H
+
+.Yloop_1st:
+    vmovdqu     ymm7, YMMWORD [rsi]     ; ymm7=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV)
+
+    vpcmpeqw    ymm6, ymm6, ymm6        ; ymm6=(all 1's)
+    vpsrlw      ymm6, ymm6, BYTE_BIT    ; ymm6={0xFF 0x00 0xFF 0x00 ..}
+    vpand       ymm6, ymm6, ymm7        ; ymm6=Y(02468ACEGIKMOQSU)=YE
+    vpsrlw      ymm7, ymm7, BYTE_BIT    ; ymm7=Y(13579BDFHJLNPRTV)=YO
+
+    vmovdqa     ymm1, ymm0              ; ymm1=ymm0=(R-Y)(L/H)
+    vmovdqa     ymm3, ymm2              ; ymm3=ymm2=(G-Y)(L/H)
+    vmovdqa     ymm5, ymm4              ; ymm5=ymm4=(B-Y)(L/H)
+
+    vpaddw      ymm0, ymm0, ymm6        ; ymm0=((R-Y)+YE)=RE=R(02468ACEGIKMOQSU)
+    vpaddw      ymm1, ymm1, ymm7        ; ymm1=((R-Y)+YO)=RO=R(13579BDFHJLNPRTV)
+    vpackuswb   ymm0, ymm0, ymm0        ; ymm0=R(02468ACE********GIKMOQSU********)
+    vpackuswb   ymm1, ymm1, ymm1        ; ymm1=R(13579BDF********HJLNPRTV********)
+
+    vpaddw      ymm2, ymm2, ymm6        ; ymm2=((G-Y)+YE)=GE=G(02468ACEGIKMOQSU)
+    vpaddw      ymm3, ymm3, ymm7        ; ymm3=((G-Y)+YO)=GO=G(13579BDFHJLNPRTV)
+    vpackuswb   ymm2, ymm2, ymm2        ; ymm2=G(02468ACE********GIKMOQSU********)
+    vpackuswb   ymm3, ymm3, ymm3        ; ymm3=G(13579BDF********HJLNPRTV********)
+
+    vpaddw      ymm4, ymm4, ymm6        ; ymm4=((B-Y)+YE)=BE=B(02468ACEGIKMOQSU)
+    vpaddw      ymm5, ymm5, ymm7        ; ymm5=((B-Y)+YO)=BO=B(13579BDFHJLNPRTV)
+    vpackuswb   ymm4, ymm4, ymm4        ; ymm4=B(02468ACE********GIKMOQSU********)
+    vpackuswb   ymm5, ymm5, ymm5        ; ymm5=B(13579BDF********HJLNPRTV********)
+
+%if RGB_PIXELSIZE == 3  ; ---------------
+
+    ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **)
+    ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **)
+    ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **)
+    ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **)
+    ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **)
+    ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **)
+    ; ymmG=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **)
+    ; ymmH=(** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** **)
+
+    vpunpcklbw  ymmA, ymmA, ymmC        ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E
+                                        ;       0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U)
+    vpunpcklbw  ymmE, ymmE, ymmB        ; ymmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F
+                                        ;       2G 0H 2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V)
+    vpunpcklbw  ymmD, ymmD, ymmF        ; ymmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F
+                                        ;       1H 2H 1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V)
+
+    vpsrldq     ymmH, ymmA, 2           ; ymmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E 0G 1G
+                                        ;       0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U -- --)
+    vpunpckhwd  ymmG, ymmA, ymmE        ; ymmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F
+                                        ;       0O 1O 2O 0P 0Q 1Q 2Q 0R 0S 1S 2S 0T 0U 1U 2U 0V)
+    vpunpcklwd  ymmA, ymmA, ymmE        ; ymmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07
+                                        ;       0G 1G 2G 0H 0I 1I 2I 0J 0K 1K 2K 0L 0M 1M 2M 0N)
+
+    vpsrldq     ymmE, ymmE, 2           ; ymmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F 2G 0H
+                                        ;       2I 0J 2K 0L 2M 0N 2O 0P 2Q 0R 2S 0T 2U 0V -- --)
+
+    vpsrldq     ymmB, ymmD, 2           ; ymmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F 1H 2H
+                                        ;       1J 2J 1L 2L 1N 2N 1P 2P 1R 2R 1T 2T 1V 2V -- --)
+    vpunpckhwd  ymmC, ymmD, ymmH        ; ymmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F 0G 1G
+                                        ;       1P 2P 0Q 1Q 1R 2R 0S 1S 1T 2T 0U 1U 1V 2V -- --)
+    vpunpcklwd  ymmD, ymmD, ymmH        ; ymmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18
+                                        ;       1H 2H 0I 1I 1J 2J 0K 1K 1L 2L 0M 1M 1N 2N 0O 1O)
+
+    vpunpckhwd  ymmF, ymmE, ymmB        ; ymmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F 2G 0H 1H 2H
+                                        ;       2Q 0R 1R 2R 2S 0T 1T 2T 2U 0V 1V 2V -- -- -- --)
+    vpunpcklwd  ymmE, ymmE, ymmB        ; ymmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29
+                                        ;       2I 0J 1J 2J 2K 0L 1L 2L 2M 0N 1N 2N 2O 0P 1P 2P)
+
+    vpshufd     ymmH, ymmA, 0x4E        ; ymmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03
+                                        ;       0K 1K 2K 0L 0M 1M 2M 0N 0G 1G 2G 0H 0I 1I 2I 0J)
+    vpunpckldq  ymmA, ymmA, ymmD        ; ymmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14
+                                        ;       0G 1G 2G 0H 1H 2H 0I 1I 0I 1I 2I 0J 1J 2J 0K 1K)
+    vpunpckhdq  ymmD, ymmD, ymmE        ; ymmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29
+                                        ;       1L 2L 0M 1M 2M 0N 1N 2N 1N 2N 0O 1O 2O 0P 1P 2P)
+    vpunpckldq  ymmE, ymmE, ymmH        ; ymmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07
+                                        ;       2I 0J 1J 2J 0K 1K 2K 0L 2K 0L 1L 2L 0M 1M 2M 0N)
+
+    vpshufd     ymmH, ymmG, 0x4E        ; ymmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B
+                                        ;       0S 1S 2S 0T 0U 1U 2U 0V 0O 1O 2O 0P 0Q 1Q 2Q 0R)
+    vpunpckldq  ymmG, ymmG, ymmC        ; ymmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C
+                                        ;       0O 1O 2O 0P 1P 2P 0Q 1Q 0Q 1Q 2Q 0R 1R 2R 0S 1S)
+    vpunpckhdq  ymmC, ymmC, ymmF        ; ymmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F 0G 1G 2G 0H 1H 2H
+                                        ;       1T 2T 0U 1U 2U 0V 1V 2V 1V 2V -- -- -- -- -- --)
+    vpunpckldq  ymmF, ymmF, ymmH        ; ymmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F
+                                        ;       2Q 0R 1R 2R 0S 1S 2S 0T 2S 0T 1T 2T 0U 1U 2U 0V)
+
+    vpunpcklqdq ymmH, ymmA, ymmE        ; ymmH=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05
+                                        ;       0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L)
+    vpunpcklqdq ymmG, ymmD, ymmG        ; ymmG=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A
+                                        ;       1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q)
+    vpunpcklqdq ymmC, ymmF, ymmC        ; ymmC=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F
+                                        ;       2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V)
+
+    vperm2i128  ymmA, ymmH, ymmG, 0x20  ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05
+                                        ;       15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+    vperm2i128  ymmD, ymmC, ymmH, 0x30  ; ymmD=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F
+                                        ;       0G 1G 2G 0H 1H 2H 0I 1I 2I 0J 1J 2J 0K 1K 2K 0L)
+    vperm2i128  ymmF, ymmG, ymmC, 0x31  ; ymmF=(1L 2L 0M 1M 2M 0N 1N 2N 0O 1O 2O 0P 1P 2P 0Q 1Q
+                                        ;       2Q 0R 1R 2R 0S 1S 2S 0T 1T 2T 0U 1U 2U 0V 1V 2V)
+
+    cmp         rcx, byte SIZEOF_YMMWORD
+    jb          short .column_st64      ; fewer than 32 pixels left: partial store
+
+    test        rdi, SIZEOF_YMMWORD-1   ; is outptr 32-byte aligned?
+    jnz         short .out1
+    ; --(aligned)-------------------
+    vmovntdq    YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
+    vmovntdq    YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
+    vmovntdq    YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmF
+    jmp         short .out0
+.out1:  ; --(unaligned)-----------------
+    vmovdqu     YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
+    vmovdqu     YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
+    vmovdqu     YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmF
+.out0:
+    add         rdi, byte RGB_PIXELSIZE*SIZEOF_YMMWORD  ; outptr
+    sub         rcx, byte SIZEOF_YMMWORD  ; 32 pixels written
+    jz          near .endcolumn
+
+    add         rsi, byte SIZEOF_YMMWORD  ; inptr0
+    dec         al                      ; Yctr
+    jnz         near .Yloop_2nd         ; second Y pass reuses the saved H chroma halves
+
+    add         rbx, byte SIZEOF_YMMWORD  ; inptr1
+    add         rdx, byte SIZEOF_YMMWORD  ; inptr2
+    jmp         near .columnloop
+
+.column_st64:
+    lea         rcx, [rcx+rcx*2]            ; imul ecx, RGB_PIXELSIZE
+    cmp         rcx, byte 2*SIZEOF_YMMWORD  ; from here on rcx counts remaining output bytes
+    jb          short .column_st32
+    vmovdqu     YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
+    vmovdqu     YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
+    add         rdi, byte 2*SIZEOF_YMMWORD  ; outptr
+    vmovdqa     ymmA, ymmF                  ; remaining bytes now start in ymmA
+    sub         rcx, byte 2*SIZEOF_YMMWORD
+    jmp         short .column_st31
+.column_st32:
+    cmp         rcx, byte SIZEOF_YMMWORD
+    jb          short .column_st31
+    vmovdqu     YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
+    add         rdi, byte SIZEOF_YMMWORD    ; outptr
+    vmovdqa     ymmA, ymmD
+    sub         rcx, byte SIZEOF_YMMWORD
+    jmp         short .column_st31
+.column_st31:
+    cmp         rcx, byte SIZEOF_XMMWORD
+    jb          short .column_st15
+    vmovdqu     XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+    add         rdi, byte SIZEOF_XMMWORD    ; outptr
+    vperm2i128  ymmA, ymmA, ymmA, 1         ; bring the upper 128 bits down into xmmA
+    sub         rcx, byte SIZEOF_XMMWORD
+.column_st15:
+    ; Store the lower 8 bytes of xmmA to the output when it has enough
+    ; space.
+    cmp         rcx, byte SIZEOF_MMWORD
+    jb          short .column_st7
+    vmovq       XMM_MMWORD [rdi], xmmA
+    add         rdi, byte SIZEOF_MMWORD
+    sub         rcx, byte SIZEOF_MMWORD
+    vpsrldq     xmmA, xmmA, SIZEOF_MMWORD
+.column_st7:
+    ; Store the lower 4 bytes of xmmA to the output when it has enough
+    ; space.
+    cmp         rcx, byte SIZEOF_DWORD
+    jb          short .column_st3
+    vmovd       XMM_DWORD [rdi], xmmA
+    add         rdi, byte SIZEOF_DWORD
+    sub         rcx, byte SIZEOF_DWORD
+    vpsrldq     xmmA, xmmA, SIZEOF_DWORD
+.column_st3:
+    ; Store the lower 2 bytes of rax to the output when it has enough
+    ; space.
+    vmovd       eax, xmmA               ; Yctr in al is no longer needed on this path
+    cmp         rcx, byte SIZEOF_WORD
+    jb          short .column_st1
+    mov         WORD [rdi], ax
+    add         rdi, byte SIZEOF_WORD
+    sub         rcx, byte SIZEOF_WORD
+    shr         rax, 16
+.column_st1:
+    ; Store the lower 1 byte of rax to the output when it has enough
+    ; space.
+    test        rcx, rcx
+    jz          short .endcolumn
+    mov         BYTE [rdi], al
+
+%else  ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+    vpcmpeqb    ymm6, ymm6, ymm6        ; ymm6=XE=X(02468ACE********GIKMOQSU********)
+    vpcmpeqb    ymm7, ymm7, ymm7        ; ymm7=XO=X(13579BDF********HJLNPRTV********)
+%else
+    vpxor       ymm6, ymm6, ymm6        ; ymm6=XE=X(02468ACE********GIKMOQSU********)
+    vpxor       ymm7, ymm7, ymm7        ; ymm7=XO=X(13579BDF********HJLNPRTV********)
+%endif
+    ; ymmA=(00 02 04 06 08 0A 0C 0E ** 0G 0I 0K 0M 0O 0Q 0S 0U **)
+    ; ymmB=(01 03 05 07 09 0B 0D 0F ** 0H 0J 0L 0N 0P 0R 0T 0V **)
+    ; ymmC=(10 12 14 16 18 1A 1C 1E ** 1G 1I 1K 1M 1O 1Q 1S 1U **)
+    ; ymmD=(11 13 15 17 19 1B 1D 1F ** 1H 1J 1L 1N 1P 1R 1T 1V **)
+    ; ymmE=(20 22 24 26 28 2A 2C 2E ** 2G 2I 2K 2M 2O 2Q 2S 2U **)
+    ; ymmF=(21 23 25 27 29 2B 2D 2F ** 2H 2J 2L 2N 2P 2R 2T 2V **)
+    ; ymmG=(30 32 34 36 38 3A 3C 3E ** 3G 3I 3K 3M 3O 3Q 3S 3U **)
+    ; ymmH=(31 33 35 37 39 3B 3D 3F ** 3H 3J 3L 3N 3P 3R 3T 3V **)
+
+    vpunpcklbw  ymmA, ymmA, ymmC        ; ymmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E
+                                        ;       0G 1G 0I 1I 0K 1K 0M 1M 0O 1O 0Q 1Q 0S 1S 0U 1U)
+    vpunpcklbw  ymmE, ymmE, ymmG        ; ymmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E
+                                        ;       2G 3G 2I 3I 2K 3K 2M 3M 2O 3O 2Q 3Q 2S 3S 2U 3U)
+    vpunpcklbw  ymmB, ymmB, ymmD        ; ymmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F
+                                        ;       0H 1H 0J 1J 0L 1L 0N 1N 0P 1P 0R 1R 0T 1T 0V 1V)
+    vpunpcklbw  ymmF, ymmF, ymmH        ; ymmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F
+                                        ;       2H 3H 2J 3J 2L 3L 2N 3N 2P 3P 2R 3R 2T 3T 2V 3V)
+
+    vpunpckhwd  ymmC, ymmA, ymmE        ; ymmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E
+                                        ;       0O 1O 2O 3O 0Q 1Q 2Q 3Q 0S 1S 2S 3S 0U 1U 2U 3U)
+    vpunpcklwd  ymmA, ymmA, ymmE        ; ymmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36
+                                        ;       0G 1G 2G 3G 0I 1I 2I 3I 0K 1K 2K 3K 0M 1M 2M 3M)
+    vpunpckhwd  ymmG, ymmB, ymmF        ; ymmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F
+                                        ;       0P 1P 2P 3P 0R 1R 2R 3R 0T 1T 2T 3T 0V 1V 2V 3V)
+    vpunpcklwd  ymmB, ymmB, ymmF        ; ymmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37
+                                        ;       0H 1H 2H 3H 0J 1J 2J 3J 0L 1L 2L 3L 0N 1N 2N 3N)
+
+    vpunpckhdq  ymmE, ymmA, ymmB        ; ymmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37
+                                        ;       0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N)
+    vpunpckldq  ymmB, ymmA, ymmB        ; ymmB=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
+                                        ;       0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J)
+    vpunpckhdq  ymmF, ymmC, ymmG        ; ymmF=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F
+                                        ;       0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V)
+    vpunpckldq  ymmG, ymmC, ymmG        ; ymmG=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
+                                        ;       0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R)
+
+    vperm2i128  ymmA, ymmB, ymmE, 0x20  ; ymmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33
+                                        ;       04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+    vperm2i128  ymmD, ymmG, ymmF, 0x20  ; ymmD=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B
+                                        ;       0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+    vperm2i128  ymmC, ymmB, ymmE, 0x31  ; ymmC=(0G 1G 2G 3G 0H 1H 2H 3H 0I 1I 2I 3I 0J 1J 2J 3J
+                                        ;       0K 1K 2K 3K 0L 1L 2L 3L 0M 1M 2M 3M 0N 1N 2N 3N)
+    vperm2i128  ymmH, ymmG, ymmF, 0x31  ; ymmH=(0O 1O 2O 3O 0P 1P 2P 3P 0Q 1Q 2Q 3Q 0R 1R 2R 3R
+                                        ;       0S 1S 2S 3S 0T 1T 2T 3T 0U 1U 2U 3U 0V 1V 2V 3V)
+
+    cmp         rcx, byte SIZEOF_YMMWORD
+    jb          short .column_st64      ; fewer than 32 pixels left: partial store
+
+    test        rdi, SIZEOF_YMMWORD-1   ; is outptr 32-byte aligned?
+    jnz         short .out1
+    ; --(aligned)-------------------
+    vmovntdq    YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
+    vmovntdq    YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
+    vmovntdq    YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmC
+    vmovntdq    YMMWORD [rdi+3*SIZEOF_YMMWORD], ymmH
+    jmp         short .out0
+.out1:  ; --(unaligned)-----------------
+    vmovdqu     YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
+    vmovdqu     YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
+    vmovdqu     YMMWORD [rdi+2*SIZEOF_YMMWORD], ymmC
+    vmovdqu     YMMWORD [rdi+3*SIZEOF_YMMWORD], ymmH
+.out0:
+    add         rdi, RGB_PIXELSIZE*SIZEOF_YMMWORD  ; outptr (4*32 = 128 does not fit a signed imm8, hence no "byte")
+    sub         rcx, byte SIZEOF_YMMWORD  ; 32 pixels written
+    jz          near .endcolumn
+
+    add         rsi, byte SIZEOF_YMMWORD  ; inptr0
+    dec         al                      ; Yctr
+    jnz         near .Yloop_2nd         ; second Y pass reuses the saved H chroma halves
+
+    add         rbx, byte SIZEOF_YMMWORD  ; inptr1
+    add         rdx, byte SIZEOF_YMMWORD  ; inptr2
+    jmp         near .columnloop
+
+.column_st64:
+    cmp         rcx, byte SIZEOF_YMMWORD/2  ; rcx stays in pixels here; 16 pixels = 2 ymmwords
+    jb          short .column_st32
+    vmovdqu     YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
+    vmovdqu     YMMWORD [rdi+1*SIZEOF_YMMWORD], ymmD
+    add         rdi, byte 2*SIZEOF_YMMWORD  ; outptr
+    vmovdqa     ymmA, ymmC
+    vmovdqa     ymmD, ymmH
+    sub         rcx, byte SIZEOF_YMMWORD/2
+.column_st32:
+    cmp         rcx, byte SIZEOF_YMMWORD/4  ; 8 pixels = 1 ymmword
+    jb          short .column_st16
+    vmovdqu     YMMWORD [rdi+0*SIZEOF_YMMWORD], ymmA
+    add         rdi, byte SIZEOF_YMMWORD    ; outptr
+    vmovdqa     ymmA, ymmD
+    sub         rcx, byte SIZEOF_YMMWORD/4
+.column_st16:
+    cmp         rcx, byte SIZEOF_YMMWORD/8  ; 4 pixels = 1 xmmword
+    jb          short .column_st15
+    vmovdqu     XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+    add         rdi, byte SIZEOF_XMMWORD    ; outptr
+    vperm2i128  ymmA, ymmA, ymmA, 1         ; bring the upper 128 bits down into xmmA
+    sub         rcx, byte SIZEOF_YMMWORD/8
+.column_st15:
+    ; Store two pixels (8 bytes) of ymmA to the output when it has enough
+    ; space.
+    cmp         rcx, byte SIZEOF_YMMWORD/16
+    jb          short .column_st7
+    vmovq       MMWORD [rdi], xmmA
+    add         rdi, byte SIZEOF_YMMWORD/16*4
+    sub         rcx, byte SIZEOF_YMMWORD/16
+    vpsrldq     xmmA, SIZEOF_YMMWORD/16*4
+.column_st7:
+    ; Store one pixel (4 bytes) of ymmA to the output when it has enough
+    ; space.
+    test        rcx, rcx                ; any pixel left?
+    jz          short .endcolumn
+    vmovd       XMM_DWORD [rdi], xmmA   ; write the final pixel
+
+%endif  ; RGB_PIXELSIZE ; ---------------
+
+.endcolumn:
+    sfence                              ; flush the write buffer
+
+.return:
+    pop         rbx
+    vzeroupper                          ; clear the upper ymm halves before returning
+    uncollect_args 4
+    mov         rsp, rbp                ; rsp <- aligned rbp
+    pop         rsp                     ; rsp <- pre-alignment rsp stored at [aligned rbp] (points at the pushed rbp)
+    pop         rbp
+    ret
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+; Implemented as two calls to the h2v1 routine, one per output row, sharing the same Cb/Cr rows.
+; GLOBAL(void)
+; jsimd_h2v2_merged_upsample_avx2(JDIMENSION output_width,
+;                                 JSAMPIMAGE input_buf,
+;                                 JDIMENSION in_row_group_ctr,
+;                                 JSAMPARRAY output_buf);
+;
+
+; r10d = JDIMENSION output_width
+; r11 = JSAMPIMAGE input_buf
+; r12d = JDIMENSION in_row_group_ctr
+; r13 = JSAMPARRAY output_buf
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
+
+EXTN(jsimd_h2v2_merged_upsample_avx2):
+    push        rbp
+    mov         rax, rsp                ; NOTE(review): overwritten by "mov eax, r10d" below; presumably only for collect_args - confirm
+    mov         rbp, rsp
+    collect_args 4                      ; macro defined elsewhere; presumably sets up r10-r13 as listed above
+    push        rbx
+
+    mov         eax, r10d               ; rax = output_width
+
+    mov         rdi, r11                ; rdi = input_buf
+    mov         ecx, r12d               ; rcx = in_row_group_ctr
+    mov         rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]  ; rsi = input_buf[0]
+    mov         rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]  ; rbx = input_buf[1]
+    mov         rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]  ; rdx = input_buf[2]
+    mov         rdi, r13                ; rdi = output_buf
+    lea         rsi, [rsi+rcx*SIZEOF_JSAMPROW]  ; rsi = &input_buf[0][ctr]; callee adds ctr again, reaching row 2*ctr
+
+    push        rdx                     ; inptr2
+    push        rbx                     ; inptr1
+    push        rsi                     ; inptr00
+    mov         rbx, rsp                ; rbx -> {inptr00, inptr1, inptr2}: temporary 3-entry image on the stack
+
+    push        rdi                     ; save output_buf across the call
+    push        rcx                     ; save in_row_group_ctr
+    push        rax                     ; save output_width
+
+    %ifdef WIN64
+    mov         r8, rcx                 ; arg 3 = in_row_group_ctr
+    mov         r9, rdi                 ; arg 4 = output_buf
+    mov         rcx, rax                ; arg 1 = output_width
+    mov         rdx, rbx                ; arg 2 = temporary image
+    %else
+    mov         rdx, rcx                ; arg 3 = in_row_group_ctr
+    mov         rcx, rdi                ; arg 4 = output_buf
+    mov         rdi, rax                ; arg 1 = output_width
+    mov         rsi, rbx                ; arg 2 = temporary image
+    %endif
+
+    call        EXTN(jsimd_h2v1_merged_upsample_avx2)
+
+    pop         rax                     ; output_width
+    pop         rcx                     ; in_row_group_ctr
+    pop         rdi                     ; output_buf
+    pop         rsi                     ; inptr00
+    pop         rbx                     ; inptr1
+    pop         rdx                     ; inptr2
+
+    add         rdi, byte SIZEOF_JSAMPROW  ; outptr1
+    add         rsi, byte SIZEOF_JSAMPROW  ; inptr01
+
+    push        rdx                     ; inptr2
+    push        rbx                     ; inptr1
+    push        rsi                     ; inptr00
+    mov         rbx, rsp                ; rbx -> temporary image for the second row
+
+    push        rdi
+    push        rcx
+    push        rax
+
+    %ifdef WIN64
+    mov         r8, rcx                 ; arg 3 = in_row_group_ctr
+    mov         r9, rdi                 ; arg 4 = output_buf + 1 row pointer
+    mov         rcx, rax                ; arg 1 = output_width
+    mov         rdx, rbx                ; arg 2 = temporary image
+    %else
+    mov         rdx, rcx                ; arg 3 = in_row_group_ctr
+    mov         rcx, rdi                ; arg 4 = output_buf + 1 row pointer
+    mov         rdi, rax                ; arg 1 = output_width
+    mov         rsi, rbx                ; arg 2 = temporary image
+    %endif
+
+    call        EXTN(jsimd_h2v1_merged_upsample_avx2)
+
+    pop         rax                     ; discard the saved values and the temporary image
+    pop         rcx
+    pop         rdi
+    pop         rsi
+    pop         rbx
+    pop         rdx
+
+    pop         rbx                     ; restore the rbx pushed after collect_args
+    uncollect_args 4
+    pop         rbp
+    ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+    align       32
diff --git a/simd/x86_64/jdmrgext-sse2.asm b/simd/x86_64/jdmrgext-sse2.asm
new file mode 100644
index 0000000..1cc3345
--- /dev/null
+++ b/simd/x86_64/jdmrgext-sse2.asm
@@ -0,0 +1,537 @@
+;
+; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2)
+;
+; Copyright 2009, 2012 Pierre Ossman for Cendio AB
+; Copyright (C) 2009, 2012, 2016, D. R. Commander.
+;
+; Based on the x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+; +; GLOBAL(void) +; jsimd_h2v1_merged_upsample_sse2(JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; + +; r10d = JDIMENSION output_width +; r11 = JSAMPIMAGE input_buf +; r12d = JDIMENSION in_row_group_ctr +; r13 = JSAMPARRAY output_buf + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 3 + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2) + +EXTN(jsimd_h2v1_merged_upsample_sse2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 4 + push rbx + + mov ecx, r10d ; col + test rcx, rcx + jz near .return + + push rcx + + mov rdi, r11 + mov ecx, r12d + mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] + mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] + mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] + mov rdi, r13 + mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0 + mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1 + mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2 + mov rdi, JSAMPROW [rdi] ; outptr + + pop rcx ; col + +.columnloop: + + movdqa xmm6, XMMWORD [rbx] ; xmm6=Cb(0123456789ABCDEF) + movdqa xmm7, XMMWORD [rdx] ; xmm7=Cr(0123456789ABCDEF) + + pxor xmm1, xmm1 ; xmm1=(all 0's) + pcmpeqw xmm3, xmm3 + psllw xmm3, 7 ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + movdqa xmm4, xmm6 + punpckhbw xmm6, xmm1 ; xmm6=Cb(89ABCDEF)=CbH + punpcklbw xmm4, xmm1 ; xmm4=Cb(01234567)=CbL + movdqa xmm0, xmm7 + punpckhbw xmm7, xmm1 ; xmm7=Cr(89ABCDEF)=CrH + punpcklbw xmm0, xmm1 ; xmm0=Cr(01234567)=CrL + + paddw xmm6, xmm3 + paddw xmm4, xmm3 + paddw xmm7, xmm3 + paddw xmm0, xmm3 + + ; (Original) + ; R = Y + 1.40200 * Cr + ; G = Y - 0.34414 * Cb - 0.71414 * Cr + ; B = Y + 1.77200 * Cb + ; + ; (This implementation) + ; R = Y + 0.40200 * Cr + Cr + ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr + ; B = Y - 0.22800 * Cb + Cb + 
Cb + + movdqa xmm5, xmm6 ; xmm5=CbH + movdqa xmm2, xmm4 ; xmm2=CbL + paddw xmm6, xmm6 ; xmm6=2*CbH + paddw xmm4, xmm4 ; xmm4=2*CbL + movdqa xmm1, xmm7 ; xmm1=CrH + movdqa xmm3, xmm0 ; xmm3=CrL + paddw xmm7, xmm7 ; xmm7=2*CrH + paddw xmm0, xmm0 ; xmm0=2*CrL + + pmulhw xmm6, [rel PW_MF0228] ; xmm6=(2*CbH * -FIX(0.22800)) + pmulhw xmm4, [rel PW_MF0228] ; xmm4=(2*CbL * -FIX(0.22800)) + pmulhw xmm7, [rel PW_F0402] ; xmm7=(2*CrH * FIX(0.40200)) + pmulhw xmm0, [rel PW_F0402] ; xmm0=(2*CrL * FIX(0.40200)) + + paddw xmm6, [rel PW_ONE] + paddw xmm4, [rel PW_ONE] + psraw xmm6, 1 ; xmm6=(CbH * -FIX(0.22800)) + psraw xmm4, 1 ; xmm4=(CbL * -FIX(0.22800)) + paddw xmm7, [rel PW_ONE] + paddw xmm0, [rel PW_ONE] + psraw xmm7, 1 ; xmm7=(CrH * FIX(0.40200)) + psraw xmm0, 1 ; xmm0=(CrL * FIX(0.40200)) + + paddw xmm6, xmm5 + paddw xmm4, xmm2 + paddw xmm6, xmm5 ; xmm6=(CbH * FIX(1.77200))=(B-Y)H + paddw xmm4, xmm2 ; xmm4=(CbL * FIX(1.77200))=(B-Y)L + paddw xmm7, xmm1 ; xmm7=(CrH * FIX(1.40200))=(R-Y)H + paddw xmm0, xmm3 ; xmm0=(CrL * FIX(1.40200))=(R-Y)L + + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=(B-Y)H + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(R-Y)H + + movdqa xmm6, xmm5 + movdqa xmm7, xmm2 + punpcklwd xmm5, xmm1 + punpckhwd xmm6, xmm1 + pmaddwd xmm5, [rel PW_MF0344_F0285] + pmaddwd xmm6, [rel PW_MF0344_F0285] + punpcklwd xmm2, xmm3 + punpckhwd xmm7, xmm3 + pmaddwd xmm2, [rel PW_MF0344_F0285] + pmaddwd xmm7, [rel PW_MF0344_F0285] + + paddd xmm5, [rel PD_ONEHALF] + paddd xmm6, [rel PD_ONEHALF] + psrad xmm5, SCALEBITS + psrad xmm6, SCALEBITS + paddd xmm2, [rel PD_ONEHALF] + paddd xmm7, [rel PD_ONEHALF] + psrad xmm2, SCALEBITS + psrad xmm7, SCALEBITS + + packssdw xmm5, xmm6 ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285) + packssdw xmm2, xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285) + psubw xmm5, xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H + psubw xmm2, xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L + + movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H + + mov al, 2 ; Yctr + jmp short .Yloop_1st + 
+.Yloop_2nd: + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H + +.Yloop_1st: + movdqa xmm7, XMMWORD [rsi] ; xmm7=Y(0123456789ABCDEF) + + pcmpeqw xmm6, xmm6 + psrlw xmm6, BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} + pand xmm6, xmm7 ; xmm6=Y(02468ACE)=YE + psrlw xmm7, BYTE_BIT ; xmm7=Y(13579BDF)=YO + + movdqa xmm1, xmm0 ; xmm1=xmm0=(R-Y)(L/H) + movdqa xmm3, xmm2 ; xmm3=xmm2=(G-Y)(L/H) + movdqa xmm5, xmm4 ; xmm5=xmm4=(B-Y)(L/H) + + paddw xmm0, xmm6 ; xmm0=((R-Y)+YE)=RE=R(02468ACE) + paddw xmm1, xmm7 ; xmm1=((R-Y)+YO)=RO=R(13579BDF) + packuswb xmm0, xmm0 ; xmm0=R(02468ACE********) + packuswb xmm1, xmm1 ; xmm1=R(13579BDF********) + + paddw xmm2, xmm6 ; xmm2=((G-Y)+YE)=GE=G(02468ACE) + paddw xmm3, xmm7 ; xmm3=((G-Y)+YO)=GO=G(13579BDF) + packuswb xmm2, xmm2 ; xmm2=G(02468ACE********) + packuswb xmm3, xmm3 ; xmm3=G(13579BDF********) + + paddw xmm4, xmm6 ; xmm4=((B-Y)+YE)=BE=B(02468ACE) + paddw xmm5, xmm7 ; xmm5=((B-Y)+YO)=BO=B(13579BDF) + packuswb xmm4, xmm4 ; xmm4=B(02468ACE********) + packuswb xmm5, xmm5 ; xmm5=B(13579BDF********) + +%if RGB_PIXELSIZE == 3 ; --------------- + + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **) + + punpcklbw xmmA, xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE, xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F) + punpcklbw xmmD, xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F) + + movdqa xmmG, xmmA + movdqa xmmH, xmmA + punpcklwd xmmA, xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07) + punpckhwd xmmG, xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F) + + psrldq xmmH, 2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --) + psrldq xmmE, 2 
; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --) + + movdqa xmmC, xmmD + movdqa xmmB, xmmD + punpcklwd xmmD, xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18) + punpckhwd xmmC, xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --) + + psrldq xmmB, 2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --) + + movdqa xmmF, xmmE + punpcklwd xmmE, xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29) + punpckhwd xmmF, xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --) + + pshufd xmmH, xmmA, 0x4E ; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03) + movdqa xmmB, xmmE + punpckldq xmmA, xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14) + punpckldq xmmE, xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07) + punpckhdq xmmD, xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29) + + pshufd xmmH, xmmG, 0x4E ; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B) + movdqa xmmB, xmmF + punpckldq xmmG, xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C) + punpckldq xmmF, xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F) + punpckhdq xmmC, xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --) + + punpcklqdq xmmA, xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05) + punpcklqdq xmmD, xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A) + punpcklqdq xmmF, xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub 
rcx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE + cmp rcx, byte 2*SIZEOF_XMMWORD + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmF + sub rcx, byte 2*SIZEOF_XMMWORD + jmp short .column_st15 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmD + sub rcx, byte SIZEOF_XMMWORD +.column_st15: + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq XMM_MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd XMM_DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. 
+ test rcx, rcx + jz short .endcolumn + mov BYTE [rdi], al + +%else ; RGB_PIXELSIZE == 4 ; ----------- + +%ifdef RGBX_FILLER_0XFF + pcmpeqb xmm6, xmm6 ; xmm6=XE=X(02468ACE********) + pcmpeqb xmm7, xmm7 ; xmm7=XO=X(13579BDF********) +%else + pxor xmm6, xmm6 ; xmm6=XE=X(02468ACE********) + pxor xmm7, xmm7 ; xmm7=XO=X(13579BDF********) +%endif + ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **) + ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **) + ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **) + ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **) + + punpcklbw xmmA, xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E) + punpcklbw xmmE, xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E) + punpcklbw xmmB, xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F) + punpcklbw xmmF, xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F) + + movdqa xmmC, xmmA + punpcklwd xmmA, xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36) + punpckhwd xmmC, xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E) + movdqa xmmG, xmmB + punpcklwd xmmB, xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37) + punpckhwd xmmG, xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F) + + movdqa xmmD, xmmA + punpckldq xmmA, xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33) + punpckhdq xmmD, xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37) + movdqa xmmH, xmmC + punpckldq xmmC, xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B) + punpckhdq xmmH, xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F) + + cmp rcx, byte SIZEOF_XMMWORD + jb short .column_st32 + + test rdi, SIZEOF_XMMWORD-1 + jnz short .out1 + ; --(aligned)------------------- + movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movntdq XMMWORD 
[rdi+3*SIZEOF_XMMWORD], xmmH + jmp short .out0 +.out1: ; --(unaligned)----------------- + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC + movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH +.out0: + add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr + sub rcx, byte SIZEOF_XMMWORD + jz near .endcolumn + + add rsi, byte SIZEOF_XMMWORD ; inptr0 + dec al ; Yctr + jnz near .Yloop_2nd + + add rbx, byte SIZEOF_XMMWORD ; inptr1 + add rdx, byte SIZEOF_XMMWORD ; inptr2 + jmp near .columnloop + +.column_st32: + cmp rcx, byte SIZEOF_XMMWORD/2 + jb short .column_st16 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmC + movdqa xmmD, xmmH + sub rcx, byte SIZEOF_XMMWORD/2 +.column_st16: + cmp rcx, byte SIZEOF_XMMWORD/4 + jb short .column_st15 + movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA + add rdi, byte SIZEOF_XMMWORD ; outptr + movdqa xmmA, xmmD + sub rcx, byte SIZEOF_XMMWORD/4 +.column_st15: + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq XMM_MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test rcx, rcx + jz short .endcolumn + movd XMM_DWORD [rdi], xmmA + +%endif ; RGB_PIXELSIZE ; --------------- + +.endcolumn: + sfence ; flush the write buffer + +.return: + pop rbx + uncollect_args 4 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. 
+; +; GLOBAL(void) +; jsimd_h2v2_merged_upsample_sse2(JDIMENSION output_width, +; JSAMPIMAGE input_buf, +; JDIMENSION in_row_group_ctr, +; JSAMPARRAY output_buf); +; Makes two calls to jsimd_h2v1_merged_upsample_sse2, one per output row; both calls pass the same inptr1/inptr2. + +; r10d = JDIMENSION output_width +; r11 = JSAMPIMAGE input_buf +; r12d = JDIMENSION in_row_group_ctr +; r13 = JSAMPARRAY output_buf + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2) + +EXTN(jsimd_h2v2_merged_upsample_sse2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 4 + push rbx + + mov eax, r10d ; output_width + + mov rdi, r11 ; input_buf + mov ecx, r12d ; in_row_group_ctr + mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] ; input_buf[0] + mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] ; input_buf[1] + mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] ; input_buf[2] + mov rdi, r13 ; output_buf + lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] ; input_buf[0] advanced by in_row_group_ctr rows (presumably the callee indexes by in_row_group_ctr again -- confirm) + + push rdx ; inptr2 + push rbx ; inptr1 + push rsi ; inptr00 + mov rbx, rsp ; rbx -> {inptr00, inptr1, inptr2} array built on the stack + + push rdi ; save output_buf + push rcx ; save in_row_group_ctr + push rax ; save output_width + + %ifdef WIN64 + mov r8, rcx ; in_row_group_ctr + mov r9, rdi ; output_buf + mov rcx, rax ; output_width + mov rdx, rbx ; stack array used as input_buf + %else + mov rdx, rcx ; in_row_group_ctr + mov rcx, rdi ; output_buf + mov rdi, rax ; output_width + mov rsi, rbx ; stack array used as input_buf + %endif + + call EXTN(jsimd_h2v1_merged_upsample_sse2) + + pop rax ; output_width + pop rcx ; in_row_group_ctr + pop rdi ; output_buf + pop rsi ; inptr00 + pop rbx ; inptr1 + pop rdx ; inptr2 + + add rdi, byte SIZEOF_JSAMPROW ; outptr1 + add rsi, byte SIZEOF_JSAMPROW ; inptr01 + + push rdx ; inptr2 + push rbx ; inptr1 + push rsi ; inptr01 + mov rbx, rsp ; rbx -> {inptr01, inptr1, inptr2} array built on the stack + + push rdi ; save outptr1 + push rcx ; save in_row_group_ctr + push rax ; save output_width + + %ifdef WIN64 + mov r8, rcx ; in_row_group_ctr + mov r9, rdi ; outptr1 + mov rcx, rax ; output_width + mov rdx, rbx ; stack array used as input_buf + %else + mov rdx, rcx ; in_row_group_ctr + mov rcx, rdi ; outptr1 + mov rdi, rax ; output_width + mov rsi, rbx ; stack array used as input_buf + %endif + + call EXTN(jsimd_h2v1_merged_upsample_sse2) + + pop rax ; discard the six values pushed for the second call + pop rcx + pop rdi + pop rsi + pop rbx + pop rdx + + pop rbx ; restore the rbx saved in the prologue + uncollect_args 4 + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this.
+ align 32 diff --git a/simd/x86_64/jdsample-avx2.asm b/simd/x86_64/jdsample-avx2.asm new file mode 100644 index 0000000..10fa5c4 --- /dev/null +++ b/simd/x86_64/jdsample-avx2.asm @@ -0,0 +1,697 @@ +; +; jdsample.asm - upsampling (64-bit AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; Copyright (C) 2015, Intel Corporation. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fancy_upsample_avx2) + +EXTN(jconst_fancy_upsample_avx2): + +PW_ONE times 16 dw 1 +PW_TWO times 16 dw 2 +PW_THREE times 16 dw 3 +PW_SEVEN times 16 dw 7 +PW_EIGHT times 16 dw 8 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers. 
+; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_avx2(int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11d = JDIMENSION downsampled_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2) + +EXTN(jsimd_h2v1_fancy_upsample_avx2): + push rbp + mov rax, rsp + mov rbp, rsp + push_xmm 3 + collect_args 4 + + mov eax, r11d ; colctr + test rax, rax + jz near .return + + mov rcx, r10 ; rowctr + test rcx, rcx + jz near .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data + + vpxor ymm0, ymm0, ymm0 ; ymm0=(all 0's) + vpcmpeqb xmm9, xmm9, xmm9 + vpsrldq xmm10, xmm9, (SIZEOF_XMMWORD-1) ; (ff -- -- -- ... -- --) LSB is ff + + vpslldq xmm9, xmm9, (SIZEOF_XMMWORD-1) + vperm2i128 ymm9, ymm9, ymm9, 1 ; (---- ---- ... ---- ---- ff) MSB is ff + +.rowloop: + push rax ; colctr + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr + + test rax, SIZEOF_YMMWORD-1 + jz short .skip + mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + vpand ymm7, ymm10, YMMWORD [rsi+0*SIZEOF_YMMWORD] ; ymm7=( 0 -- -- ... -- -- --): sample 0 stands in for sample -1 + + add rax, byte SIZEOF_YMMWORD-1 + and rax, byte -SIZEOF_YMMWORD ; round colctr up to a multiple of SIZEOF_YMMWORD + cmp rax, byte SIZEOF_YMMWORD + ja short .columnloop + +.columnloop_last: + vpand ymm6, ymm9, YMMWORD [rsi+0*SIZEOF_YMMWORD] ; ymm6=(-- -- -- ... -- -- 31): sample 31 stands in for sample 32 + jmp short .upsample + +.columnloop: + vmovdqu ymm6, YMMWORD [rsi+1*SIZEOF_YMMWORD] + vperm2i128 ymm6, ymm0, ymm6, 0x20 + vpslldq ymm6, ymm6, 15 ; ymm6=(-- -- -- ... -- -- 32) + +.upsample: + vmovdqu ymm1, YMMWORD [rsi+0*SIZEOF_YMMWORD] ; ymm1=( 0 1 2 ... 29 30 31) + + vperm2i128 ymm2, ymm0, ymm1, 0x20 + vpalignr ymm2, ymm1, ymm2, 15 ; ymm2=(-- 0 1 ... 28 29 30) + vperm2i128 ymm4, ymm0, ymm1, 0x03 + vpalignr ymm3, ymm4, ymm1, 1 ; ymm3=( 1 2 3 ... 30 31 --) + + vpor ymm2, ymm2, ymm7 ; ymm2=(-1 0 1 ...
28 29 30) + vpor ymm3, ymm3, ymm6 ; ymm3=( 1 2 3 ... 30 31 32) + + vpsrldq ymm7, ymm4, (SIZEOF_XMMWORD-1) ; ymm7=(31 -- -- ... -- -- --) + + vpunpckhbw ymm4, ymm1, ymm0 ; ymm4=( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm5, ymm1, ymm0 ; ymm5=( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm1, ymm5, ymm4, 0x20 ; ymm1=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpunpckhbw ymm5, ymm2, ymm0 ; ymm5=( 7 8 9 10 11 12 13 14 23 24 25 26 27 28 29 30) + vpunpcklbw ymm6, ymm2, ymm0 ; ymm6=(-1 0 1 2 3 4 5 6 15 16 17 18 19 20 21 22) + vperm2i128 ymm2, ymm6, ymm5, 0x20 ; ymm2=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14) + vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) + + vpunpckhbw ymm6, ymm3, ymm0 ; ymm6=( 9 10 11 12 13 14 15 16 25 26 27 28 29 30 31 32) + vpunpcklbw ymm8, ymm3, ymm0 ; ymm8=( 1 2 3 4 5 6 7 8 17 18 19 20 21 22 23 24) + vperm2i128 ymm3, ymm8, ymm6, 0x20 ; ymm3=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16) + vperm2i128 ymm6, ymm8, ymm6, 0x31 ; ymm6=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32) + + vpmullw ymm1, ymm1, [rel PW_THREE] ; 3 * s[i] + vpmullw ymm4, ymm4, [rel PW_THREE] + vpaddw ymm2, ymm2, [rel PW_ONE] ; s[i-1] + 1 + vpaddw ymm5, ymm5, [rel PW_ONE] + vpaddw ymm3, ymm3, [rel PW_TWO] ; s[i+1] + 2 + vpaddw ymm6, ymm6, [rel PW_TWO] + + vpaddw ymm2, ymm2, ymm1 + vpaddw ymm5, ymm5, ymm4 + vpsrlw ymm2, ymm2, 2 ; ymm2=OutLE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30) + vpsrlw ymm5, ymm5, 2 ; ymm5=OutHE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62) + vpaddw ymm3, ymm3, ymm1 + vpaddw ymm6, ymm6, ymm4 + vpsrlw ymm3, ymm3, 2 ; ymm3=OutLO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31) + vpsrlw ymm6, ymm6, 2 ; ymm6=OutHO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63) + + vpsllw ymm3, ymm3, BYTE_BIT ; odd outputs -> high byte of each word + vpsllw ymm6, ymm6, BYTE_BIT + vpor ymm2, ymm2, ymm3 ; ymm2=OutL=( 0 1 2 ... 29 30 31) + vpor ymm5, ymm5, ymm6 ; ymm5=OutH=(32 33 34 ...
61 62 63) + + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymm2 + vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymm5 + + sub rax, byte SIZEOF_YMMWORD + add rsi, byte 1*SIZEOF_YMMWORD ; inptr + add rdi, byte 2*SIZEOF_YMMWORD ; outptr + cmp rax, byte SIZEOF_YMMWORD + ja near .columnloop + test eax, eax ; exactly one block left? + jnz near .columnloop_last + + pop rsi + pop rdi + pop rax + + add rsi, byte SIZEOF_JSAMPROW ; input_data + add rdi, byte SIZEOF_JSAMPROW ; output_data + dec rcx ; rowctr + jg near .rowloop + +.return: + vzeroupper + uncollect_args 4 + pop_xmm 3 + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above. +; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_avx2(int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11d = JDIMENSION downsampled_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM] +%define WK_NUM 4 + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2) + +EXTN(jsimd_h2v2_fancy_upsample_avx2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits + mov [rsp], rax ; keep rax at the base of the aligned frame + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] ; reserve wk[0..WK_NUM-1] below rbp + push_xmm 3 + collect_args 4 + push rbx + + mov eax, r11d ; colctr + test rax, rax + jz near .return + + mov rcx, r10 ; rowctr + test rcx, rcx + jz near .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rax ; colctr + push rcx ; rowctr (rcx is reused as inptr1(above) below) + push rdi + push rsi + + mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0 + mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov rdx, JSAMPROW
[rdi+0*SIZEOF_JSAMPROW] ; outptr0 + mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1 + + vpxor ymm8, ymm8, ymm8 ; ymm8=(all 0's) + vpcmpeqb xmm9, xmm9, xmm9 + vpsrldq xmm10, xmm9, (SIZEOF_XMMWORD-2) ; (ffff ---- ---- ... ---- ----) LSB is ffff + vpslldq xmm9, xmm9, (SIZEOF_XMMWORD-2) + vperm2i128 ymm9, ymm9, ymm9, 1 ; (---- ---- ... ---- ---- ffff) MSB is ffff + + test rax, SIZEOF_YMMWORD-1 + jz short .skip + push rdx ; dl is used as scratch below + mov dl, JSAMPLE [rcx+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rcx+rax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [rbx+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rbx+rax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop rdx +.skip: + ; -- process the first column block + + vmovdqu ymm0, YMMWORD [rbx+0*SIZEOF_YMMWORD] ; ymm0=row[ 0][0] + vmovdqu ymm1, YMMWORD [rcx+0*SIZEOF_YMMWORD] ; ymm1=row[-1][0] + vmovdqu ymm2, YMMWORD [rsi+0*SIZEOF_YMMWORD] ; ymm2=row[+1][0] + + vpunpckhbw ymm4, ymm0, ymm8 ; ymm4=row[ 0]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm5, ymm0, ymm8 ; ymm5=row[ 0]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm0, ymm5, ymm4, 0x20 ; ymm0=row[ 0]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=row[ 0](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpunpckhbw ymm5, ymm1, ymm8 ; ymm5=row[-1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm6, ymm1, ymm8 ; ymm6=row[-1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm1, ymm6, ymm5, 0x20 ; ymm1=row[-1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=row[-1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpunpckhbw ymm6, ymm2, ymm8 ; ymm6=row[+1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm3, ymm2, ymm8 ; ymm3=row[+1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm2, ymm3, ymm6, 0x20 ; ymm2=row[+1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) +
vperm2i128 ymm6, ymm3, ymm6, 0x31 ; ymm6=row[+1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpmullw ymm0, ymm0, [rel PW_THREE] ; 3 * row[ 0] + vpmullw ymm4, ymm4, [rel PW_THREE] + + vpaddw ymm1, ymm1, ymm0 ; ymm1=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vpaddw ymm5, ymm5, ymm4 ; ymm5=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + vpaddw ymm2, ymm2, ymm0 ; ymm2=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vpaddw ymm6, ymm6, ymm4 ; ymm6=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vmovdqu YMMWORD [rdx+0*SIZEOF_YMMWORD], ymm1 ; temporarily save + vmovdqu YMMWORD [rdx+1*SIZEOF_YMMWORD], ymm5 ; the intermediate data + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymm2 ; (Int1 is parked in outptr1 the same way) + vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymm6 + + vpand ymm1, ymm1, ymm10 ; ymm1=( 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + vpand ymm2, ymm2, ymm10 ; ymm2=( 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + + vmovdqa YMMWORD [wk(0)], ymm1 ; wk(0)=left-edge (-1) word for the upper row + vmovdqa YMMWORD [wk(1)], ymm2 ; wk(1)=left-edge (-1) word for the lower row + + add rax, byte SIZEOF_YMMWORD-1 + and rax, byte -SIZEOF_YMMWORD ; round colctr up to a multiple of SIZEOF_YMMWORD + cmp rax, byte SIZEOF_YMMWORD + ja short .columnloop + +.columnloop_last: + ; -- process the last column block + + vpand ymm1, ymm9, YMMWORD [rdx+1*SIZEOF_YMMWORD] + vpand ymm2, ymm9, YMMWORD [rdi+1*SIZEOF_YMMWORD] + + vmovdqa YMMWORD [wk(2)], ymm1 ; ymm1=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31) + vmovdqa YMMWORD [wk(3)], ymm2 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31) + + jmp near .upsample + +.columnloop: + ; -- process the next column block + + vmovdqu ymm0, YMMWORD [rbx+1*SIZEOF_YMMWORD] ; ymm0=row[ 0][1] + vmovdqu ymm1, YMMWORD [rcx+1*SIZEOF_YMMWORD] ; ymm1=row[-1][1] + vmovdqu ymm2, YMMWORD [rsi+1*SIZEOF_YMMWORD] ; ymm2=row[+1][1] + + vpunpckhbw ymm4, ymm0, ymm8 ; ymm4=row[ 0]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm5, ymm0, ymm8 ; ymm5=row[ 0]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm0, ymm5, ymm4, 0x20 ; ymm0=row[ 0]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) +
vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=row[ 0](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpunpckhbw ymm5, ymm1, ymm8 ; ymm5=row[-1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm6, ymm1, ymm8 ; ymm6=row[-1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm1, ymm6, ymm5, 0x20 ; ymm1=row[-1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm5, ymm6, ymm5, 0x31 ; ymm5=row[-1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpunpckhbw ymm6, ymm2, ymm8 ; ymm6=row[+1]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) + vpunpcklbw ymm7, ymm2, ymm8 ; ymm7=row[+1]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) + vperm2i128 ymm2, ymm7, ymm6, 0x20 ; ymm2=row[+1]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vperm2i128 ymm6, ymm7, ymm6, 0x31 ; ymm6=row[+1](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vpmullw ymm0, ymm0, [rel PW_THREE] ; 3 * row[ 0] + vpmullw ymm4, ymm4, [rel PW_THREE] + + vpaddw ymm1, ymm1, ymm0 ; ymm1=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vpaddw ymm5, ymm5, ymm4 ; ymm5=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + vpaddw ymm2, ymm2, ymm0 ; ymm2=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vpaddw ymm6, ymm6, ymm4 ; ymm6=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vmovdqu YMMWORD [rdx+2*SIZEOF_YMMWORD], ymm1 ; temporarily save + vmovdqu YMMWORD [rdx+3*SIZEOF_YMMWORD], ymm5 ; the intermediate data + vmovdqu YMMWORD [rdi+2*SIZEOF_YMMWORD], ymm2 + vmovdqu YMMWORD [rdi+3*SIZEOF_YMMWORD], ymm6 + + vperm2i128 ymm1, ymm8, ymm1, 0x20 + vpslldq ymm1, ymm1, 14 ; ymm1=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 0) + vperm2i128 ymm2, ymm8, ymm2, 0x20 + vpslldq ymm2, ymm2, 14 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 0) + + vmovdqa YMMWORD [wk(2)], ymm1 ; wk(2)=right-neighbour (32) word for the upper row + vmovdqa YMMWORD [wk(3)], ymm2 ; wk(3)=right-neighbour (32) word for the lower row + +.upsample: + ; -- process the upper row + + vmovdqu ymm7, YMMWORD [rdx+0*SIZEOF_YMMWORD] ; ymm7=Int0L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vmovdqu ymm3, YMMWORD [rdx+1*SIZEOF_YMMWORD] ;
ymm3=Int0H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vperm2i128 ymm0, ymm8, ymm7, 0x03 + vpalignr ymm0, ymm0, ymm7, 2 ; ymm0=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 --) + vperm2i128 ymm4, ymm8, ymm3, 0x20 + vpslldq ymm4, ymm4, 14 ; ymm4=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 16) + + vperm2i128 ymm5, ymm8, ymm7, 0x03 + vpsrldq ymm5, ymm5, 14 ; ymm5=(15 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + vperm2i128 ymm6, ymm8, ymm3, 0x20 + vpalignr ymm6, ymm3, ymm6, 14 ; ymm6=(-- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) + + vpor ymm0, ymm0, ymm4 ; ymm0=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16) + vpor ymm5, ymm5, ymm6 ; ymm5=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) + + vperm2i128 ymm2, ymm8, ymm3, 0x03 + vpalignr ymm2, ymm2, ymm3, 2 ; ymm2=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 --) + vperm2i128 ymm4, ymm8, ymm3, 0x03 + vpsrldq ymm4, ymm4, 14 ; ymm4=(31 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + vperm2i128 ymm1, ymm8, ymm7, 0x20 + vpalignr ymm1, ymm7, ymm1, 14 ; ymm1=(-- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14) + + vpor ymm1, ymm1, YMMWORD [wk(0)] ; ymm1=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14) + vpor ymm2, ymm2, YMMWORD [wk(2)] ; ymm2=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32) + + vmovdqa YMMWORD [wk(0)], ymm4 ; word 31 becomes the next block's -1 (upper row) + + vpmullw ymm7, ymm7, [rel PW_THREE] + vpmullw ymm3, ymm3, [rel PW_THREE] + vpaddw ymm1, ymm1, [rel PW_EIGHT] + vpaddw ymm5, ymm5, [rel PW_EIGHT] + vpaddw ymm0, ymm0, [rel PW_SEVEN] + vpaddw ymm2, [rel PW_SEVEN] + + vpaddw ymm1, ymm1, ymm7 + vpaddw ymm5, ymm5, ymm3 + vpsrlw ymm1, ymm1, 4 ; ymm1=Out0LE=( 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30) + vpsrlw ymm5, ymm5, 4 ; ymm5=Out0HE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62) + vpaddw ymm0, ymm0, ymm7 + vpaddw ymm2, ymm2, ymm3 + vpsrlw ymm0, ymm0, 4 ; ymm0=Out0LO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31) + vpsrlw ymm2, ymm2, 4 ; ymm2=Out0HO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63) + + vpsllw ymm0, ymm0, BYTE_BIT + vpsllw ymm2, ymm2, BYTE_BIT + vpor
ymm1, ymm1, ymm0 ; ymm1=Out0L=( 0 1 2 ... 29 30 31) + vpor ymm5, ymm5, ymm2 ; ymm5=Out0H=(32 33 34 ... 61 62 63) + + vmovdqu YMMWORD [rdx+0*SIZEOF_YMMWORD], ymm1 + vmovdqu YMMWORD [rdx+1*SIZEOF_YMMWORD], ymm5 + + ; -- process the lower row + + vmovdqu ymm6, YMMWORD [rdi+0*SIZEOF_YMMWORD] ; ymm6=Int1L=( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) + vmovdqu ymm4, YMMWORD [rdi+1*SIZEOF_YMMWORD] ; ymm4=Int1H=(16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) + + vperm2i128 ymm7, ymm8, ymm6, 0x03 + vpalignr ymm7, ymm7, ymm6, 2 ; ymm7=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 --) + vperm2i128 ymm3, ymm8, ymm4, 0x20 + vpslldq ymm3, ymm3, 14 ; ymm3=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 16) + + vperm2i128 ymm0, ymm8, ymm6, 0x03 + vpsrldq ymm0, ymm0, 14 ; ymm0=(15 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + vperm2i128 ymm2, ymm8, ymm4, 0x20 + vpalignr ymm2, ymm4, ymm2, 14 ; ymm2=(-- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) + + vpor ymm7, ymm7, ymm3 ; ymm7=( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16) + vpor ymm0, ymm0, ymm2 ; ymm0=(15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30) + + vperm2i128 ymm5, ymm8, ymm4, 0x03 + vpalignr ymm5, ymm5, ymm4, 2 ; ymm5=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 --) + vperm2i128 ymm3, ymm8, ymm4, 0x03 + vpsrldq ymm3, ymm3, 14 ; ymm3=(31 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --) + vperm2i128 ymm1, ymm8, ymm6, 0x20 + vpalignr ymm1, ymm6, ymm1, 14 ; ymm1=(-- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14) + + vpor ymm1, ymm1, YMMWORD [wk(1)] ; ymm1=(-1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14) + vpor ymm5, ymm5, YMMWORD [wk(3)] ; ymm5=(17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32) + + vmovdqa YMMWORD [wk(1)], ymm3 ; word 31 becomes the next block's -1 (lower row) + + vpmullw ymm6, ymm6, [rel PW_THREE] + vpmullw ymm4, ymm4, [rel PW_THREE] + vpaddw ymm1, ymm1, [rel PW_EIGHT] + vpaddw ymm0, ymm0, [rel PW_EIGHT] + vpaddw ymm7, ymm7, [rel PW_SEVEN] + vpaddw ymm5, ymm5, [rel PW_SEVEN] + + vpaddw ymm1, ymm1, ymm6 + vpaddw ymm0, ymm0, ymm4 + vpsrlw ymm1, ymm1, 4 ; ymm1=Out1LE=( 0 2 4 6 8 10 12 14 16 18 20
22 24 26 28 30) + vpsrlw ymm0, ymm0, 4 ; ymm0=Out1HE=(32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62) + vpaddw ymm7, ymm7, ymm6 + vpaddw ymm5, ymm5, ymm4 + vpsrlw ymm7, ymm7, 4 ; ymm7=Out1LO=( 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31) + vpsrlw ymm5, ymm5, 4 ; ymm5=Out1HO=(33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63) + + vpsllw ymm7, ymm7, BYTE_BIT + vpsllw ymm5, ymm5, BYTE_BIT + vpor ymm1, ymm1, ymm7 ; ymm1=Out1L=( 0 1 2 ... 29 30 31) + vpor ymm0, ymm0, ymm5 ; ymm0=Out1H=(32 33 34 ... 61 62 63) + + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymm1 + vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymm0 + + sub rax, byte SIZEOF_YMMWORD + add rcx, byte 1*SIZEOF_YMMWORD ; inptr1(above) + add rbx, byte 1*SIZEOF_YMMWORD ; inptr0 + add rsi, byte 1*SIZEOF_YMMWORD ; inptr1(below) + add rdx, byte 2*SIZEOF_YMMWORD ; outptr0 + add rdi, byte 2*SIZEOF_YMMWORD ; outptr1 + cmp rax, byte SIZEOF_YMMWORD + ja near .columnloop + test rax, rax ; exactly one block left? + jnz near .columnloop_last + + pop rsi + pop rdi + pop rcx ; rowctr + pop rax ; colctr + + add rsi, byte 1*SIZEOF_JSAMPROW ; input_data + add rdi, byte 2*SIZEOF_JSAMPROW ; output_data + sub rcx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop rbx + vzeroupper + uncollect_args 4 + pop_xmm 3 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter.
+; +; GLOBAL(void) +; jsimd_h2v1_upsample_avx2(int max_v_samp_factor, JDIMENSION output_width, +; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +; Each input sample is written twice in a row to the output (byte duplication via unpack-with-self). + +; r10 = int max_v_samp_factor +; r11d = JDIMENSION output_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2) + +EXTN(jsimd_h2v1_upsample_avx2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 4 + + mov edx, r11d ; output_width + add rdx, byte (SIZEOF_YMMWORD-1) + and rdx, -SIZEOF_YMMWORD ; rdx = output_width rounded up to a multiple of SIZEOF_YMMWORD + jz near .return + + mov rcx, r10 ; rowctr + test rcx, rcx + jz short .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr + mov rax, rdx ; colctr +.columnloop: + + cmp rax, byte SIZEOF_YMMWORD + ja near .above_16 ; more than one YMMWORD of output still to write + + vmovdqu xmm0, XMMWORD [rsi+0*SIZEOF_YMMWORD] ; last block: one XMMWORD of input (offset is 0) + vpunpckhbw xmm1, xmm0, xmm0 ; xmm1 = upper 8 input bytes, each duplicated + vpunpcklbw xmm0, xmm0, xmm0 ; xmm0 = lower 8 input bytes, each duplicated + + vmovdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + vmovdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 + + jmp short .nextrow + +.above_16: + vmovdqu ymm0, YMMWORD [rsi+0*SIZEOF_YMMWORD] ; one YMMWORD of input + + vpermq ymm0, ymm0, 0xd8 ; qword order (0 2 1 3) so the per-lane unpacks below come out in sequence + vpunpckhbw ymm1, ymm0, ymm0 ; ymm1 = input bytes 16-31, each duplicated + vpunpcklbw ymm0, ymm0, ymm0 ; ymm0 = input bytes 0-15, each duplicated + + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymm0 + vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymm1 + + sub rax, byte 2*SIZEOF_YMMWORD ; two YMMWORDs of output were written + jz short .nextrow + + add rsi, byte SIZEOF_YMMWORD ; inptr + add rdi, byte 2*SIZEOF_YMMWORD ; outptr + jmp short .columnloop + +.nextrow: + pop rsi + pop rdi + + add rsi, byte SIZEOF_JSAMPROW ; input_data + add rdi, byte SIZEOF_JSAMPROW ; output_data + dec rcx ; rowctr + jg short .rowloop + +.return: + vzeroupper + uncollect_args 4 + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter.
+; +; GLOBAL(void) +; jsimd_h2v2_upsample_avx2(int max_v_samp_factor, JDIMENSION output_width, +; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +; Each input row is byte-duplicated horizontally and stored to two output rows (outptr0 and outptr1). + +; r10 = int max_v_samp_factor +; r11d = JDIMENSION output_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2) + +EXTN(jsimd_h2v2_upsample_avx2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 4 + push rbx + + mov edx, r11d ; output_width + add rdx, byte (SIZEOF_YMMWORD-1) + and rdx, -SIZEOF_YMMWORD ; rdx = output_width rounded up to a multiple of SIZEOF_YMMWORD + jz near .return + + mov rcx, r10 ; rowctr + test rcx, rcx + jz near .return + + mov rsi, r12 ; input_data + mov rdi, r13 + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0 + mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1 + mov rax, rdx ; colctr +.columnloop: + + cmp rax, byte SIZEOF_YMMWORD + ja short .above_16 ; more than one YMMWORD of output still to write + + vmovdqu xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; last block: one XMMWORD of input + vpunpckhbw xmm1, xmm0, xmm0 ; xmm1 = upper 8 input bytes, each duplicated + vpunpcklbw xmm0, xmm0, xmm0 ; xmm0 = lower 8 input bytes, each duplicated + + vmovdqu XMMWORD [rbx+0*SIZEOF_XMMWORD], xmm0 + vmovdqu XMMWORD [rbx+1*SIZEOF_XMMWORD], xmm1 + vmovdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + vmovdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 + + jmp near .nextrow + +.above_16: + vmovdqu ymm0, YMMWORD [rsi+0*SIZEOF_YMMWORD] ; one YMMWORD of input + + vpermq ymm0, ymm0, 0xd8 ; qword order (0 2 1 3) so the per-lane unpacks below come out in sequence + vpunpckhbw ymm1, ymm0, ymm0 ; ymm1 = input bytes 16-31, each duplicated + vpunpcklbw ymm0, ymm0, ymm0 ; ymm0 = input bytes 0-15, each duplicated + + vmovdqu YMMWORD [rbx+0*SIZEOF_YMMWORD], ymm0 + vmovdqu YMMWORD [rbx+1*SIZEOF_YMMWORD], ymm1 + vmovdqu YMMWORD [rdi+0*SIZEOF_YMMWORD], ymm0 + vmovdqu YMMWORD [rdi+1*SIZEOF_YMMWORD], ymm1 + + sub rax, byte 2*SIZEOF_YMMWORD ; two YMMWORDs of output were written per row + jz short .nextrow + + add rsi, byte SIZEOF_YMMWORD ; inptr + add rbx, 2*SIZEOF_YMMWORD ; outptr0 + add rdi, 2*SIZEOF_YMMWORD ; outptr1 + jmp short .columnloop + +.nextrow: + pop rsi + pop rdi + + add rsi, byte 1*SIZEOF_JSAMPROW ; input_data + add rdi, byte 2*SIZEOF_JSAMPROW ; output_data + sub rcx, byte 2 ; rowctr + jg near .rowloop +
+.return: + pop rbx + vzeroupper + uncollect_args 4 + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jdsample-sse2.asm b/simd/x86_64/jdsample-sse2.asm new file mode 100644 index 0000000..d8ccda9 --- /dev/null +++ b/simd/x86_64/jdsample-sse2.asm @@ -0,0 +1,666 @@ +; +; jdsample.asm - upsampling (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fancy_upsample_sse2) + +EXTN(jconst_fancy_upsample_sse2): + +PW_ONE times 8 dw 1 +PW_TWO times 8 dw 2 +PW_THREE times 8 dw 3 +PW_SEVEN times 8 dw 7 +PW_EIGHT times 8 dw 8 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. +; +; The upsampling algorithm is linear interpolation between pixel centers, +; also known as a "triangle filter". This is a good compromise between +; speed and visual quality. The centers of the output pixels are 1/4 and 3/4 +; of the way between input pixel centers.
+; +; GLOBAL(void) +; jsimd_h2v1_fancy_upsample_sse2(int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11d = JDIMENSION downsampled_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2) + +EXTN(jsimd_h2v1_fancy_upsample_sse2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 4 + + mov eax, r11d ; colctr + test rax, rax + jz near .return ; zero width: nothing to do + + mov rcx, r10 ; rowctr + test rcx, rcx + jz near .return ; zero rows: nothing to do + + mov rsi, r12 ; input_data + mov rdi, r13 ; output_data_ptr + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rax ; colctr + push rdi + push rsi + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr + + test rax, SIZEOF_XMMWORD-1 ; width a multiple of SIZEOF_XMMWORD? + jz short .skip + mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] ; dl = last sample of the row + mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample +.skip: + pxor xmm0, xmm0 ; xmm0=(all 0's) + pcmpeqb xmm7, xmm7 ; xmm7=(all 1's) + psrldq xmm7, (SIZEOF_XMMWORD-1) ; xmm7=(FF -- -- ... -- -- --) + pand xmm7, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; xmm7=( 0 -- -- ... -- -- --) + + add rax, byte SIZEOF_XMMWORD-1 + and rax, byte -SIZEOF_XMMWORD ; round colctr up to a multiple of SIZEOF_XMMWORD + cmp rax, byte SIZEOF_XMMWORD + ja short .columnloop + +.columnloop_last: + pcmpeqb xmm6, xmm6 ; xmm6=(all 1's) + pslldq xmm6, (SIZEOF_XMMWORD-1) ; xmm6=(-- -- -- ... -- -- FF) + pand xmm6, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; xmm6=(-- -- -- ... -- -- 15) + jmp short .upsample + +.columnloop: + movdqa xmm6, XMMWORD [rsi+1*SIZEOF_XMMWORD] + pslldq xmm6, (SIZEOF_XMMWORD-1) ; xmm6=(-- -- -- ... -- -- 16) + +.upsample: + movdqa xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD] + movdqa xmm2, xmm1 + movdqa xmm3, xmm1 ; xmm1=( 0 1 2 ... 13 14 15) + pslldq xmm2, 1 ; xmm2=(-- 0 1 ... 12 13 14) + psrldq xmm3, 1 ; xmm3=( 1 2 3 ... 14 15 --) + + por xmm2, xmm7 ; xmm2=(-1 0 1 ... 12 13 14) + por xmm3, xmm6 ; xmm3=( 1 2 3 ... 14 15 16) + + movdqa xmm7, xmm1 + psrldq xmm7, (SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ...
-- -- --) + + movdqa xmm4, xmm1 + punpcklbw xmm1, xmm0 ; xmm1=( 0 1 2 3 4 5 6 7) + punpckhbw xmm4, xmm0 ; xmm4=( 8 9 10 11 12 13 14 15) + movdqa xmm5, xmm2 + punpcklbw xmm2, xmm0 ; xmm2=(-1 0 1 2 3 4 5 6) + punpckhbw xmm5, xmm0 ; xmm5=( 7 8 9 10 11 12 13 14) + movdqa xmm6, xmm3 + punpcklbw xmm3, xmm0 ; xmm3=( 1 2 3 4 5 6 7 8) + punpckhbw xmm6, xmm0 ; xmm6=( 9 10 11 12 13 14 15 16) + + pmullw xmm1, [rel PW_THREE] + pmullw xmm4, [rel PW_THREE] + paddw xmm2, [rel PW_ONE] + paddw xmm5, [rel PW_ONE] + paddw xmm3, [rel PW_TWO] + paddw xmm6, [rel PW_TWO] + + paddw xmm2, xmm1 + paddw xmm5, xmm4 + psrlw xmm2, 2 ; xmm2=OutLE=( 0 2 4 6 8 10 12 14) + psrlw xmm5, 2 ; xmm5=OutHE=(16 18 20 22 24 26 28 30) + paddw xmm3, xmm1 + paddw xmm6, xmm4 + psrlw xmm3, 2 ; xmm3=OutLO=( 1 3 5 7 9 11 13 15) + psrlw xmm6, 2 ; xmm6=OutHO=(17 19 21 23 25 27 29 31) + + psllw xmm3, BYTE_BIT + psllw xmm6, BYTE_BIT + por xmm2, xmm3 ; xmm2=OutL=( 0 1 2 ... 13 14 15) + por xmm5, xmm6 ; xmm5=OutH=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm5 + + sub rax, byte SIZEOF_XMMWORD + add rsi, byte 1*SIZEOF_XMMWORD ; inptr + add rdi, byte 2*SIZEOF_XMMWORD ; outptr + cmp rax, byte SIZEOF_XMMWORD + ja near .columnloop + test eax, eax ; any columns left? + jnz near .columnloop_last + + pop rsi + pop rdi + pop rax + + add rsi, byte SIZEOF_JSAMPROW ; input_data + add rdi, byte SIZEOF_JSAMPROW ; output_data + dec rcx ; rowctr + jg near .rowloop + +.return: + uncollect_args 4 + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. +; Again a triangle filter; see comments for h2v1 case, above.
+; +; GLOBAL(void) +; jsimd_h2v2_fancy_upsample_sse2(int max_v_samp_factor, +; JDIMENSION downsampled_width, +; JSAMPARRAY input_data, +; JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11d = JDIMENSION downsampled_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 4 + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2) + +EXTN(jsimd_h2v2_fancy_upsample_sse2): + push rbp + mov rax, rsp ; rax = original rsp (points at the saved rbp) + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax ; keep the original rsp for the epilogue + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 4 + push rbx + + mov eax, r11d ; colctr + test rax, rax + jz near .return ; zero width: nothing to do + + mov rcx, r10 ; rowctr + test rcx, rcx + jz near .return ; zero rows: nothing to do + + mov rsi, r12 ; input_data + mov rdi, r13 ; output_data_ptr + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rax ; colctr + push rcx + push rdi + push rsi + + mov rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW] ; inptr1(above) + mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; inptr0 + mov rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; inptr1(below) + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0 + mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1 + + test rax, SIZEOF_XMMWORD-1 ; width a multiple of SIZEOF_XMMWORD? + jz short .skip + push rdx ; keep outptr0: dl is used as scratch below + mov dl, JSAMPLE [rcx+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rcx+rax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [rbx+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rbx+rax*SIZEOF_JSAMPLE], dl + mov dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE] + mov JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl ; insert a dummy sample + pop rdx ; restore outptr0 +.skip: + ; -- process the first column block + + movdqa xmm0, XMMWORD [rbx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0] + movdqa xmm1, XMMWORD [rcx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0] + movdqa xmm2, XMMWORD [rsi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0] + + pxor xmm3, xmm3 ; xmm3=(all 0's) + movdqa xmm4, xmm0 + punpcklbw xmm0, xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) +
punpckhbw xmm4, xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa xmm5, xmm1 + punpcklbw xmm1, xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5, xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6, xmm2 + punpcklbw xmm2, xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6, xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0, [rel PW_THREE] + pmullw xmm4, [rel PW_THREE] + + pcmpeqb xmm7, xmm7 ; xmm7=(all 1's) + psrldq xmm7, (SIZEOF_XMMWORD-2) ; xmm7=(FFFF -- -- -- -- -- -- --) + + paddw xmm1, xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5, xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2, xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6, xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm6 + + pand xmm1, xmm7 ; xmm1=( 0 -- -- -- -- -- -- --) + pand xmm2, xmm7 ; xmm2=( 0 -- -- -- -- -- -- --) + + movdqa XMMWORD [wk(0)], xmm1 + movdqa XMMWORD [wk(1)], xmm2 + + add rax, byte SIZEOF_XMMWORD-1 + and rax, byte -SIZEOF_XMMWORD ; round colctr up to a multiple of SIZEOF_XMMWORD + cmp rax, byte SIZEOF_XMMWORD + ja short .columnloop + +.columnloop_last: + ; -- process the last column block + + pcmpeqb xmm1, xmm1 ; xmm1=(all 1's) + pslldq xmm1, (SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- FFFF) + movdqa xmm2, xmm1 + + pand xmm1, XMMWORD [rdx+1*SIZEOF_XMMWORD] + pand xmm2, XMMWORD [rdi+1*SIZEOF_XMMWORD] + + movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15) + movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15) + + jmp near .upsample + +.columnloop: + ; -- process the next column block + + movdqa xmm0, XMMWORD [rbx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1] + movdqa xmm1, XMMWORD [rcx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1] + movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1] + + pxor xmm3, xmm3 ; xmm3=(all 0's) + movdqa xmm4, xmm0 + punpcklbw xmm0, xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7) + punpckhbw xmm4, xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15) + movdqa
xmm5, xmm1 + punpcklbw xmm1, xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm5, xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15) + movdqa xmm6, xmm2 + punpcklbw xmm2, xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7) + punpckhbw xmm6, xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15) + + pmullw xmm0, [rel PW_THREE] + pmullw xmm4, [rel PW_THREE] + + paddw xmm1, xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7) + paddw xmm5, xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15) + paddw xmm2, xmm0 ; xmm2=Int1L=( 0 1 2 3 4 5 6 7) + paddw xmm6, xmm4 ; xmm6=Int1H=( 8 9 10 11 12 13 14 15) + + movdqa XMMWORD [rdx+2*SIZEOF_XMMWORD], xmm1 ; temporarily save + movdqa XMMWORD [rdx+3*SIZEOF_XMMWORD], xmm5 ; the intermediate data + movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm6 + + pslldq xmm1, (SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- -- 0) + pslldq xmm2, (SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- -- 0) + + movdqa XMMWORD [wk(2)], xmm1 + movdqa XMMWORD [wk(3)], xmm2 + +.upsample: + ; -- process the upper row + + movdqa xmm7, XMMWORD [rdx+0*SIZEOF_XMMWORD] + movdqa xmm3, XMMWORD [rdx+1*SIZEOF_XMMWORD] + + movdqa xmm0, xmm7 ; xmm7=Int0L=( 0 1 2 3 4 5 6 7) + movdqa xmm4, xmm3 ; xmm3=Int0H=( 8 9 10 11 12 13 14 15) + psrldq xmm0, 2 ; xmm0=( 1 2 3 4 5 6 7 --) + pslldq xmm4, (SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- -- 8) + movdqa xmm5, xmm7 + movdqa xmm6, xmm3 + psrldq xmm5, (SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --) + pslldq xmm6, 2 ; xmm6=(-- 8 9 10 11 12 13 14) + + por xmm0, xmm4 ; xmm0=( 1 2 3 4 5 6 7 8) + por xmm5, xmm6 ; xmm5=( 7 8 9 10 11 12 13 14) + + movdqa xmm1, xmm7 + movdqa xmm2, xmm3 + pslldq xmm1, 2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm2, 2 ; xmm2=( 9 10 11 12 13 14 15 --) + movdqa xmm4, xmm3 + psrldq xmm4, (SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(0)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm2, XMMWORD [wk(2)] ; xmm2=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(0)], xmm4 + + pmullw xmm7, [rel PW_THREE] + pmullw xmm3,
[rel PW_THREE] + paddw xmm1, [rel PW_EIGHT] + paddw xmm5, [rel PW_EIGHT] + paddw xmm0, [rel PW_SEVEN] + paddw xmm2, [rel PW_SEVEN] + + paddw xmm1, xmm7 + paddw xmm5, xmm3 + psrlw xmm1, 4 ; xmm1=Out0LE=( 0 2 4 6 8 10 12 14) + psrlw xmm5, 4 ; xmm5=Out0HE=(16 18 20 22 24 26 28 30) + paddw xmm0, xmm7 + paddw xmm2, xmm3 + psrlw xmm0, 4 ; xmm0=Out0LO=( 1 3 5 7 9 11 13 15) + psrlw xmm2, 4 ; xmm2=Out0HO=(17 19 21 23 25 27 29 31) + + psllw xmm0, BYTE_BIT + psllw xmm2, BYTE_BIT + por xmm1, xmm0 ; xmm1=Out0L=( 0 1 2 ... 13 14 15) + por xmm5, xmm2 ; xmm5=Out0H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5 + + ; -- process the lower row + + movdqa xmm6, XMMWORD [rdi+0*SIZEOF_XMMWORD] + movdqa xmm4, XMMWORD [rdi+1*SIZEOF_XMMWORD] + + movdqa xmm7, xmm6 ; xmm6=Int1L=( 0 1 2 3 4 5 6 7) + movdqa xmm3, xmm4 ; xmm4=Int1H=( 8 9 10 11 12 13 14 15) + psrldq xmm7, 2 ; xmm7=( 1 2 3 4 5 6 7 --) + pslldq xmm3, (SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- -- 8) + movdqa xmm0, xmm6 + movdqa xmm2, xmm4 + psrldq xmm0, (SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --) + pslldq xmm2, 2 ; xmm2=(-- 8 9 10 11 12 13 14) + + por xmm7, xmm3 ; xmm7=( 1 2 3 4 5 6 7 8) + por xmm0, xmm2 ; xmm0=( 7 8 9 10 11 12 13 14) + + movdqa xmm1, xmm6 + movdqa xmm5, xmm4 + pslldq xmm1, 2 ; xmm1=(-- 0 1 2 3 4 5 6) + psrldq xmm5, 2 ; xmm5=( 9 10 11 12 13 14 15 --) + movdqa xmm3, xmm4 + psrldq xmm3, (SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --) + + por xmm1, XMMWORD [wk(1)] ; xmm1=(-1 0 1 2 3 4 5 6) + por xmm5, XMMWORD [wk(3)] ; xmm5=( 9 10 11 12 13 14 15 16) + + movdqa XMMWORD [wk(1)], xmm3 + + pmullw xmm6, [rel PW_THREE] + pmullw xmm4, [rel PW_THREE] + paddw xmm1, [rel PW_EIGHT] + paddw xmm0, [rel PW_EIGHT] + paddw xmm7, [rel PW_SEVEN] + paddw xmm5, [rel PW_SEVEN] + + paddw xmm1, xmm6 + paddw xmm0, xmm4 + psrlw xmm1, 4 ; xmm1=Out1LE=( 0 2 4 6 8 10 12 14) + psrlw xmm0, 4 ; xmm0=Out1HE=(16 18 20 22 24 26 28 30) + paddw xmm7, xmm6 + paddw xmm5,
xmm4 + psrlw xmm7, 4 ; xmm7=Out1LO=( 1 3 5 7 9 11 13 15) + psrlw xmm5, 4 ; xmm5=Out1HO=(17 19 21 23 25 27 29 31) + + psllw xmm7, BYTE_BIT + psllw xmm5, BYTE_BIT + por xmm1, xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15) + por xmm0, xmm5 ; xmm0=Out1H=(16 17 18 ... 29 30 31) + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm0 + + sub rax, byte SIZEOF_XMMWORD + add rcx, byte 1*SIZEOF_XMMWORD ; inptr1(above) + add rbx, byte 1*SIZEOF_XMMWORD ; inptr0 + add rsi, byte 1*SIZEOF_XMMWORD ; inptr1(below) + add rdx, byte 2*SIZEOF_XMMWORD ; outptr0 + add rdi, byte 2*SIZEOF_XMMWORD ; outptr1 + cmp rax, byte SIZEOF_XMMWORD + ja near .columnloop + test rax, rax ; any columns left? + jnz near .columnloop_last + + pop rsi + pop rdi + pop rcx + pop rax + + add rsi, byte 1*SIZEOF_JSAMPROW ; input_data + add rdi, byte 2*SIZEOF_JSAMPROW ; output_data + sub rcx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop rbx + uncollect_args 4 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rsp (stored by the prologue) + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 1:1 vertical. +; It's still a box filter.
+; +; GLOBAL(void) +; jsimd_h2v1_upsample_sse2(int max_v_samp_factor, JDIMENSION output_width, +; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11d = JDIMENSION output_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2) + +EXTN(jsimd_h2v1_upsample_sse2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 4 + + mov edx, r11d ; rdx = output_width (32-bit move zero-extends) + add rdx, byte (2*SIZEOF_XMMWORD)-1 + and rdx, byte -(2*SIZEOF_XMMWORD) ; round up to a multiple of 2*SIZEOF_XMMWORD + jz near .return ; zero width: nothing to do + + mov rcx, r10 ; rowctr + test rcx, rcx + jz short .return ; zero rows: nothing to do + + mov rsi, r12 ; input_data + mov rdi, r13 ; output_data_ptr + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rdi ; save output_data cursor + push rsi ; save input_data cursor + + mov rsi, JSAMPROW [rsi] ; inptr + mov rdi, JSAMPROW [rdi] ; outptr + mov rax, rdx ; colctr +.columnloop: + + movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] + + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm0 ; xmm0=lower 8 samples, each duplicated + punpckhbw xmm1, xmm1 ; xmm1=upper 8 samples, each duplicated + + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 + + sub rax, byte 2*SIZEOF_XMMWORD ; two XMMWORDs of output were written + jz short .nextrow + + movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] + + movdqa xmm3, xmm2 + punpcklbw xmm2, xmm2 ; xmm2=lower 8 samples, each duplicated + punpckhbw xmm3, xmm3 ; xmm3=upper 8 samples, each duplicated + + movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3 + + sub rax, byte 2*SIZEOF_XMMWORD ; two more XMMWORDs of output were written + jz short .nextrow + + add rsi, byte 2*SIZEOF_XMMWORD ; inptr + add rdi, byte 4*SIZEOF_XMMWORD ; outptr + jmp short .columnloop + +.nextrow: + pop rsi + pop rdi + + add rsi, byte SIZEOF_JSAMPROW ; input_data + add rdi, byte SIZEOF_JSAMPROW ; output_data + dec rcx ; rowctr + jg short .rowloop + +.return: + uncollect_args 4 + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Fast processing for the common case of 2:1 horizontal and 2:1 vertical. +; It's still a box filter.
+; +; GLOBAL(void) +; jsimd_h2v2_upsample_sse2(int max_v_samp_factor, JDIMENSION output_width, +; JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +; + +; r10 = int max_v_samp_factor +; r11d = JDIMENSION output_width +; r12 = JSAMPARRAY input_data +; r13 = JSAMPARRAY *output_data_ptr + + align 32 + GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2) + +EXTN(jsimd_h2v2_upsample_sse2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 4 + push rbx ; rbx is used as outptr0 below + + mov edx, r11d ; rdx = output_width (32-bit move zero-extends) + add rdx, byte (2*SIZEOF_XMMWORD)-1 + and rdx, byte -(2*SIZEOF_XMMWORD) ; round up to a multiple of 2*SIZEOF_XMMWORD + jz near .return ; zero width: nothing to do + + mov rcx, r10 ; rowctr + test rcx, rcx + jz near .return ; zero rows: nothing to do + + mov rsi, r12 ; input_data + mov rdi, r13 ; output_data_ptr + mov rdi, JSAMPARRAY [rdi] ; output_data +.rowloop: + push rdi ; save output_data cursor + push rsi ; save input_data cursor + + mov rsi, JSAMPROW [rsi] ; inptr + mov rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] ; outptr0 + mov rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] ; outptr1 + mov rax, rdx ; colctr +.columnloop: + + movdqa xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD] + + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm0 ; xmm0=lower 8 samples, each duplicated + punpckhbw xmm1, xmm1 ; xmm1=upper 8 samples, each duplicated + + movdqa XMMWORD [rbx+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [rbx+1*SIZEOF_XMMWORD], xmm1 + movdqa XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0 + movdqa XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1 + + sub rax, byte 2*SIZEOF_XMMWORD ; two XMMWORDs of output were written per row + jz short .nextrow + + movdqa xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD] + + movdqa xmm3, xmm2 + punpcklbw xmm2, xmm2 ; xmm2=lower 8 samples, each duplicated + punpckhbw xmm3, xmm3 ; xmm3=upper 8 samples, each duplicated + + movdqa XMMWORD [rbx+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rbx+3*SIZEOF_XMMWORD], xmm3 + movdqa XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2 + movdqa XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3 + + sub rax, byte 2*SIZEOF_XMMWORD ; two more XMMWORDs of output were written per row + jz short .nextrow + + add rsi, byte 2*SIZEOF_XMMWORD ; inptr + add rbx, byte 4*SIZEOF_XMMWORD ; outptr0 + add rdi, byte 4*SIZEOF_XMMWORD ; outptr1 + jmp short .columnloop + +.nextrow: + pop rsi + pop rdi + + add rsi, byte 1*SIZEOF_JSAMPROW ; input_data + add rdi, byte 2*SIZEOF_JSAMPROW ; output_data + sub rcx, byte 2 ; rowctr + jg near .rowloop + +.return: + pop rbx ; restore the rbx saved in the prologue +
uncollect_args 4 + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jfdctflt-sse.asm b/simd/x86_64/jfdctflt-sse.asm new file mode 100644 index 0000000..26f9fb6 --- /dev/null +++ b/simd/x86_64/jfdctflt-sse.asm @@ -0,0 +1,357 @@ +; +; jfdctflt.asm - floating-point FDCT (64-bit SSE) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the forward DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
+; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5) + shufps %1, %2, 0x44 +%endmacro + +%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) + shufps %1, %2, 0xEE +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fdct_float_sse) + +EXTN(jconst_fdct_float_sse): + +PD_0_382 times 4 dd 0.382683432365089771728460 +PD_0_707 times 4 dd 0.707106781186547524400844 +PD_0_541 times 4 dd 0.541196100146196984399723 +PD_1_306 times 4 dd 1.306562964876376527856643 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_float_sse(FAST_FLOAT *data) +; + +; r10 = FAST_FLOAT *data + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_float_sse) + +EXTN(jsimd_fdct_float_sse): + push rbp + mov rax, rsp ; rax = original rsp (points at the saved rbp) + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax ; keep the original rsp for the epilogue + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 1 + + ; ---- Pass 1: process rows.
+ + mov rdx, r10 ; (FAST_FLOAT *) + mov rcx, DCTSIZE/4 ; loop counter: 4 rows per iteration +.rowloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(20 21 22 23), xmm2=(24 25 26 27) + ; xmm1=(30 31 32 33), xmm3=(34 35 36 37) + + movaps xmm4, xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0, xmm1 ; xmm0=(20 30 21 31) + unpckhps xmm4, xmm1 ; xmm4=(22 32 23 33) + movaps xmm5, xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2, xmm3 ; xmm2=(24 34 25 35) + unpckhps xmm5, xmm3 ; xmm5=(26 36 27 37) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 01 02 03), xmm1=(04 05 06 07) + ; xmm7=(10 11 12 13), xmm3=(14 15 16 17) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 32 23 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(24 34 25 35) + + movaps xmm4, xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6, xmm7 ; xmm6=(00 10 01 11) + unpckhps xmm4, xmm7 ; xmm4=(02 12 03 13) + movaps xmm2, xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1, xmm3 ; xmm1=(04 14 05 15) + unpckhps xmm2, xmm3 ; xmm2=(06 16 07 17) + + movaps xmm7, xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6, xmm0 ; xmm6=(00 10 20 30)=data0 + unpckhps2 xmm7, xmm0 ; xmm7=(01 11 21 31)=data1 + movaps xmm3, xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2, xmm5 ; xmm2=(06 16 26 36)=data6 + unpckhps2 xmm3, xmm5 ; xmm3=(07 17 27 37)=data7 + + movaps xmm0, xmm7 + movaps xmm5, xmm6 + subps xmm7, xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6, xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0, xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5, xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 32 23 33) + movaps xmm3,
XMMWORD [wk(1)] ; xmm3=(24 34 25 35) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7, xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4, xmm2 ; xmm4=(02 12 22 32)=data2 + unpckhps2 xmm7, xmm2 ; xmm7=(03 13 23 33)=data3 + movaps xmm6, xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1, xmm3 ; xmm1=(04 14 24 34)=data4 + unpckhps2 xmm6, xmm3 ; xmm6=(05 15 25 35)=data5 + + movaps xmm2, xmm7 + movaps xmm3, xmm4 + addps xmm7, xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4, xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2, xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3, xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1, xmm5 + movaps xmm6, xmm0 + subps xmm5, xmm7 ; xmm5=tmp13 + subps xmm0, xmm4 ; xmm0=tmp12 + addps xmm1, xmm7 ; xmm1=tmp10 + addps xmm6, xmm4 ; xmm6=tmp11 + + addps xmm0, xmm5 + mulps xmm0, [rel PD_0_707] ; xmm0=z1 + + movaps xmm7, xmm1 + movaps xmm4, xmm5 + subps xmm1, xmm6 ; xmm1=data4 + subps xmm5, xmm0 ; xmm5=data6 + addps xmm7, xmm6 ; xmm7=data0 + addps xmm4, xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2, xmm3 ; xmm2=tmp10 + addps xmm3, xmm6 ; xmm3=tmp11 + addps xmm6, xmm0 ; xmm6=tmp12, xmm0=tmp7 + + mulps xmm3, [rel PD_0_707] ; xmm3=z3 + + movaps xmm1, xmm2 ; xmm1=tmp10 + subps xmm2, xmm6 + mulps xmm2, [rel PD_0_382] ; xmm2=z5 + mulps xmm1, [rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6, [rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1, xmm2 ; xmm1=z2 + addps xmm6, xmm2 ; xmm6=z4 + + movaps xmm5, xmm0 + subps xmm0, xmm3 ; xmm0=z13 + addps xmm5, xmm3 ; xmm5=z11 + + movaps xmm7, xmm0 + movaps xmm4, xmm5 + subps xmm0, xmm1 ;
xmm0=data3 + subps xmm5, xmm6 ; xmm5=data7 + addps xmm7, xmm1 ; xmm7=data5 + addps xmm4, xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + add rdx, 4*DCTSIZE*SIZEOF_FAST_FLOAT ; advance to the next 4 rows + dec rcx + jnz near .rowloop + + ; ---- Pass 2: process columns. + + mov rdx, r10 ; (FAST_FLOAT *) + mov rcx, DCTSIZE/4 ; loop counter: 4 columns per iteration +.columnloop: + + movaps xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm0=(02 12 22 32), xmm2=(42 52 62 72) + ; xmm1=(03 13 23 33), xmm3=(43 53 63 73) + + movaps xmm4, xmm0 ; transpose coefficients(phase 1) + unpcklps xmm0, xmm1 ; xmm0=(02 03 12 13) + unpckhps xmm4, xmm1 ; xmm4=(22 23 32 33) + movaps xmm5, xmm2 ; transpose coefficients(phase 1) + unpcklps xmm2, xmm3 ; xmm2=(42 43 52 53) + unpckhps xmm5, xmm3 ; xmm5=(62 63 72 73) + + movaps xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)] + + ; xmm6=(00 10 20 30), xmm1=(40 50 60 70) + ; xmm7=(01 11 21 31), xmm3=(41 51 61 71) + + movaps XMMWORD [wk(0)], xmm4 ; wk(0)=(22 23 32 33) + movaps XMMWORD [wk(1)], xmm2 ; wk(1)=(42 43 52 53) + + movaps xmm4, xmm6 ; transpose coefficients(phase 1) + unpcklps xmm6, xmm7 ; xmm6=(00 01 10 11) + unpckhps xmm4, xmm7 ; xmm4=(20 21 30 31) + movaps xmm2, xmm1 ; transpose coefficients(phase 1) + unpcklps xmm1, xmm3 ; xmm1=(40 41 50 51) + unpckhps xmm2, xmm3 ; xmm2=(60 61 70 71) + + movaps xmm7, xmm6 ; transpose coefficients(phase 2) + unpcklps2 xmm6, xmm0 ; xmm6=(00 01 02 03)=data0 + unpckhps2 xmm7, xmm0 ;
xmm7=(10 11 12 13)=data1 + movaps xmm3, xmm2 ; transpose coefficients(phase 2) + unpcklps2 xmm2, xmm5 ; xmm2=(60 61 62 63)=data6 + unpckhps2 xmm3, xmm5 ; xmm3=(70 71 72 73)=data7 + + movaps xmm0, xmm7 + movaps xmm5, xmm6 + subps xmm7, xmm2 ; xmm7=data1-data6=tmp6 + subps xmm6, xmm3 ; xmm6=data0-data7=tmp7 + addps xmm0, xmm2 ; xmm0=data1+data6=tmp1 + addps xmm5, xmm3 ; xmm5=data0+data7=tmp0 + + movaps xmm2, XMMWORD [wk(0)] ; xmm2=(22 23 32 33) + movaps xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53) + movaps XMMWORD [wk(0)], xmm7 ; wk(0)=tmp6 + movaps XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movaps xmm7, xmm4 ; transpose coefficients(phase 2) + unpcklps2 xmm4, xmm2 ; xmm4=(20 21 22 23)=data2 + unpckhps2 xmm7, xmm2 ; xmm7=(30 31 32 33)=data3 + movaps xmm6, xmm1 ; transpose coefficients(phase 2) + unpcklps2 xmm1, xmm3 ; xmm1=(40 41 42 43)=data4 + unpckhps2 xmm6, xmm3 ; xmm6=(50 51 52 53)=data5 + + movaps xmm2, xmm7 + movaps xmm3, xmm4 + addps xmm7, xmm1 ; xmm7=data3+data4=tmp3 + addps xmm4, xmm6 ; xmm4=data2+data5=tmp2 + subps xmm2, xmm1 ; xmm2=data3-data4=tmp4 + subps xmm3, xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movaps xmm1, xmm5 + movaps xmm6, xmm0 + subps xmm5, xmm7 ; xmm5=tmp13 + subps xmm0, xmm4 ; xmm0=tmp12 + addps xmm1, xmm7 ; xmm1=tmp10 + addps xmm6, xmm4 ; xmm6=tmp11 + + addps xmm0, xmm5 + mulps xmm0, [rel PD_0_707] ; xmm0=z1 + + movaps xmm7, xmm1 + movaps xmm4, xmm5 + subps xmm1, xmm6 ; xmm1=data4 + subps xmm5, xmm0 ; xmm5=data6 + addps xmm7, xmm6 ; xmm7=data0 + addps xmm4, xmm0 ; xmm4=data2 + + movaps XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)], xmm1 + movaps XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + ; -- Odd part + + movaps xmm6, XMMWORD [wk(0)] ; xmm6=tmp6 + movaps xmm0, XMMWORD [wk(1)] ; xmm0=tmp7 + + addps xmm2, xmm3 ; xmm2=tmp10 + addps xmm3, xmm6 ; xmm3=tmp11 + addps xmm6, xmm0 ; xmm6=tmp12, xmm0=tmp7 + +
mulps xmm3, [rel PD_0_707] ; xmm3=z3 + + movaps xmm1, xmm2 ; xmm1=tmp10 + subps xmm2, xmm6 + mulps xmm2, [rel PD_0_382] ; xmm2=z5 + mulps xmm1, [rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196) + mulps xmm6, [rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562) + addps xmm1, xmm2 ; xmm1=z2 + addps xmm6, xmm2 ; xmm6=z4 + + movaps xmm5, xmm0 + subps xmm0, xmm3 ; xmm0=z13 + addps xmm5, xmm3 ; xmm5=z11 + + movaps xmm7, xmm0 + movaps xmm4, xmm5 + subps xmm0, xmm1 ; xmm0=data3 + subps xmm5, xmm6 ; xmm5=data7 + addps xmm7, xmm1 ; xmm7=data5 + addps xmm4, xmm6 ; xmm4=data1 + + movaps XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)], xmm5 + movaps XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)], xmm7 + movaps XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4 + + add rdx, byte 4*SIZEOF_FAST_FLOAT ; advance to the next 4 columns + dec rcx + jnz near .columnloop + + uncollect_args 1 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rsp (stored by the prologue) + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jfdctfst-sse2.asm b/simd/x86_64/jfdctfst-sse2.asm new file mode 100644 index 0000000..aaf8b9e --- /dev/null +++ b/simd/x86_64/jfdctfst-sse2.asm @@ -0,0 +1,391 @@ +; +; jfdctfst.asm - fast integer FDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the forward DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. + +%if CONST_BITS == 8 +F_0_382 equ 98 ; FIX(0.382683433) +F_0_541 equ 139 ; FIX(0.541196100) +F_0_707 equ 181 ; FIX(0.707106781) +F_1_306 equ 334 ; FIX(1.306562965) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_382 equ DESCALE( 410903207, 30 - CONST_BITS) ; FIX(0.382683433) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_707 equ DESCALE( 759250124, 30 - CONST_BITS) ; FIX(0.707106781) +F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 32 + GLOBAL_DATA(jconst_fdct_ifast_sse2) + +EXTN(jconst_fdct_ifast_sse2): + +PW_F0707 times 8 dw F_0_707 << CONST_SHIFT +PW_F0382 times 8 dw F_0_382 << CONST_SHIFT +PW_F0541 times 8 dw F_0_541 << CONST_SHIFT +PW_F1306 times 8 dw F_1_306 << CONST_SHIFT + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform the forward DCT on one block of samples. 
+; +; GLOBAL(void) +; jsimd_fdct_ifast_sse2(DCTELEM *data) +; + +; r10 = DCTELEM *data + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2) + +EXTN(jsimd_fdct_ifast_sse2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 1 + + ; ---- Pass 1: process rows. + + mov rdx, r10 ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4, xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0, xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4, xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5, xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2, xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5, xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)] + + ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67) + ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2, xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6, xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2, xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5, xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1, xmm3 ; xmm1=(60
70 61 71 62 72 63 73) + punpckhwd xmm5, xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7, xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3, xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2, xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3, xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=(44 54 64 74 45 55 65 75) + + movdqa xmm7, xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0, xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7, xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2, xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4, xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2, xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1, xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0, xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1, xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5, xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2, xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5, xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6, xmm1 + movdqa xmm3, xmm0 + psubw xmm1, xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0, xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6, xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3, xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(0)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1, xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7, xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1, xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0, 
xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4, xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0, xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2, xmm1 + movdqa xmm5, xmm7 + paddw xmm1, xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7, xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2, xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5, xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4, xmm3 + movdqa xmm0, xmm6 + psubw xmm3, xmm1 ; xmm3=tmp13 + psubw xmm6, xmm7 ; xmm6=tmp12 + paddw xmm4, xmm1 ; xmm4=tmp10 + paddw xmm0, xmm7 ; xmm0=tmp11 + + paddw xmm6, xmm3 + psllw xmm6, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm6, [rel PW_F0707] ; xmm6=z1 + + movdqa xmm1, xmm4 + movdqa xmm7, xmm3 + psubw xmm4, xmm0 ; xmm4=data4 + psubw xmm3, xmm6 ; xmm3=data6 + paddw xmm1, xmm0 ; xmm1=data0 + paddw xmm7, xmm6 ; xmm7=data2 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=tmp6 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp7 + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=data4 + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=data6 + + ; -- Odd part + + paddw xmm2, xmm5 ; xmm2=tmp10 + paddw xmm5, xmm0 ; xmm5=tmp11 + paddw xmm0, xmm6 ; xmm0=tmp12, xmm6=tmp7 + + psllw xmm2, PRE_MULTIPLY_SCALE_BITS + psllw xmm0, PRE_MULTIPLY_SCALE_BITS + + psllw xmm5, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5, [rel PW_F0707] ; xmm5=z3 + + movdqa xmm4, xmm2 ; xmm4=tmp10 + psubw xmm2, xmm0 + pmulhw xmm2, [rel PW_F0382] ; xmm2=z5 + pmulhw xmm4, [rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm0, [rel PW_F1306] ; xmm0=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4, xmm2 ; xmm4=z2 + paddw xmm0, xmm2 ; xmm0=z4 + + movdqa xmm3, xmm6 + psubw xmm6, xmm5 ; xmm6=z13 + paddw xmm3, xmm5 ; xmm3=z11 + + movdqa xmm2, xmm6 + movdqa xmm5, xmm3 + psubw xmm6, xmm4 ; xmm6=data3 + psubw xmm3, xmm0 ; xmm3=data7 + paddw xmm2, xmm4 ; xmm2=data5 + paddw xmm5, xmm0 ; xmm5=data1 + + ; ---- Pass 2: process columns. 
+ + ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72) + ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73) + + movdqa xmm4, xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1, xmm5 ; xmm1=(00 01 10 11 20 21 30 31) + punpckhwd xmm4, xmm5 ; xmm4=(40 41 50 51 60 61 70 71) + movdqa xmm0, xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7, xmm6 ; xmm7=(02 03 12 13 22 23 32 33) + punpckhwd xmm0, xmm6 ; xmm0=(42 43 52 53 62 63 72 73) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=col4 + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=col6 + + ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76) + ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm7, xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5, xmm2 ; xmm5=(04 05 14 15 24 25 34 35) + punpckhwd xmm7, xmm2 ; xmm7=(44 45 54 55 64 65 74 75) + movdqa xmm0, xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6, xmm3 ; xmm6=(06 07 16 17 26 27 36 37) + punpckhwd xmm0, xmm3 ; xmm0=(46 47 56 57 66 67 76 77) + + movdqa xmm2, xmm5 ; transpose coefficients(phase 2) + punpckldq xmm5, xmm6 ; xmm5=(04 05 06 07 14 15 16 17) + punpckhdq xmm2, xmm6 ; xmm2=(24 25 26 27 34 35 36 37) + movdqa xmm3, xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7, xmm0 ; xmm7=(44 45 46 47 54 55 56 57) + punpckhdq xmm3, xmm0 ; xmm3=(64 65 66 67 74 75 76 77) + + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=(02 03 12 13 22 23 32 33) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=(44 45 46 47 54 55 56 57) + + movdqa xmm2, xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1, xmm6 ; xmm1=(00 01 02 03 10 11 12 13) + punpckhdq xmm2, xmm6 ; xmm2=(20 21 22 23 30 31 32 33) + movdqa xmm7, xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4, xmm0 
; xmm4=(40 41 42 43 50 51 52 53) + punpckhdq xmm7, xmm0 ; xmm7=(60 61 62 63 70 71 72 73) + + movdqa xmm6, xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1, xmm5 ; xmm1=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm6, xmm5 ; xmm6=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm0, xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7, xmm3 ; xmm7=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm0, xmm3 ; xmm0=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm5, xmm6 + movdqa xmm3, xmm1 + psubw xmm6, xmm7 ; xmm6=data1-data6=tmp6 + psubw xmm1, xmm0 ; xmm1=data0-data7=tmp7 + paddw xmm5, xmm7 ; xmm5=data1+data6=tmp1 + paddw xmm3, xmm0 ; xmm3=data0+data7=tmp0 + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(24 25 26 27 34 35 36 37) + movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm6 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=tmp7 + + movdqa xmm6, xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2, xmm7 ; xmm2=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm6, xmm7 ; xmm6=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm1, xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4, xmm0 ; xmm4=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm1, xmm0 ; xmm1=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm7, xmm6 + movdqa xmm0, xmm2 + paddw xmm6, xmm4 ; xmm6=data3+data4=tmp3 + paddw xmm2, xmm1 ; xmm2=data2+data5=tmp2 + psubw xmm7, xmm4 ; xmm7=data3-data4=tmp4 + psubw xmm0, xmm1 ; xmm0=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4, xmm3 + movdqa xmm1, xmm5 + psubw xmm3, xmm6 ; xmm3=tmp13 + psubw xmm5, xmm2 ; xmm5=tmp12 + paddw xmm4, xmm6 ; xmm4=tmp10 + paddw xmm1, xmm2 ; xmm1=tmp11 + + paddw xmm5, xmm3 + psllw xmm5, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5, [rel PW_F0707] ; xmm5=z1 + + movdqa xmm6, xmm4 + movdqa xmm2, xmm3 + psubw xmm4, xmm1 ; xmm4=data4 + psubw xmm3, xmm5 ; xmm3=data6 + paddw xmm6, xmm1 ; xmm6=data0 + paddw xmm2, xmm5 ; xmm2=data2 + + movdqa XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD 
[XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm2 + + ; -- Odd part + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + paddw xmm7, xmm0 ; xmm7=tmp10 + paddw xmm0, xmm1 ; xmm0=tmp11 + paddw xmm1, xmm5 ; xmm1=tmp12, xmm5=tmp7 + + psllw xmm7, PRE_MULTIPLY_SCALE_BITS + psllw xmm1, PRE_MULTIPLY_SCALE_BITS + + psllw xmm0, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm0, [rel PW_F0707] ; xmm0=z3 + + movdqa xmm4, xmm7 ; xmm4=tmp10 + psubw xmm7, xmm1 + pmulhw xmm7, [rel PW_F0382] ; xmm7=z5 + pmulhw xmm4, [rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196) + pmulhw xmm1, [rel PW_F1306] ; xmm1=MULTIPLY(tmp12,FIX_1_306562) + paddw xmm4, xmm7 ; xmm4=z2 + paddw xmm1, xmm7 ; xmm1=z4 + + movdqa xmm3, xmm5 + psubw xmm5, xmm0 ; xmm5=z13 + paddw xmm3, xmm0 ; xmm3=z11 + + movdqa xmm6, xmm5 + movdqa xmm2, xmm3 + psubw xmm5, xmm4 ; xmm5=data3 + psubw xmm3, xmm1 ; xmm3=data7 + paddw xmm6, xmm4 ; xmm6=data5 + paddw xmm2, xmm1 ; xmm2=data1 + + movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm5 + movdqa XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm3 + movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6 + movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2 + + uncollect_args 1 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jfdctint-avx2.asm b/simd/x86_64/jfdctint-avx2.asm new file mode 100644 index 0000000..448f47d --- /dev/null +++ b/simd/x86_64/jfdctint-avx2.asm @@ -0,0 +1,322 @@ +; +; jfdctint.asm - accurate integer FDCT (64-bit AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, 2018, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS - PASS1_BITS) +%define DESCALE_P2 (CONST_BITS + PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- +; In-place 8x8x16-bit matrix transpose using AVX2 instructions +; %1-%4: Input/output registers +; %5-%8: Temp registers + +%macro dotranspose 8 + ; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47) + ; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57) + ; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67) + ; %4=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77) + + vpunpcklwd %5, %1, %2 + vpunpckhwd %6, %1, %2 + vpunpcklwd %7, %3, %4 + vpunpckhwd %8, %3, %4 + ; transpose coefficients(phase 1) + ; %5=(00 10 01 11 02 12 03 13 40 50 41 51 42 52 43 53) + ; %6=(04 14 05 15 06 16 07 17 44 54 45 55 46 56 47 57) + ; %7=(20 30 21 31 22 32 23 33 60 70 61 71 62 72 63 73) + ; %8=(24 34 25 35 26 36 27 37 64 74 65 75 66 76 67 77) + + vpunpckldq %1, %5, %7 + vpunpckhdq %2, %5, %7 + vpunpckldq %3, %6, %8 + vpunpckhdq %4, %6, %8 + ; transpose coefficients(phase 2) + ; %1=(00 10 20 30 01 11 21 31 40 50 60 70 41 51 61 71) + ; %2=(02 12 22 32 03 13 23 33 42 52 62 72 43 53 63 73) + ; %3=(04 14 24 34 05 15 25 35 44 54 64 
74 45 55 65 75) + ; %4=(06 16 26 36 07 17 27 37 46 56 66 76 47 57 67 77) + + vpermq %1, %1, 0x8D + vpermq %2, %2, 0x8D + vpermq %3, %3, 0xD8 + vpermq %4, %4, 0xD8 + ; transpose coefficients(phase 3) + ; %1=(01 11 21 31 41 51 61 71 00 10 20 30 40 50 60 70) + ; %2=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72) + ; %3=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75) + ; %4=(06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77) +%endmacro + +; -------------------------------------------------------------------------- +; In-place 8x8x16-bit slow integer forward DCT using AVX2 instructions +; %1-%4: Input/output registers +; %5-%8: Temp registers +; %9: Pass (1 or 2) + +%macro dodct 9 + vpsubw %5, %1, %4 ; %5=data1_0-data6_7=tmp6_7 + vpaddw %6, %1, %4 ; %6=data1_0+data6_7=tmp1_0 + vpaddw %7, %2, %3 ; %7=data3_2+data4_5=tmp3_2 + vpsubw %8, %2, %3 ; %8=data3_2-data4_5=tmp4_5 + + ; -- Even part + + vperm2i128 %6, %6, %6, 0x01 ; %6=tmp0_1 + vpaddw %1, %6, %7 ; %1=tmp0_1+tmp3_2=tmp10_11 + vpsubw %6, %6, %7 ; %6=tmp0_1-tmp3_2=tmp13_12 + + vperm2i128 %7, %1, %1, 0x01 ; %7=tmp11_10 + vpsignw %1, %1, [rel PW_1_NEG1] ; %1=tmp10_neg11 + vpaddw %7, %7, %1 ; %7=(tmp10+tmp11)_(tmp10-tmp11) +%if %9 == 1 + vpsllw %1, %7, PASS1_BITS ; %1=data0_4 +%else + vpaddw %7, %7, [rel PW_DESCALE_P2X] + vpsraw %1, %7, PASS1_BITS ; %1=data0_4 +%endif + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + vperm2i128 %7, %6, %6, 0x01 ; %7=tmp12_13 + vpunpcklwd %2, %6, %7 + vpunpckhwd %6, %6, %7 + vpmaddwd %2, %2, [rel PW_F130_F054_MF130_F054] ; %2=data2_6L + vpmaddwd %6, %6, [rel PW_F130_F054_MF130_F054] ; %6=data2_6H + + vpaddd %2, %2, [rel PD_DESCALE_P %+ %9] + vpaddd %6, %6, [rel PD_DESCALE_P %+ %9] + vpsrad %2, %2, DESCALE_P %+ %9 + vpsrad %6, %6, 
DESCALE_P %+ %9 + + vpackssdw %3, %2, %6 ; %3=data2_6 + + ; -- Odd part + + vpaddw %7, %8, %5 ; %7=tmp4_5+tmp6_7=z3_4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + vperm2i128 %2, %7, %7, 0x01 ; %2=z4_3 + vpunpcklwd %6, %7, %2 + vpunpckhwd %7, %7, %2 + vpmaddwd %6, %6, [rel PW_MF078_F117_F078_F117] ; %6=z3_4L + vpmaddwd %7, %7, [rel PW_MF078_F117_F078_F117] ; %7=z3_4H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + vperm2i128 %4, %5, %5, 0x01 ; %4=tmp7_6 + vpunpcklwd %2, %8, %4 + vpunpckhwd %4, %8, %4 + vpmaddwd %2, %2, [rel PW_MF060_MF089_MF050_MF256] ; %2=tmp4_5L + vpmaddwd %4, %4, [rel PW_MF060_MF089_MF050_MF256] ; %4=tmp4_5H + + vpaddd %2, %2, %6 ; %2=data7_5L + vpaddd %4, %4, %7 ; %4=data7_5H + + vpaddd %2, %2, [rel PD_DESCALE_P %+ %9] + vpaddd %4, %4, [rel PD_DESCALE_P %+ %9] + vpsrad %2, %2, DESCALE_P %+ %9 + vpsrad %4, %4, DESCALE_P %+ %9 + + vpackssdw %4, %2, %4 ; %4=data7_5 + + vperm2i128 %2, %8, %8, 0x01 ; %2=tmp5_4 + vpunpcklwd %8, %5, %2 + vpunpckhwd %5, %5, %2 + vpmaddwd %8, %8, [rel PW_F050_MF256_F060_MF089] ; %8=tmp6_7L + vpmaddwd %5, %5, [rel
PW_F050_MF256_F060_MF089] ; %5=tmp6_7H + + vpaddd %8, %8, %6 ; %8=data3_1L + vpaddd %5, %5, %7 ; %5=data3_1H + + vpaddd %8, %8, [rel PD_DESCALE_P %+ %9] + vpaddd %5, %5, [rel PD_DESCALE_P %+ %9] + vpsrad %8, %8, DESCALE_P %+ %9 + vpsrad %5, %5, DESCALE_P %+ %9 + + vpackssdw %2, %8, %5 ; %2=data3_1 +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fdct_islow_avx2) + +EXTN(jconst_fdct_islow_avx2): + +PW_F130_F054_MF130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541 + times 4 dw (F_0_541 - F_1_847), F_0_541 +PW_MF078_F117_F078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175 + times 4 dw (F_1_175 - F_0_390), F_1_175 +PW_MF060_MF089_MF050_MF256 times 4 dw (F_0_298 - F_0_899), -F_0_899 + times 4 dw (F_2_053 - F_2_562), -F_2_562 +PW_F050_MF256_F060_MF089 times 4 dw (F_3_072 - F_2_562), -F_2_562 + times 4 dw (F_1_501 - F_0_899), -F_0_899 +PD_DESCALE_P1 times 8 dd 1 << (DESCALE_P1 - 1) +PD_DESCALE_P2 times 8 dd 1 << (DESCALE_P2 - 1) +PW_DESCALE_P2X times 16 dw 1 << (PASS1_BITS - 1) +PW_1_NEG1 times 8 dw 1 + times 8 dw -1 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform the forward DCT on one block of samples. +; +; GLOBAL(void) +; jsimd_fdct_islow_avx2(DCTELEM *data) +; + +; r10 = DCTELEM *data + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_islow_avx2) + +EXTN(jsimd_fdct_islow_avx2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 1 + + ; ---- Pass 1: process rows. 
+ + vmovdqu ymm4, YMMWORD [YMMBLOCK(0,0,r10,SIZEOF_DCTELEM)] + vmovdqu ymm5, YMMWORD [YMMBLOCK(2,0,r10,SIZEOF_DCTELEM)] + vmovdqu ymm6, YMMWORD [YMMBLOCK(4,0,r10,SIZEOF_DCTELEM)] + vmovdqu ymm7, YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)] + ; ymm4=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + ; ymm5=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + ; ymm6=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + ; ymm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + vperm2i128 ymm0, ymm4, ymm6, 0x20 + vperm2i128 ymm1, ymm4, ymm6, 0x31 + vperm2i128 ymm2, ymm5, ymm7, 0x20 + vperm2i128 ymm3, ymm5, ymm7, 0x31 + ; ymm0=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47) + ; ymm1=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57) + ; ymm2=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67) + ; ymm3=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77) + + dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7 + + dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1 + ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5 + + ; ---- Pass 2: process columns. + + vperm2i128 ymm4, ymm1, ymm3, 0x20 ; ymm4=data3_7 + vperm2i128 ymm1, ymm1, ymm3, 0x31 ; ymm1=data1_5 + + dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7 + + dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2 + ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5 + + vperm2i128 ymm3, ymm0, ymm1, 0x30 ; ymm3=data0_1 + vperm2i128 ymm5, ymm2, ymm1, 0x20 ; ymm5=data2_3 + vperm2i128 ymm6, ymm0, ymm4, 0x31 ; ymm6=data4_5 + vperm2i128 ymm7, ymm2, ymm4, 0x21 ; ymm7=data6_7 + + vmovdqu YMMWORD [YMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm3 + vmovdqu YMMWORD [YMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm5 + vmovdqu YMMWORD [YMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm6 + vmovdqu YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm7 + + vzeroupper + uncollect_args 1 + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. 
+ align 32 diff --git a/simd/x86_64/jfdctint-sse2.asm b/simd/x86_64/jfdctint-sse2.asm new file mode 100644 index 0000000..ef16a52 --- /dev/null +++ b/simd/x86_64/jfdctint-sse2.asm @@ -0,0 +1,621 @@ +; +; jfdctint.asm - accurate integer FDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; forward DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jfdctint.c; see the jfdctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS - PASS1_BITS) +%define DESCALE_P2 (CONST_BITS + PASS1_BITS) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_fdct_islow_sse2) + +EXTN(jconst_fdct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541 - F_1_847) +PW_MF078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175 - F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298 - F_0_899), -F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501 - F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053 - F_2_562), -F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072 - F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1) +PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1) + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform the forward DCT on one block of samples. 
+; +; GLOBAL(void) +; jsimd_fdct_islow_sse2(DCTELEM *data) +; + +; r10 = DCTELEM *data + +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM] +%define WK_NUM 6 + + align 32 + GLOBAL_FUNCTION(jsimd_fdct_islow_sse2) + +EXTN(jsimd_fdct_islow_sse2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 1 + + ; ---- Pass 1: process rows. + + mov rdx, r10 ; (DCTELEM *) + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)] + + ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27) + ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37) + + movdqa xmm4, xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0, xmm1 ; xmm0=(00 10 01 11 02 12 03 13) + punpckhwd xmm4, xmm1 ; xmm4=(04 14 05 15 06 16 07 17) + movdqa xmm5, xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2, xmm3 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm5, xmm3 ; xmm5=(24 34 25 35 26 36 27 37) + + movdqa xmm6, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm1, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)] + + ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67) + ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=(20 30 21 31 22 32 23 33) + movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(24 34 25 35 26 36 27 37) + + movdqa xmm2, xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6, xmm7 ; xmm6=(40 50 41 51 42 52 43 53) + punpckhwd xmm2, xmm7 ; xmm2=(44 54 45 55 46 56 47 57) + movdqa xmm5, xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1, xmm3 ; xmm1=(60
70 61 71 62 72 63 73) + punpckhwd xmm5, xmm3 ; xmm5=(64 74 65 75 66 76 67 77) + + movdqa xmm7, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm1 ; xmm6=(40 50 60 70 41 51 61 71) + punpckhdq xmm7, xmm1 ; xmm7=(42 52 62 72 43 53 63 73) + movdqa xmm3, xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2, xmm5 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm3, xmm5 ; xmm3=(46 56 66 76 47 57 67 77) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(20 30 21 31 22 32 23 33) + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=(24 34 25 35 26 36 27 37) + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=(42 52 62 72 43 53 63 73) + movdqa XMMWORD [wk(3)], xmm2 ; wk(3)=(44 54 64 74 45 55 65 75) + + movdqa xmm7, xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0, xmm1 ; xmm0=(00 10 20 30 01 11 21 31) + punpckhdq xmm7, xmm1 ; xmm7=(02 12 22 32 03 13 23 33) + movdqa xmm2, xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4, xmm5 ; xmm4=(04 14 24 34 05 15 25 35) + punpckhdq xmm2, xmm5 ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa xmm1, xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0, xmm6 ; xmm0=(00 10 20 30 40 50 60 70)=data0 + punpckhqdq xmm1, xmm6 ; xmm1=(01 11 21 31 41 51 61 71)=data1 + movdqa xmm5, xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2, xmm3 ; xmm2=(06 16 26 36 46 56 66 76)=data6 + punpckhqdq xmm5, xmm3 ; xmm5=(07 17 27 37 47 57 67 77)=data7 + + movdqa xmm6, xmm1 + movdqa xmm3, xmm0 + psubw xmm1, xmm2 ; xmm1=data1-data6=tmp6 + psubw xmm0, xmm5 ; xmm0=data0-data7=tmp7 + paddw xmm6, xmm2 ; xmm6=data1+data6=tmp1 + paddw xmm3, xmm5 ; xmm3=data0+data7=tmp0 + + movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, XMMWORD [wk(3)] ; xmm5=(44 54 64 74 45 55 65 75) + movdqa XMMWORD [wk(0)], xmm1 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp7 + + movdqa xmm1, xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7, xmm2 ; xmm7=(02 12 22 32 42 52 62 72)=data2 + punpckhqdq xmm1, xmm2 ; xmm1=(03 13 23 33 43 53 63 73)=data3 + movdqa xmm0, 
xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4, xmm5 ; xmm4=(04 14 24 34 44 54 64 74)=data4 + punpckhqdq xmm0, xmm5 ; xmm0=(05 15 25 35 45 55 65 75)=data5 + + movdqa xmm2, xmm1 + movdqa xmm5, xmm7 + paddw xmm1, xmm4 ; xmm1=data3+data4=tmp3 + paddw xmm7, xmm0 ; xmm7=data2+data5=tmp2 + psubw xmm2, xmm4 ; xmm2=data3-data4=tmp4 + psubw xmm5, xmm0 ; xmm5=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm4, xmm3 + movdqa xmm0, xmm6 + paddw xmm3, xmm1 ; xmm3=tmp10 + paddw xmm6, xmm7 ; xmm6=tmp11 + psubw xmm4, xmm1 ; xmm4=tmp13 + psubw xmm0, xmm7 ; xmm0=tmp12 + + movdqa xmm1, xmm3 + paddw xmm3, xmm6 ; xmm3=tmp10+tmp11 + psubw xmm1, xmm6 ; xmm1=tmp10-tmp11 + + psllw xmm3, PASS1_BITS ; xmm3=data0 + psllw xmm1, PASS1_BITS ; xmm1=data4 + + movdqa XMMWORD [wk(2)], xmm3 ; wk(2)=data0 + movdqa XMMWORD [wk(3)], xmm1 ; wk(3)=data4 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm7, xmm4 ; xmm4=tmp13 + movdqa xmm6, xmm4 + punpcklwd xmm7, xmm0 ; xmm0=tmp12 + punpckhwd xmm6, xmm0 + movdqa xmm4, xmm7 + movdqa xmm0, xmm6 + pmaddwd xmm7, [rel PW_F130_F054] ; xmm7=data2L + pmaddwd xmm6, [rel PW_F130_F054] ; xmm6=data2H + pmaddwd xmm4, [rel PW_F054_MF130] ; xmm4=data6L + pmaddwd xmm0, [rel PW_F054_MF130] ; xmm0=data6H + + paddd xmm7, [rel PD_DESCALE_P1] + paddd xmm6, [rel PD_DESCALE_P1] + psrad xmm7, DESCALE_P1 + psrad xmm6, DESCALE_P1 + paddd xmm4, [rel PD_DESCALE_P1] + paddd xmm0, [rel PD_DESCALE_P1] + psrad xmm4, DESCALE_P1 + psrad xmm0, DESCALE_P1 + + packssdw xmm7, xmm6 ; xmm7=data2 + packssdw xmm4, xmm0 ; xmm4=data6 + + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=data2 + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=data6 + + ; -- Odd part + + movdqa xmm3, XMMWORD [wk(0)] ; xmm3=tmp6 + movdqa xmm1, XMMWORD [wk(1)] ; 
xmm1=tmp7 + + movdqa xmm6, xmm2 ; xmm2=tmp4 + movdqa xmm0, xmm5 ; xmm5=tmp5 + paddw xmm6, xmm3 ; xmm6=z3 + paddw xmm0, xmm1 ; xmm0=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm7, xmm6 + movdqa xmm4, xmm6 + punpcklwd xmm7, xmm0 + punpckhwd xmm4, xmm0 + movdqa xmm6, xmm7 + movdqa xmm0, xmm4 + pmaddwd xmm7, [rel PW_MF078_F117] ; xmm7=z3L + pmaddwd xmm4, [rel PW_MF078_F117] ; xmm4=z3H + pmaddwd xmm6, [rel PW_F117_F078] ; xmm6=z4L + pmaddwd xmm0, [rel PW_F117_F078] ; xmm0=z4H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm7, xmm2 + movdqa xmm4, xmm2 + punpcklwd xmm7, xmm1 + punpckhwd xmm4, xmm1 + movdqa xmm2, xmm7 + movdqa xmm1, xmm4 + pmaddwd xmm7, [rel PW_MF060_MF089] ; xmm7=tmp4L + pmaddwd xmm4, [rel PW_MF060_MF089] ; xmm4=tmp4H + pmaddwd xmm2, [rel PW_MF089_F060] ; xmm2=tmp7L + pmaddwd xmm1, [rel PW_MF089_F060] ; xmm1=tmp7H + + paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L + paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H + paddd xmm2, xmm6 ; xmm2=data1L + paddd xmm1, 
xmm0 ; xmm1=data1H + + paddd xmm7, [rel PD_DESCALE_P1] + paddd xmm4, [rel PD_DESCALE_P1] + psrad xmm7, DESCALE_P1 + psrad xmm4, DESCALE_P1 + paddd xmm2, [rel PD_DESCALE_P1] + paddd xmm1, [rel PD_DESCALE_P1] + psrad xmm2, DESCALE_P1 + psrad xmm1, DESCALE_P1 + + packssdw xmm7, xmm4 ; xmm7=data7 + packssdw xmm2, xmm1 ; xmm2=data1 + + movdqa xmm4, xmm5 + movdqa xmm1, xmm5 + punpcklwd xmm4, xmm3 + punpckhwd xmm1, xmm3 + movdqa xmm5, xmm4 + movdqa xmm3, xmm1 + pmaddwd xmm4, [rel PW_MF050_MF256] ; xmm4=tmp5L + pmaddwd xmm1, [rel PW_MF050_MF256] ; xmm1=tmp5H + pmaddwd xmm5, [rel PW_MF256_F050] ; xmm5=tmp6L + pmaddwd xmm3, [rel PW_MF256_F050] ; xmm3=tmp6H + + paddd xmm4, xmm6 ; xmm4=data5L + paddd xmm1, xmm0 ; xmm1=data5H + paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L + paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H + + paddd xmm4, [rel PD_DESCALE_P1] + paddd xmm1, [rel PD_DESCALE_P1] + psrad xmm4, DESCALE_P1 + psrad xmm1, DESCALE_P1 + paddd xmm5, [rel PD_DESCALE_P1] + paddd xmm3, [rel PD_DESCALE_P1] + psrad xmm5, DESCALE_P1 + psrad xmm3, DESCALE_P1 + + packssdw xmm4, xmm1 ; xmm4=data5 + packssdw xmm5, xmm3 ; xmm5=data3 + + ; ---- Pass 2: process columns. 
+ + movdqa xmm6, XMMWORD [wk(2)] ; xmm6=col0 + movdqa xmm0, XMMWORD [wk(4)] ; xmm0=col2 + + ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72) + ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73) + + movdqa xmm1, xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6, xmm2 ; xmm6=(00 01 10 11 20 21 30 31) + punpckhwd xmm1, xmm2 ; xmm1=(40 41 50 51 60 61 70 71) + movdqa xmm3, xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0, xmm5 ; xmm0=(02 03 12 13 22 23 32 33) + punpckhwd xmm3, xmm5 ; xmm3=(42 43 52 53 62 63 72 73) + + movdqa xmm2, XMMWORD [wk(3)] ; xmm2=col4 + movdqa xmm5, XMMWORD [wk(5)] ; xmm5=col6 + + ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76) + ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=(02 03 12 13 22 23 32 33) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(42 43 52 53 62 63 72 73) + + movdqa xmm0, xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2, xmm4 ; xmm2=(04 05 14 15 24 25 34 35) + punpckhwd xmm0, xmm4 ; xmm0=(44 45 54 55 64 65 74 75) + movdqa xmm3, xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5, xmm7 ; xmm5=(06 07 16 17 26 27 36 37) + punpckhwd xmm3, xmm7 ; xmm3=(46 47 56 57 66 67 76 77) + + movdqa xmm4, xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2, xmm5 ; xmm2=(04 05 06 07 14 15 16 17) + punpckhdq xmm4, xmm5 ; xmm4=(24 25 26 27 34 35 36 37) + movdqa xmm7, xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0, xmm3 ; xmm0=(44 45 46 47 54 55 56 57) + punpckhdq xmm7, xmm3 ; xmm7=(64 65 66 67 74 75 76 77) + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=(02 03 12 13 22 23 32 33) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(42 43 52 53 62 63 72 73) + movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=(24 25 26 27 34 35 36 37) + movdqa XMMWORD [wk(3)], xmm0 ; wk(3)=(44 45 46 47 54 55 56 57) + + movdqa xmm4, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm5 ; xmm6=(00 01 02 03 10 11 12 13) + punpckhdq xmm4, xmm5 ; xmm4=(20 21 22 23 30 
31 32 33) + movdqa xmm0, xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1, xmm3 ; xmm1=(40 41 42 43 50 51 52 53) + punpckhdq xmm0, xmm3 ; xmm0=(60 61 62 63 70 71 72 73) + + movdqa xmm5, xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6, xmm2 ; xmm6=(00 01 02 03 04 05 06 07)=data0 + punpckhqdq xmm5, xmm2 ; xmm5=(10 11 12 13 14 15 16 17)=data1 + movdqa xmm3, xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0, xmm7 ; xmm0=(60 61 62 63 64 65 66 67)=data6 + punpckhqdq xmm3, xmm7 ; xmm3=(70 71 72 73 74 75 76 77)=data7 + + movdqa xmm2, xmm5 + movdqa xmm7, xmm6 + psubw xmm5, xmm0 ; xmm5=data1-data6=tmp6 + psubw xmm6, xmm3 ; xmm6=data0-data7=tmp7 + paddw xmm2, xmm0 ; xmm2=data1+data6=tmp1 + paddw xmm7, xmm3 ; xmm7=data0+data7=tmp0 + + movdqa xmm0, XMMWORD [wk(2)] ; xmm0=(24 25 26 27 34 35 36 37) + movdqa xmm3, XMMWORD [wk(3)] ; xmm3=(44 45 46 47 54 55 56 57) + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=tmp6 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp7 + + movdqa xmm5, xmm4 ; transpose coefficients(phase 3) + punpcklqdq xmm4, xmm0 ; xmm4=(20 21 22 23 24 25 26 27)=data2 + punpckhqdq xmm5, xmm0 ; xmm5=(30 31 32 33 34 35 36 37)=data3 + movdqa xmm6, xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1, xmm3 ; xmm1=(40 41 42 43 44 45 46 47)=data4 + punpckhqdq xmm6, xmm3 ; xmm6=(50 51 52 53 54 55 56 57)=data5 + + movdqa xmm0, xmm5 + movdqa xmm3, xmm4 + paddw xmm5, xmm1 ; xmm5=data3+data4=tmp3 + paddw xmm4, xmm6 ; xmm4=data2+data5=tmp2 + psubw xmm0, xmm1 ; xmm0=data3-data4=tmp4 + psubw xmm3, xmm6 ; xmm3=data2-data5=tmp5 + + ; -- Even part + + movdqa xmm1, xmm7 + movdqa xmm6, xmm2 + paddw xmm7, xmm5 ; xmm7=tmp10 + paddw xmm2, xmm4 ; xmm2=tmp11 + psubw xmm1, xmm5 ; xmm1=tmp13 + psubw xmm6, xmm4 ; xmm6=tmp12 + + movdqa xmm5, xmm7 + paddw xmm7, xmm2 ; xmm7=tmp10+tmp11 + psubw xmm5, xmm2 ; xmm5=tmp10-tmp11 + + paddw xmm7, [rel PW_DESCALE_P2X] + paddw xmm5, [rel PW_DESCALE_P2X] + psraw xmm7, PASS1_BITS ; xmm7=data0 + psraw xmm5, PASS1_BITS ; xmm5=data4 + + movdqa XMMWORD 
[XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm7 + movdqa XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm5 + + ; (Original) + ; z1 = (tmp12 + tmp13) * 0.541196100; + ; data2 = z1 + tmp13 * 0.765366865; + ; data6 = z1 + tmp12 * -1.847759065; + ; + ; (This implementation) + ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; + ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); + + movdqa xmm4, xmm1 ; xmm1=tmp13 + movdqa xmm2, xmm1 + punpcklwd xmm4, xmm6 ; xmm6=tmp12 + punpckhwd xmm2, xmm6 + movdqa xmm1, xmm4 + movdqa xmm6, xmm2 + pmaddwd xmm4, [rel PW_F130_F054] ; xmm4=data2L + pmaddwd xmm2, [rel PW_F130_F054] ; xmm2=data2H + pmaddwd xmm1, [rel PW_F054_MF130] ; xmm1=data6L + pmaddwd xmm6, [rel PW_F054_MF130] ; xmm6=data6H + + paddd xmm4, [rel PD_DESCALE_P2] + paddd xmm2, [rel PD_DESCALE_P2] + psrad xmm4, DESCALE_P2 + psrad xmm2, DESCALE_P2 + paddd xmm1, [rel PD_DESCALE_P2] + paddd xmm6, [rel PD_DESCALE_P2] + psrad xmm1, DESCALE_P2 + psrad xmm6, DESCALE_P2 + + packssdw xmm4, xmm2 ; xmm4=data2 + packssdw xmm1, xmm6 ; xmm1=data6 + + movdqa XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm1 + + ; -- Odd part + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp6 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp7 + + movdqa xmm2, xmm0 ; xmm0=tmp4 + movdqa xmm6, xmm3 ; xmm3=tmp5 + paddw xmm2, xmm7 ; xmm2=z3 + paddw xmm6, xmm5 ; xmm6=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm4, xmm2 + movdqa xmm1, xmm2 + punpcklwd xmm4, xmm6 + punpckhwd xmm1, xmm6 + movdqa xmm2, xmm4 + movdqa xmm6, xmm1 + pmaddwd xmm4, [rel PW_MF078_F117] ; xmm4=z3L + pmaddwd xmm1, [rel PW_MF078_F117] ; xmm1=z3H + pmaddwd xmm2, [rel PW_F117_F078] ; xmm2=z4L + pmaddwd xmm6, [rel PW_F117_F078] 
; xmm6=z4H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L + movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=z3H + + ; (Original) + ; z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; + ; tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; + ; tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; + ; data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; + ; + ; (This implementation) + ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; + ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; + ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); + ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); + ; data7 = tmp4 + z3; data5 = tmp5 + z4; + ; data3 = tmp6 + z3; data1 = tmp7 + z4; + + movdqa xmm4, xmm0 + movdqa xmm1, xmm0 + punpcklwd xmm4, xmm5 + punpckhwd xmm1, xmm5 + movdqa xmm0, xmm4 + movdqa xmm5, xmm1 + pmaddwd xmm4, [rel PW_MF060_MF089] ; xmm4=tmp4L + pmaddwd xmm1, [rel PW_MF060_MF089] ; xmm1=tmp4H + pmaddwd xmm0, [rel PW_MF089_F060] ; xmm0=tmp7L + pmaddwd xmm5, [rel PW_MF089_F060] ; xmm5=tmp7H + + paddd xmm4, XMMWORD [wk(0)] ; xmm4=data7L + paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H + paddd xmm0, xmm2 ; xmm0=data1L + paddd xmm5, xmm6 ; xmm5=data1H + + paddd xmm4, [rel PD_DESCALE_P2] + paddd xmm1, [rel PD_DESCALE_P2] + psrad xmm4, DESCALE_P2 + psrad xmm1, DESCALE_P2 + paddd xmm0, [rel PD_DESCALE_P2] + paddd xmm5, [rel PD_DESCALE_P2] + psrad xmm0, DESCALE_P2 + psrad xmm5, DESCALE_P2 + + packssdw xmm4, xmm1 ; xmm4=data7 + packssdw xmm0, xmm5 ; xmm0=data1 + + movdqa XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm4 + movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm0 + + movdqa xmm1, xmm3 + movdqa xmm5, xmm3 + punpcklwd xmm1, xmm7 + punpckhwd xmm5, xmm7 + movdqa xmm3, xmm1 + movdqa xmm7, xmm5 + pmaddwd xmm1, [rel PW_MF050_MF256] ; xmm1=tmp5L + pmaddwd xmm5, [rel PW_MF050_MF256] ; xmm5=tmp5H + pmaddwd xmm3, [rel PW_MF256_F050] ; 
xmm3=tmp6L + pmaddwd xmm7, [rel PW_MF256_F050] ; xmm7=tmp6H + + paddd xmm1, xmm2 ; xmm1=data5L + paddd xmm5, xmm6 ; xmm5=data5H + paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L + paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H + + paddd xmm1, [rel PD_DESCALE_P2] + paddd xmm5, [rel PD_DESCALE_P2] + psrad xmm1, DESCALE_P2 + psrad xmm5, DESCALE_P2 + paddd xmm3, [rel PD_DESCALE_P2] + paddd xmm7, [rel PD_DESCALE_P2] + psrad xmm3, DESCALE_P2 + psrad xmm7, DESCALE_P2 + + packssdw xmm1, xmm5 ; xmm1=data5 + packssdw xmm3, xmm7 ; xmm3=data3 + + movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3 + + uncollect_args 1 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jidctflt-sse2.asm b/simd/x86_64/jidctflt-sse2.asm new file mode 100644 index 0000000..b676ef3 --- /dev/null +++ b/simd/x86_64/jidctflt-sse2.asm @@ -0,0 +1,483 @@ +; +; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a floating-point implementation of the inverse DCT +; (Discrete Cosine Transform). The following code is based directly on +; the IJG's original jidctflt.c; see the jidctflt.c for more details. 
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%macro unpcklps2 2                      ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+    shufps      %1, %2, 0x44
+%endmacro
+
+%macro unpckhps2 2                      ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+    shufps      %1, %2, 0xEE
+%endmacro
+
+; --------------------------------------------------------------------------
+    SECTION     SEG_CONST
+
+    alignz      32
+    GLOBAL_DATA(jconst_idct_float_sse2)
+
+EXTN(jconst_idct_float_sse2):
+
+PD_1_414        times 4  dd  1.414213562373095048801689
+PD_1_847        times 4  dd  1.847759065022573512256366
+PD_1_082        times 4  dd  1.082392200292393968799446
+PD_M2_613       times 4  dd -2.613125929752753055713286
+PD_RNDINT_MAGIC times 4  dd  100663296.0  ; (float)(0x00C00000 << 3) = 1.5 * 2^26
+PB_CENTERJSAMP  times 16 db  CENTERJSAMPLE
+
+    alignz      32
+
+; --------------------------------------------------------------------------
+    SECTION     SEG_TEXT
+    BITS        64
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_float_sse2(void *dct_table, JCOEFPTR coef_block,
+;                       JSAMPARRAY output_buf, JDIMENSION output_col)
+; Results go to output_buf[row] + output_col, one movq (8 bytes) per row.
+
+; r10 = void *dct_table
+; r11 = JCOEFPTR coef_block
+; r12 = JSAMPARRAY output_buf
+; r13d = JDIMENSION output_col
+
+%define original_rbp  rbp + 0
+%define wk(i)         rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
+                                        ; xmmword wk[WK_NUM]
+%define WK_NUM        2
+%define workspace     wk(0) - DCTSIZE2 * SIZEOF_FAST_FLOAT
+                                        ; FAST_FLOAT workspace[DCTSIZE2]
+
+    align       32
+    GLOBAL_FUNCTION(jsimd_idct_float_sse2)
+
+EXTN(jsimd_idct_float_sse2):
+    push        rbp                          ; save caller's rbp
+    mov         rax, rsp                     ; rax = rsp, which now points at the saved rbp
+    sub         rsp, byte 4
+    and         rsp, byte (-SIZEOF_XMMWORD)  ; align to 128 bits
+    mov         [rsp], rax                   ; keep that pointer at [original_rbp]
+    mov         rbp, rsp                     ; rbp = aligned rbp
+    lea         rsp, [workspace]             ; wk[] and workspace[] live below rbp
+    collect_args 4                           ; jsimdext.inc macro; presumably loads args into r10-r13 - TODO confirm
+    push        rbx                          ; rbx is used as a row pointer in pass 2
+
+    ; ---- Pass 1: process columns from input, store into work array.
+
+    mov         rdx, r10                ; quantptr
+    mov         rsi, r11                ; inptr
+    lea         rdi, [workspace]        ; FAST_FLOAT *wsptr
+    mov         rcx, DCTSIZE/4          ; ctr
+.columnloop:                            ; each iteration handles 4 columns
+%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
+    mov         eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]  ; cheap scalar pre-check (presumably rows 1 and 2)
+    or          eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+    jnz         near .columnDCT
+
+    movq        xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]  ; OR together rows 1..7 of these 4 columns
+    movq        xmm2, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm4, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm6, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm7, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+    por         xmm1, xmm2
+    por         xmm3, xmm4
+    por         xmm5, xmm6
+    por         xmm1, xmm3
+    por         xmm5, xmm7
+    por         xmm1, xmm5
+    packsswb    xmm1, xmm1              ; 4 words -> 4 bytes (saturation keeps nonzero nonzero)
+    movd        eax, xmm1               ; eax = those 4 bytes
+    test        rax, rax                ; nonzero => at least one AC term is nonzero
+    jnz         short .columnDCT
+
+    ; -- AC terms all zero
+
+    movq        xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+
+    punpcklwd   xmm0, xmm0                  ; xmm0=(00 00 01 01 02 02 03 03)
+    psrad       xmm0, (DWORD_BIT-WORD_BIT)  ; xmm0=in0=(00 01 02 03)
+    cvtdq2ps    xmm0, xmm0                  ; xmm0=in0=(00 01 02 03)
+
+    mulps       xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]  ; dequantize the DC terms
+
+    movaps      xmm1, xmm0
+    movaps      xmm2, xmm0
+    movaps      xmm3, xmm0
+
+    shufps      xmm0, xmm0, 0x00        ; xmm0=(00 00 00 00)
+    shufps      xmm1, xmm1, 0x55        ; xmm1=(01 01 01 01)
+    shufps      xmm2, xmm2, 0xAA        ; xmm2=(02 02 02 02)
+    shufps      xmm3, xmm3, 0xFF        ; xmm3=(03 03 03 03)
+
+    movaps      XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm0  ; workspace row n <- DC of column n, x8
+    movaps      XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0
+    movaps      XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
+    movaps      XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1
+    movaps      XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm2
+    movaps      XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm2
+    movaps      XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
+    movaps      XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
+    jmp         near .nextcolumn
+%endif
+.columnDCT:
+
+    ; -- Even part
+
+    movq        xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm1, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm2, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm3, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+
+    punpcklwd   xmm0, xmm0                  ; xmm0=(00 00 01 01 02 02 03 03)
+    punpcklwd   xmm1, xmm1                  ; xmm1=(20 20 21 21 22 22 23 23)
+    psrad       xmm0, (DWORD_BIT-WORD_BIT)  ; xmm0=in0=(00 01 02 03)
+    psrad       xmm1, (DWORD_BIT-WORD_BIT)  ; xmm1=in2=(20 21 22 23)
+    cvtdq2ps    xmm0, xmm0                  ; xmm0=in0=(00 01 02 03)
+    cvtdq2ps    xmm1, xmm1                  ; xmm1=in2=(20 21 22 23)
+
+    punpcklwd   xmm2, xmm2                  ; xmm2=(40 40 41 41 42 42 43 43)
+    punpcklwd   xmm3, xmm3                  ; xmm3=(60 60 61 61 62 62 63 63)
+    psrad       xmm2, (DWORD_BIT-WORD_BIT)  ; xmm2=in4=(40 41 42 43)
+    psrad       xmm3, (DWORD_BIT-WORD_BIT)  ; xmm3=in6=(60 61 62 63)
+    cvtdq2ps    xmm2, xmm2                  ; xmm2=in4=(40 41 42 43)
+    cvtdq2ps    xmm3, xmm3                  ; xmm3=in6=(60 61 62 63)
+
+    mulps       xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+    mulps       xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+    mulps       xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+    mulps       xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+
+    movaps      xmm4, xmm0
+    movaps      xmm5, xmm1
+    subps       xmm0, xmm2              ; xmm0=tmp11
+    subps       xmm1, xmm3
+    addps       xmm4, xmm2              ; xmm4=tmp10
+    addps       xmm5, xmm3              ; xmm5=tmp13
+
+    mulps       xmm1, [rel PD_1_414]
+    subps       xmm1, xmm5              ; xmm1=tmp12
+
+    movaps      xmm6, xmm4
+    movaps      xmm7, xmm0
+    subps       xmm4, xmm5              ; xmm4=tmp3
+    subps       xmm0, xmm1              ; xmm0=tmp2
+    addps       xmm6, xmm5              ; xmm6=tmp0
+    addps       xmm7, xmm1              ; xmm7=tmp1
+
+    movaps      XMMWORD [wk(1)], xmm4   ; tmp3
+    movaps      XMMWORD [wk(0)], xmm0   ; tmp2
+
+    ; -- Odd part
+
+    movq        xmm2, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+    movq        xmm1, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+
+    punpcklwd   xmm2, xmm2                  ; xmm2=(10 10 11 11 12 12 13 13)
+    punpcklwd   xmm3, xmm3                  ; xmm3=(30 30 31 31 32 32 33 33)
+    psrad       xmm2, (DWORD_BIT-WORD_BIT)  ; xmm2=in1=(10 11 12 13)
+    psrad       xmm3, (DWORD_BIT-WORD_BIT)  ; xmm3=in3=(30 31 32 33)
+    cvtdq2ps    xmm2, xmm2                  ; xmm2=in1=(10 11 12 13)
+    cvtdq2ps    xmm3, xmm3                  ; xmm3=in3=(30 31 32 33)
+
+    punpcklwd   xmm5, xmm5                  ; xmm5=(50 50 51 51 52 52 53 53)
+    punpcklwd   xmm1, xmm1                  ; xmm1=(70 70 71 71 72 72 73 73)
+    psrad       xmm5, (DWORD_BIT-WORD_BIT)  ; xmm5=in5=(50 51 52 53)
+    psrad       xmm1, (DWORD_BIT-WORD_BIT)  ; xmm1=in7=(70 71 72 73)
+    cvtdq2ps    xmm5, xmm5                  ; xmm5=in5=(50 51 52 53)
+    cvtdq2ps    xmm1, xmm1                  ; xmm1=in7=(70 71 72 73)
+
+    mulps       xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+    mulps       xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+    mulps       xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+    mulps       xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+
+    movaps      xmm4, xmm2
+    movaps      xmm0, xmm5
+    addps       xmm2, xmm1              ; xmm2=z11
+    addps       xmm5, xmm3              ; xmm5=z13
+    subps       xmm4, xmm1              ; xmm4=z12
+    subps       xmm0, xmm3              ; xmm0=z10
+
+    movaps      xmm1, xmm2
+    subps       xmm2, xmm5
+    addps       xmm1, xmm5              ; xmm1=tmp7
+
+    mulps       xmm2, [rel PD_1_414]    ; xmm2=tmp11
+
+    movaps      xmm3, xmm0
+    addps       xmm0, xmm4
+    mulps       xmm0, [rel PD_1_847]    ; xmm0=z5
+    mulps       xmm3, [rel PD_M2_613]   ; xmm3=(z10 * -2.613125930)
+    mulps       xmm4, [rel PD_1_082]    ; xmm4=(z12 * 1.082392200)
+    addps       xmm3, xmm0              ; xmm3=tmp12
+    subps       xmm4, xmm0              ; xmm4=tmp10
+
+    ; -- Final output stage
+
+    subps       xmm3, xmm1              ; xmm3=tmp6
+    movaps      xmm5, xmm6
+    movaps      xmm0, xmm7
+    addps       xmm6, xmm1              ; xmm6=data0=(00 01 02 03)
+    addps       xmm7, xmm3              ; xmm7=data1=(10 11 12 13)
+    subps       xmm5, xmm1              ; xmm5=data7=(70 71 72 73)
+    subps       xmm0, xmm3              ; xmm0=data6=(60 61 62 63)
+    subps       xmm2, xmm3              ; xmm2=tmp5
+
+    movaps      xmm1, xmm6              ; transpose coefficients(phase 1)
+    unpcklps    xmm6, xmm7              ; xmm6=(00 10 01 11)
+    unpckhps    xmm1, xmm7              ; xmm1=(02 12 03 13)
+    movaps      xmm3, xmm0              ; transpose coefficients(phase 1)
+    unpcklps    xmm0, xmm5              ; xmm0=(60 70 61 71)
+    unpckhps    xmm3, xmm5              ; xmm3=(62 72 63 73)
+
+    movaps      xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+    movaps      xmm5, XMMWORD [wk(1)]   ; xmm5=tmp3
+
+    movaps      XMMWORD [wk(0)], xmm0   ; wk(0)=(60 70 61 71)
+    movaps      XMMWORD [wk(1)], xmm3   ; wk(1)=(62 72 63 73)
+
+    addps       xmm4, xmm2              ; xmm4=tmp4
+    movaps      xmm0, xmm7
+    movaps      xmm3, xmm5
+    addps       xmm7, xmm2              ; xmm7=data2=(20 21 22 23)
+    addps       xmm5, xmm4              ; xmm5=data4=(40 41 42 43)
+    subps       xmm0, xmm2              ; xmm0=data5=(50 51 52 53)
+    subps       xmm3, xmm4              ; xmm3=data3=(30 31 32 33)
+
+    movaps      xmm2, xmm7              ; transpose coefficients(phase 1)
+    unpcklps    xmm7, xmm3              ; xmm7=(20 30 21 31)
+    unpckhps    xmm2, xmm3              ; xmm2=(22 32 23 33)
+    movaps      xmm4, xmm5              ; transpose coefficients(phase 1)
+    unpcklps    xmm5, xmm0              ; xmm5=(40 50 41 51)
+    unpckhps    xmm4, xmm0              ; xmm4=(42 52 43 53)
+
+    movaps      xmm3, xmm6              ; transpose coefficients(phase 2)
+    unpcklps2   xmm6, xmm7              ; xmm6=(00 10 20 30)
+    unpckhps2   xmm3, xmm7              ; xmm3=(01 11 21 31)
+    movaps      xmm0, xmm1              ; transpose coefficients(phase 2)
+    unpcklps2   xmm1, xmm2              ; xmm1=(02 12 22 32)
+    unpckhps2   xmm0, xmm2              ; xmm0=(03 13 23 33)
+
+    movaps      xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
+    movaps      xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
+
+    movaps      XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm6  ; first half of workspace rows 0..3
+    movaps      XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
+    movaps      XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
+    movaps      XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
+
+    movaps      xmm6, xmm5              ; transpose coefficients(phase 2)
+    unpcklps2   xmm5, xmm7              ; xmm5=(40 50 60 70)
+    unpckhps2   xmm6, xmm7              ; xmm6=(41 51 61 71)
+    movaps      xmm3, xmm4              ; transpose coefficients(phase 2)
+    unpcklps2   xmm4, xmm2              ; xmm4=(42 52 62 72)
+    unpckhps2   xmm3, xmm2              ; xmm3=(43 53 63 73)
+
+    movaps      XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5  ; second half of workspace rows 0..3
+    movaps      XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
+    movaps      XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm4
+    movaps      XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
+
+.nextcolumn:
+    add         rsi, byte 4*SIZEOF_JCOEF               ; coef_block
+    add         rdx, byte 4*SIZEOF_FLOAT_MULT_TYPE     ; quantptr
+    add         rdi,      4*DCTSIZE*SIZEOF_FAST_FLOAT  ; wsptr
+    dec         rcx                                    ; ctr
+    jnz         near .columnloop
+
+    ; -- Prefetch the next coefficient block
+
+    prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
+    prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+    prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+    prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+
+    ; ---- Pass 2: process rows from work array, store into output array.
+
+    mov         rax, [original_rbp]     ; NOTE(review): looks dead - rax is overwritten by "mov eax, r13d" below
+    lea         rsi, [workspace]        ; FAST_FLOAT *wsptr
+    mov         rdi, r12                ; (JSAMPROW *)
+    mov         eax, r13d               ; rax = output_col (32-bit write zero-extends)
+    mov         rcx, DCTSIZE/4          ; ctr
+.rowloop:                               ; each iteration produces 4 output rows
+
+    ; -- Even part
+
+    movaps      xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)]
+    movaps      xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_FAST_FLOAT)]
+    movaps      xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_FAST_FLOAT)]
+    movaps      xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_FAST_FLOAT)]
+
+    movaps      xmm4, xmm0
+    movaps      xmm5, xmm1
+    subps       xmm0, xmm2              ; xmm0=tmp11
+    subps       xmm1, xmm3
+    addps       xmm4, xmm2              ; xmm4=tmp10
+    addps       xmm5, xmm3              ; xmm5=tmp13
+
+    mulps       xmm1, [rel PD_1_414]
+    subps       xmm1, xmm5              ; xmm1=tmp12
+
+    movaps      xmm6, xmm4
+    movaps      xmm7, xmm0
+    subps       xmm4, xmm5              ; xmm4=tmp3
+    subps       xmm0, xmm1              ; xmm0=tmp2
+    addps       xmm6, xmm5              ; xmm6=tmp0
+    addps       xmm7, xmm1              ; xmm7=tmp1
+
+    movaps      XMMWORD [wk(1)], xmm4   ; tmp3
+    movaps      XMMWORD [wk(0)], xmm0   ; tmp2
+
+    ; -- Odd part
+
+    movaps      xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)]
+    movaps      xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_FAST_FLOAT)]
+    movaps      xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_FAST_FLOAT)]
+    movaps      xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_FAST_FLOAT)]
+
+    movaps      xmm4, xmm2
+    movaps      xmm0, xmm5
+    addps       xmm2, xmm1              ; xmm2=z11
+    addps       xmm5, xmm3              ; xmm5=z13
+    subps       xmm4, xmm1              ; xmm4=z12
+    subps       xmm0, xmm3              ; xmm0=z10
+
+    movaps      xmm1, xmm2
+    subps       xmm2, xmm5
+    addps       xmm1, xmm5              ; xmm1=tmp7
+
+    mulps       xmm2, [rel PD_1_414]    ; xmm2=tmp11
+
+    movaps      xmm3, xmm0
+    addps       xmm0, xmm4
+    mulps       xmm0, [rel PD_1_847]    ; xmm0=z5
+    mulps       xmm3, [rel PD_M2_613]   ; xmm3=(z10 * -2.613125930)
+    mulps       xmm4, [rel PD_1_082]    ; xmm4=(z12 * 1.082392200)
+    addps       xmm3, xmm0              ; xmm3=tmp12
+    subps       xmm4, xmm0              ; xmm4=tmp10
+
+    ; -- Final output stage
+
+    subps       xmm3, xmm1              ; xmm3=tmp6
+    movaps      xmm5, xmm6
+    movaps      xmm0, xmm7
+    addps       xmm6, xmm1              ; xmm6=data0=(00 10 20 30)
+    addps       xmm7, xmm3              ; xmm7=data1=(01 11 21 31)
+    subps       xmm5, xmm1              ; xmm5=data7=(07 17 27 37)
+    subps       xmm0, xmm3              ; xmm0=data6=(06 16 26 36)
+    subps       xmm2, xmm3              ; xmm2=tmp5
+
+    movaps      xmm1, [rel PD_RNDINT_MAGIC]  ; 1.5*2^26: float ulp there is 8, so x+magic carries round(x/8) in its low bits
+    pcmpeqd     xmm3, xmm3                   ; all ones
+    psrld       xmm3, WORD_BIT          ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+
+    addps       xmm6, xmm1              ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **)
+    addps       xmm7, xmm1              ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **)
+    addps       xmm0, xmm1              ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **)
+    addps       xmm5, xmm1              ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **)
+
+    pand        xmm6, xmm3              ; xmm6=(00 -- 10 -- 20 -- 30 --)
+    pslld       xmm7, WORD_BIT          ; xmm7=(-- 01 -- 11 -- 21 -- 31)
+    pand        xmm0, xmm3              ; xmm0=(06 -- 16 -- 26 -- 36 --)
+    pslld       xmm5, WORD_BIT          ; xmm5=(-- 07 -- 17 -- 27 -- 37)
+    por         xmm6, xmm7              ; xmm6=(00 01 10 11 20 21 30 31)
+    por         xmm0, xmm5              ; xmm0=(06 07 16 17 26 27 36 37)
+
+    movaps      xmm1, XMMWORD [wk(0)]   ; xmm1=tmp2
+    movaps      xmm3, XMMWORD [wk(1)]   ; xmm3=tmp3
+
+    addps       xmm4, xmm2              ; xmm4=tmp4
+    movaps      xmm7, xmm1
+    movaps      xmm5, xmm3
+    addps       xmm1, xmm2              ; xmm1=data2=(02 12 22 32)
+    addps       xmm3, xmm4              ; xmm3=data4=(04 14 24 34)
+    subps       xmm7, xmm2              ; xmm7=data5=(05 15 25 35)
+    subps       xmm5, xmm4              ; xmm5=data3=(03 13 23 33)
+
+    movaps      xmm2, [rel PD_RNDINT_MAGIC]  ; same rounding constant as above (xmm1 was reused)
+    pcmpeqd     xmm4, xmm4                   ; all ones
+    psrld       xmm4, WORD_BIT          ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+
+    addps       xmm3, xmm2              ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **)
+    addps       xmm7, xmm2              ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **)
+    addps       xmm1, xmm2              ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **)
+    addps       xmm5, xmm2              ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **)
+
+    pand        xmm3, xmm4              ; xmm3=(04 -- 14 -- 24 -- 34 --)
+    pslld       xmm7, WORD_BIT          ; xmm7=(-- 05 -- 15 -- 25 -- 35)
+    pand        xmm1, xmm4              ; xmm1=(02 -- 12 -- 22 -- 32 --)
+    pslld       xmm5, WORD_BIT          ; xmm5=(-- 03 -- 13 -- 23 -- 33)
+    por         xmm3, xmm7              ; xmm3=(04 05 14 15 24 25 34 35)
+    por         xmm1, xmm5              ; xmm1=(02 03 12 13 22 23 32 33)
+
+    movdqa      xmm2, [rel PB_CENTERJSAMP]  ; xmm2=[rel PB_CENTERJSAMP]
+
+    packsswb    xmm6, xmm3        ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35)
+    packsswb    xmm1, xmm0        ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37)
+    paddb       xmm6, xmm2        ; add CENTERJSAMPLE to every saturated signed byte
+    paddb       xmm1, xmm2
+
+    movdqa      xmm4, xmm6        ; transpose coefficients(phase 2)
+    punpcklwd   xmm6, xmm1        ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+    punpckhwd   xmm4, xmm1        ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+
+    movdqa      xmm7, xmm6        ; transpose coefficients(phase 3)
+    punpckldq   xmm6, xmm4        ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+    punpckhdq   xmm7, xmm4        ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+
+    pshufd      xmm5, xmm6, 0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+    pshufd      xmm3, xmm7, 0x4E  ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+
+    mov         rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]      ; row pointers 0 and 2 of this group
+    mov         rbx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+    movq        XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6  ; low 8 bytes = (00..07)
+    movq        XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7  ; low 8 bytes = (20..27)
+    mov         rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]      ; row pointers 1 and 3 of this group
+    mov         rbx, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+    movq        XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5  ; low 8 bytes = (10..17)
+    movq        XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3  ; low 8 bytes = (30..37)
+
+    add         rsi, byte 4*SIZEOF_FAST_FLOAT  ; wsptr
+    add         rdi, byte 4*SIZEOF_JSAMPROW    ; advance to the next 4 row pointers
+    dec         rcx                            ; ctr
+    jnz         near .rowloop
+
+    pop         rbx                     ; restore rbx pushed in the prologue
+    uncollect_args 4
+    mov         rsp, rbp                ; rsp <- aligned rbp
+    pop         rsp                     ; rsp <- pointer to the saved rbp (stored in the prologue)
+    pop         rbp                     ; restore caller's rbp
+    ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+ align 32 diff --git a/simd/x86_64/jidctfst-sse2.asm b/simd/x86_64/jidctfst-sse2.asm new file mode 100644 index 0000000..c6c42f9 --- /dev/null +++ b/simd/x86_64/jidctfst-sse2.asm @@ -0,0 +1,492 @@ +; +; jidctfst.asm - fast integer IDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a fast, not so accurate integer implementation of +; the inverse DCT (Discrete Cosine Transform). The following code is +; based directly on the IJG's original jidctfst.c; see the jidctfst.c +; for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 8 ; 14 is also OK. +%define PASS1_BITS 2 + +%if IFAST_SCALE_BITS != PASS1_BITS +%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'." +%endif + +%if CONST_BITS == 8 +F_1_082 equ 277 ; FIX(1.082392200) +F_1_414 equ 362 ; FIX(1.414213562) +F_1_847 equ 473 ; FIX(1.847759065) +F_2_613 equ 669 ; FIX(2.613125930) +F_1_613 equ (F_2_613 - 256) ; FIX(2.613125930) - FIX(1) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_1_082 equ DESCALE(1162209775, 30 - CONST_BITS) ; FIX(1.082392200) +F_1_414 equ DESCALE(1518500249, 30 - CONST_BITS) ; FIX(1.414213562) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_2_613 equ DESCALE(2805822602, 30 - CONST_BITS) ; FIX(2.613125930) +F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + +; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) +; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) + +%define PRE_MULTIPLY_SCALE_BITS 2 +%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) + + alignz 32 + GLOBAL_DATA(jconst_idct_ifast_sse2) + +EXTN(jconst_idct_ifast_sse2): + +PW_F1414 times 8 dw F_1_414 << CONST_SHIFT +PW_F1847 times 8 dw F_1_847 << CONST_SHIFT +PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT +PW_F1082 times 8 dw F_1_082 << CONST_SHIFT +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_ifast_sse2(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13d = JDIMENSION output_col + +%define original_rbp rbp + 0 +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_idct_ifast_sse2) + +EXTN(jsimd_idct_ifast_sse2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 4 + + ; ---- Pass 1: process columns from input. 
+ + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2 + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm1, xmm0 + packsswb xmm1, xmm1 + packsswb xmm1, xmm1 + movd eax, xmm1 + test rax, rax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm7, xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0, xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm7, xmm7 ; xmm7=(04 04 05 05 06 06 07 07) + + pshufd xmm6, xmm0, 0x00 ; xmm6=col0=(00 00 00 00 00 00 00 00) + pshufd xmm2, xmm0, 0x55 ; xmm2=col1=(01 01 01 01 01 01 01 01) + pshufd xmm5, xmm0, 0xAA ; xmm5=col2=(02 02 02 02 02 02 02 02) + pshufd xmm0, xmm0, 0xFF ; xmm0=col3=(03 03 03 03 03 03 03 03) + pshufd xmm1, xmm7, 0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04) + pshufd xmm4, xmm7, 0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05) + pshufd xmm3, xmm7, 0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06) + pshufd xmm7, xmm7, 0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3 + jmp near .column_end +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] 
+ movdqa xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4, xmm0 + movdqa xmm5, xmm1 + psubw xmm0, xmm2 ; xmm0=tmp11 + psubw xmm1, xmm3 + paddw xmm4, xmm2 ; xmm4=tmp10 + paddw xmm5, xmm3 ; xmm5=tmp13 + + psllw xmm1, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm1, [rel PW_F1414] + psubw xmm1, xmm5 ; xmm1=tmp12 + + movdqa xmm6, xmm4 + movdqa xmm7, xmm0 + psubw xmm4, xmm5 ; xmm4=tmp3 + psubw xmm0, xmm1 ; xmm0=tmp2 + paddw xmm6, xmm5 ; xmm6=tmp0 + paddw xmm7, xmm1 ; xmm7=tmp1 + + movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=tmp3 + movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=tmp2 + + ; -- Odd part + + movdqa xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + movdqa xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_IFAST_MULT_TYPE)] + + movdqa xmm4, xmm2 + movdqa xmm0, xmm5 + psubw xmm2, xmm1 ; xmm2=z12 + psubw xmm5, xmm3 ; xmm5=z10 + paddw xmm4, xmm1 ; xmm4=z11 + paddw xmm0, xmm3 ; xmm0=z13 + + movdqa xmm1, xmm5 ; xmm1=z10(unscaled) + psllw xmm2, PRE_MULTIPLY_SCALE_BITS + psllw xmm5, PRE_MULTIPLY_SCALE_BITS + + movdqa xmm3, xmm4 + psubw xmm4, xmm0 + paddw xmm3, xmm0 ; xmm3=tmp7 + + psllw xmm4, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm4, [rel PW_F1414] ; xmm4=tmp11 + + ; To avoid overflow... 
+ ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm0, xmm5 + paddw xmm5, xmm2 + pmulhw xmm5, [rel PW_F1847] ; xmm5=z5 + pmulhw xmm0, [rel PW_MF1613] + pmulhw xmm2, [rel PW_F1082] + psubw xmm0, xmm1 + psubw xmm2, xmm5 ; xmm2=tmp10 + paddw xmm0, xmm5 ; xmm0=tmp12 + + ; -- Final output stage + + psubw xmm0, xmm3 ; xmm0=tmp6 + movdqa xmm1, xmm6 + movdqa xmm5, xmm7 + paddw xmm6, xmm3 ; xmm6=data0=(00 01 02 03 04 05 06 07) + paddw xmm7, xmm0 ; xmm7=data1=(10 11 12 13 14 15 16 17) + psubw xmm1, xmm3 ; xmm1=data7=(70 71 72 73 74 75 76 77) + psubw xmm5, xmm0 ; xmm5=data6=(60 61 62 63 64 65 66 67) + psubw xmm4, xmm0 ; xmm4=tmp5 + + movdqa xmm3, xmm6 ; transpose coefficients(phase 1) + punpcklwd xmm6, xmm7 ; xmm6=(00 10 01 11 02 12 03 13) + punpckhwd xmm3, xmm7 ; xmm3=(04 14 05 15 06 16 07 17) + movdqa xmm0, xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5, xmm1 ; xmm5=(60 70 61 71 62 72 63 73) + punpckhwd xmm0, xmm1 ; xmm0=(64 74 65 75 66 76 67 77) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp2 + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=tmp3 + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(64 74 65 75 66 76 67 77) + + paddw xmm2, xmm4 ; xmm2=tmp4 + movdqa xmm5, xmm7 + movdqa xmm0, xmm1 + paddw xmm7, xmm4 ; xmm7=data2=(20 21 22 23 24 25 26 27) + paddw xmm1, xmm2 ; xmm1=data4=(40 41 42 43 44 45 46 47) + psubw xmm5, xmm4 ; xmm5=data5=(50 51 52 53 54 55 56 57) + psubw xmm0, xmm2 ; xmm0=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm4, xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7, xmm0 ; xmm7=(20 30 21 31 22 32 23 33) + punpckhwd xmm4, xmm0 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm2, xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1, xmm5 ; xmm1=(40 50 41 51 42 52 43 53) + punpckhwd xmm2, xmm5 ; xmm2=(44 54 45 55 46 56 47 57) + + movdqa xmm0, xmm3 ; transpose 
coefficients(phase 2) + punpckldq xmm3, xmm4 ; xmm3=(04 14 24 34 05 15 25 35) + punpckhdq xmm0, xmm4 ; xmm0=(06 16 26 36 07 17 27 37) + movdqa xmm5, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm7 ; xmm6=(00 10 20 30 01 11 21 31) + punpckhdq xmm5, xmm7 ; xmm5=(02 12 22 32 03 13 23 33) + + movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(60 70 61 71 62 72 63 73) + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(0)], xmm3 ; wk(0)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=(06 16 26 36 07 17 27 37) + + movdqa xmm3, xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1, xmm4 ; xmm1=(40 50 60 70 41 51 61 71) + punpckhdq xmm3, xmm4 ; xmm3=(42 52 62 72 43 53 63 73) + movdqa xmm0, xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2, xmm7 ; xmm2=(44 54 64 74 45 55 65 75) + punpckhdq xmm0, xmm7 ; xmm0=(46 56 66 76 47 57 67 77) + + movdqa xmm4, xmm6 ; transpose coefficients(phase 3) + punpcklqdq xmm6, xmm1 ; xmm6=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm4, xmm1 ; xmm4=col1=(01 11 21 31 41 51 61 71) + movdqa xmm7, xmm5 ; transpose coefficients(phase 3) + punpcklqdq xmm5, xmm3 ; xmm5=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm7, xmm3 ; xmm7=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm1, XMMWORD [wk(0)] ; xmm1=(04 14 24 34 05 15 25 35) + movdqa xmm3, XMMWORD [wk(1)] ; xmm3=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=col1 + movdqa XMMWORD [wk(1)], xmm7 ; wk(1)=col3 + + movdqa xmm4, xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1, xmm2 ; xmm1=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm4, xmm2 ; xmm4=col5=(05 15 25 35 45 55 65 75) + movdqa xmm7, xmm3 ; transpose coefficients(phase 3) + punpcklqdq xmm3, xmm0 ; xmm3=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm7, xmm0 ; xmm7=col7=(07 17 27 37 47 57 67 77) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + 
prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. + + mov rax, [original_rbp] + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; -- Even part + + ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 + + movdqa xmm2, xmm6 + movdqa xmm0, xmm5 + psubw xmm6, xmm1 ; xmm6=tmp11 + psubw xmm5, xmm3 + paddw xmm2, xmm1 ; xmm2=tmp10 + paddw xmm0, xmm3 ; xmm0=tmp13 + + psllw xmm5, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm5, [rel PW_F1414] + psubw xmm5, xmm0 ; xmm5=tmp12 + + movdqa xmm1, xmm2 + movdqa xmm3, xmm6 + psubw xmm2, xmm0 ; xmm2=tmp3 + psubw xmm6, xmm5 ; xmm6=tmp2 + paddw xmm1, xmm0 ; xmm1=tmp0 + paddw xmm3, xmm5 ; xmm3=tmp1 + + movdqa xmm0, XMMWORD [wk(0)] ; xmm0=col1 + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=col3 + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp3 + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=tmp2 + + ; -- Odd part + + ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7 + + movdqa xmm2, xmm0 + movdqa xmm6, xmm4 + psubw xmm0, xmm7 ; xmm0=z12 + psubw xmm4, xmm5 ; xmm4=z10 + paddw xmm2, xmm7 ; xmm2=z11 + paddw xmm6, xmm5 ; xmm6=z13 + + movdqa xmm7, xmm4 ; xmm7=z10(unscaled) + psllw xmm0, PRE_MULTIPLY_SCALE_BITS + psllw xmm4, PRE_MULTIPLY_SCALE_BITS + + movdqa xmm5, xmm2 + psubw xmm2, xmm6 + paddw xmm5, xmm6 ; xmm5=tmp7 + + psllw xmm2, PRE_MULTIPLY_SCALE_BITS + pmulhw xmm2, [rel PW_F1414] ; xmm2=tmp11 + + ; To avoid overflow... 
+ ; + ; (Original) + ; tmp12 = -2.613125930 * z10 + z5; + ; + ; (This implementation) + ; tmp12 = (-1.613125930 - 1) * z10 + z5; + ; = -1.613125930 * z10 - z10 + z5; + + movdqa xmm6, xmm4 + paddw xmm4, xmm0 + pmulhw xmm4, [rel PW_F1847] ; xmm4=z5 + pmulhw xmm6, [rel PW_MF1613] + pmulhw xmm0, [rel PW_F1082] + psubw xmm6, xmm7 + psubw xmm0, xmm4 ; xmm0=tmp10 + paddw xmm6, xmm4 ; xmm6=tmp12 + + ; -- Final output stage + + psubw xmm6, xmm5 ; xmm6=tmp6 + movdqa xmm7, xmm1 + movdqa xmm4, xmm3 + paddw xmm1, xmm5 ; xmm1=data0=(00 10 20 30 40 50 60 70) + paddw xmm3, xmm6 ; xmm3=data1=(01 11 21 31 41 51 61 71) + psraw xmm1, (PASS1_BITS+3) ; descale + psraw xmm3, (PASS1_BITS+3) ; descale + psubw xmm7, xmm5 ; xmm7=data7=(07 17 27 37 47 57 67 77) + psubw xmm4, xmm6 ; xmm4=data6=(06 16 26 36 46 56 66 76) + psraw xmm7, (PASS1_BITS+3) ; descale + psraw xmm4, (PASS1_BITS+3) ; descale + psubw xmm2, xmm6 ; xmm2=tmp5 + + packsswb xmm1, xmm4 ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3, xmm7 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm5, XMMWORD [wk(1)] ; xmm5=tmp2 + movdqa xmm6, XMMWORD [wk(0)] ; xmm6=tmp3 + + paddw xmm0, xmm2 ; xmm0=tmp4 + movdqa xmm4, xmm5 + movdqa xmm7, xmm6 + paddw xmm5, xmm2 ; xmm5=data2=(02 12 22 32 42 52 62 72) + paddw xmm6, xmm0 ; xmm6=data4=(04 14 24 34 44 54 64 74) + psraw xmm5, (PASS1_BITS+3) ; descale + psraw xmm6, (PASS1_BITS+3) ; descale + psubw xmm4, xmm2 ; xmm4=data5=(05 15 25 35 45 55 65 75) + psubw xmm7, xmm0 ; xmm7=data3=(03 13 23 33 43 53 63 73) + psraw xmm4, (PASS1_BITS+3) ; descale + psraw xmm7, (PASS1_BITS+3) ; descale + + movdqa xmm2, [rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP] + + packsswb xmm5, xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm7, xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm1, xmm2 + paddb xmm3, xmm2 + paddb xmm5, xmm2 + paddb xmm7, xmm2 + + movdqa xmm0, xmm1 ; transpose coefficients(phase 1) + punpcklbw xmm1, 
xmm3 ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0, xmm3 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm6, xmm5 ; transpose coefficients(phase 1) + punpcklbw xmm5, xmm7 ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm6, xmm7 ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4, xmm1 ; transpose coefficients(phase 2) + punpcklwd xmm1, xmm5 ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4, xmm5 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm2, xmm6 ; transpose coefficients(phase 2) + punpcklwd xmm6, xmm0 ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm2, xmm0 ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm3, xmm1 ; transpose coefficients(phase 3) + punpckldq xmm1, xmm6 ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm3, xmm6 ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm7, xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4, xmm2 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm7, xmm2 ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm5, xmm1, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0, xmm3, 0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm6, xmm4, 0x4E ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm2, xmm7, 0x4E ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 + mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7 + + mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW 
[rdi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0 + mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2 + + uncollect_args 4 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jidctint-avx2.asm b/simd/x86_64/jidctint-avx2.asm new file mode 100644 index 0000000..b60b44f --- /dev/null +++ b/simd/x86_64/jidctint-avx2.asm @@ -0,0 +1,419 @@ +; +; jidctint.asm - accurate integer IDCT (64-bit AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, 2018, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. 
+; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS - PASS1_BITS) +%define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- +; In-place 8x8x16-bit inverse matrix transpose using AVX2 instructions +; %1-%4: Input/output registers +; %5-%8: Temp registers + +%macro dotranspose 8 + ; %5=(00 10 20 30 40 50 60 70 01 
11 21 31 41 51 61 71) + ; %6=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72) + ; %7=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75) + ; %8=(07 17 27 37 47 57 67 77 06 16 26 36 46 56 66 76) + + vpermq %5, %1, 0xD8 + vpermq %6, %2, 0x72 + vpermq %7, %3, 0xD8 + vpermq %8, %4, 0x72 + ; transpose coefficients(phase 1) + ; %5=(00 10 20 30 01 11 21 31 40 50 60 70 41 51 61 71) + ; %6=(02 12 22 32 03 13 23 33 42 52 62 72 43 53 63 73) + ; %7=(04 14 24 34 05 15 25 35 44 54 64 74 45 55 65 75) + ; %8=(06 16 26 36 07 17 27 37 46 56 66 76 47 57 67 77) + + vpunpcklwd %1, %5, %6 + vpunpckhwd %2, %5, %6 + vpunpcklwd %3, %7, %8 + vpunpckhwd %4, %7, %8 + ; transpose coefficients(phase 2) + ; %1=(00 02 10 12 20 22 30 32 40 42 50 52 60 62 70 72) + ; %2=(01 03 11 13 21 23 31 33 41 43 51 53 61 63 71 73) + ; %3=(04 06 14 16 24 26 34 36 44 46 54 56 64 66 74 76) + ; %4=(05 07 15 17 25 27 35 37 45 47 55 57 65 67 75 77) + + vpunpcklwd %5, %1, %2 + vpunpcklwd %6, %3, %4 + vpunpckhwd %7, %1, %2 + vpunpckhwd %8, %3, %4 + ; transpose coefficients(phase 3) + ; %5=(00 01 02 03 10 11 12 13 40 41 42 43 50 51 52 53) + ; %6=(04 05 06 07 14 15 16 17 44 45 46 47 54 55 56 57) + ; %7=(20 21 22 23 30 31 32 33 60 61 62 63 70 71 72 73) + ; %8=(24 25 26 27 34 35 36 37 64 65 66 67 74 75 76 77) + + vpunpcklqdq %1, %5, %6 + vpunpckhqdq %2, %5, %6 + vpunpcklqdq %3, %7, %8 + vpunpckhqdq %4, %7, %8 + ; transpose coefficients(phase 4) + ; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47) + ; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57) + ; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67) + ; %4=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77) +%endmacro + +; -------------------------------------------------------------------------- +; In-place 8x8x16-bit slow integer inverse DCT using AVX2 instructions +; %1-%4: Input/output registers +; %5-%12: Temp registers +; %13: Pass (1 or 2) + +%macro dodct 13 + ; -- Even part + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * 
-1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + vperm2i128 %6, %3, %3, 0x01 ; %6=in6_2 + vpunpcklwd %5, %3, %6 ; %5=in26_62L + vpunpckhwd %6, %3, %6 ; %6=in26_62H + vpmaddwd %5, %5, [rel PW_F130_F054_MF130_F054] ; %5=tmp3_2L + vpmaddwd %6, %6, [rel PW_F130_F054_MF130_F054] ; %6=tmp3_2H + + vperm2i128 %7, %1, %1, 0x01 ; %7=in4_0 + vpsignw %1, %1, [rel PW_1_NEG1] + vpaddw %7, %7, %1 ; %7=(in0+in4)_(in0-in4) + + vpxor %1, %1, %1 + vpunpcklwd %8, %1, %7 ; %8=tmp0_1L + vpunpckhwd %1, %1, %7 ; %1=tmp0_1H + vpsrad %8, %8, (16-CONST_BITS) ; vpsrad %8,16 & vpslld %8,CONST_BITS + vpsrad %1, %1, (16-CONST_BITS) ; vpsrad %1,16 & vpslld %1,CONST_BITS + + vpsubd %11, %8, %5 ; %11=tmp0_1L-tmp3_2L=tmp13_12L + vpaddd %9, %8, %5 ; %9=tmp0_1L+tmp3_2L=tmp10_11L + vpsubd %12, %1, %6 ; %12=tmp0_1H-tmp3_2H=tmp13_12H + vpaddd %10, %1, %6 ; %10=tmp0_1H+tmp3_2H=tmp10_11H + + ; -- Odd part + + vpaddw %1, %4, %2 ; %1=in7_5+in3_1=z3_4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + vperm2i128 %8, %1, %1, 0x01 ; %8=z4_3 + vpunpcklwd %7, %1, %8 ; %7=z34_43L + vpunpckhwd %8, %1, %8 ; %8=z34_43H + vpmaddwd %7, %7, [rel PW_MF078_F117_F078_F117] ; %7=z3_4L + vpmaddwd %8, %8, [rel PW_MF078_F117_F078_F117] ; %8=z3_4H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; 
+ ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + vperm2i128 %2, %2, %2, 0x01 ; %2=in1_3 + vpunpcklwd %3, %4, %2 ; %3=in71_53L + vpunpckhwd %4, %4, %2 ; %4=in71_53H + + vpmaddwd %5, %3, [rel PW_MF060_MF089_MF050_MF256] ; %5=tmp0_1L + vpmaddwd %6, %4, [rel PW_MF060_MF089_MF050_MF256] ; %6=tmp0_1H + vpaddd %5, %5, %7 ; %5=tmp0_1L+z3_4L=tmp0_1L + vpaddd %6, %6, %8 ; %6=tmp0_1H+z3_4H=tmp0_1H + + vpmaddwd %3, %3, [rel PW_MF089_F060_MF256_F050] ; %3=tmp3_2L + vpmaddwd %4, %4, [rel PW_MF089_F060_MF256_F050] ; %4=tmp3_2H + vperm2i128 %7, %7, %7, 0x01 ; %7=z4_3L + vperm2i128 %8, %8, %8, 0x01 ; %8=z4_3H + vpaddd %7, %3, %7 ; %7=tmp3_2L+z4_3L=tmp3_2L + vpaddd %8, %4, %8 ; %8=tmp3_2H+z4_3H=tmp3_2H + + ; -- Final output stage + + vpaddd %1, %9, %7 ; %1=tmp10_11L+tmp3_2L=data0_1L + vpaddd %2, %10, %8 ; %2=tmp10_11H+tmp3_2H=data0_1H + vpaddd %1, %1, [rel PD_DESCALE_P %+ %13] + vpaddd %2, %2, [rel PD_DESCALE_P %+ %13] + vpsrad %1, %1, DESCALE_P %+ %13 + vpsrad %2, %2, DESCALE_P %+ %13 + vpackssdw %1, %1, %2 ; %1=data0_1 + + vpsubd %3, %9, %7 ; %3=tmp10_11L-tmp3_2L=data7_6L + vpsubd %4, %10, %8 ; %4=tmp10_11H-tmp3_2H=data7_6H + vpaddd %3, %3, [rel PD_DESCALE_P %+ %13] + vpaddd %4, %4, [rel PD_DESCALE_P %+ %13] + vpsrad %3, %3, DESCALE_P %+ %13 + vpsrad %4, %4, DESCALE_P %+ %13 + vpackssdw %4, %3, %4 ; %4=data7_6 + + vpaddd %7, %11, %5 ; %7=tmp13_12L+tmp0_1L=data3_2L + vpaddd %8, %12, %6 ; %8=tmp13_12H+tmp0_1H=data3_2H + vpaddd %7, %7, [rel PD_DESCALE_P %+ %13] + vpaddd %8, %8, [rel PD_DESCALE_P %+ %13] + vpsrad %7, %7, DESCALE_P %+ %13 + vpsrad %8, %8, DESCALE_P %+ %13 + vpackssdw %2, %7, %8 ; %2=data3_2 + + vpsubd %7, %11, %5 ; %7=tmp13_12L-tmp0_1L=data4_5L + vpsubd %8, %12, %6 ; %8=tmp13_12H-tmp0_1H=data4_5H + vpaddd %7, %7, [rel PD_DESCALE_P %+ %13] + vpaddd %8, %8, 
[rel PD_DESCALE_P %+ %13] + vpsrad %7, %7, DESCALE_P %+ %13 + vpsrad %8, %8, DESCALE_P %+ %13 + vpackssdw %3, %7, %8 ; %3=data4_5 +%endmacro + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_idct_islow_avx2) + +EXTN(jconst_idct_islow_avx2): + +PW_F130_F054_MF130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541 + times 4 dw (F_0_541 - F_1_847), F_0_541 +PW_MF078_F117_F078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175 + times 4 dw (F_1_175 - F_0_390), F_1_175 +PW_MF060_MF089_MF050_MF256 times 4 dw (F_0_298 - F_0_899), -F_0_899 + times 4 dw (F_2_053 - F_2_562), -F_2_562 +PW_MF089_F060_MF256_F050 times 4 dw -F_0_899, (F_1_501 - F_0_899) + times 4 dw -F_2_562, (F_3_072 - F_2_562) +PD_DESCALE_P1 times 8 dd 1 << (DESCALE_P1 - 1) +PD_DESCALE_P2 times 8 dd 1 << (DESCALE_P2 - 1) +PB_CENTERJSAMP times 32 db CENTERJSAMPLE +PW_1_NEG1 times 8 dw 1 + times 8 dw -1 + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_islow_avx2(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13d = JDIMENSION output_col + + align 32 + GLOBAL_FUNCTION(jsimd_idct_islow_avx2) + +EXTN(jsimd_idct_islow_avx2): + push rbp + mov rax, rsp ; rax = original rbp + mov rbp, rsp ; rbp = aligned rbp + push_xmm 4 + collect_args 4 + + ; ---- Pass 1: process columns. 
+ +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2 + mov eax, DWORD [DWBLOCK(1,0,r11,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,r11,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,r11,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,r11,SIZEOF_JCOEF)] + vpor xmm0, xmm0, XMMWORD [XMMBLOCK(3,0,r11,SIZEOF_JCOEF)] + vpor xmm1, xmm1, XMMWORD [XMMBLOCK(4,0,r11,SIZEOF_JCOEF)] + vpor xmm0, xmm0, XMMWORD [XMMBLOCK(5,0,r11,SIZEOF_JCOEF)] + vpor xmm1, xmm1, XMMWORD [XMMBLOCK(6,0,r11,SIZEOF_JCOEF)] + vpor xmm0, xmm0, XMMWORD [XMMBLOCK(7,0,r11,SIZEOF_JCOEF)] + vpor xmm1, xmm1, xmm0 + vpacksswb xmm1, xmm1, xmm1 + vpacksswb xmm1, xmm1, xmm1 + movd eax, xmm1 + test rax, rax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm5, XMMWORD [XMMBLOCK(0,0,r11,SIZEOF_JCOEF)] + vpmullw xmm5, xmm5, XMMWORD [XMMBLOCK(0,0,r10,SIZEOF_ISLOW_MULT_TYPE)] + + vpsllw xmm5, xmm5, PASS1_BITS + + vpunpcklwd xmm4, xmm5, xmm5 ; xmm4=(00 00 01 01 02 02 03 03) + vpunpckhwd xmm5, xmm5, xmm5 ; xmm5=(04 04 05 05 06 06 07 07) + vinserti128 ymm4, ymm4, xmm5, 1 + + vpshufd ymm0, ymm4, 0x00 ; ymm0=col0_4=(00 00 00 00 00 00 00 00 04 04 04 04 04 04 04 04) + vpshufd ymm1, ymm4, 0x55 ; ymm1=col1_5=(01 01 01 01 01 01 01 01 05 05 05 05 05 05 05 05) + vpshufd ymm2, ymm4, 0xAA ; ymm2=col2_6=(02 02 02 02 02 02 02 02 06 06 06 06 06 06 06 06) + vpshufd ymm3, ymm4, 0xFF ; ymm3=col3_7=(03 03 03 03 03 03 03 03 07 07 07 07 07 07 07 07) + + jmp near .column_end +%endif +.columnDCT: + + vmovdqu ymm4, YMMWORD [YMMBLOCK(0,0,r11,SIZEOF_JCOEF)] ; ymm4=in0_1 + vmovdqu ymm5, YMMWORD [YMMBLOCK(2,0,r11,SIZEOF_JCOEF)] ; ymm5=in2_3 + vmovdqu ymm6, YMMWORD [YMMBLOCK(4,0,r11,SIZEOF_JCOEF)] ; ymm6=in4_5 + vmovdqu ymm7, YMMWORD [YMMBLOCK(6,0,r11,SIZEOF_JCOEF)] ; ymm7=in6_7 + vpmullw ymm4, ymm4, YMMWORD [YMMBLOCK(0,0,r10,SIZEOF_ISLOW_MULT_TYPE)] + vpmullw ymm5, ymm5, YMMWORD [YMMBLOCK(2,0,r10,SIZEOF_ISLOW_MULT_TYPE)] + vpmullw ymm6, ymm6, YMMWORD [YMMBLOCK(4,0,r10,SIZEOF_ISLOW_MULT_TYPE)] + vpmullw ymm7, 
ymm7, YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_ISLOW_MULT_TYPE)] + + vperm2i128 ymm0, ymm4, ymm6, 0x20 ; ymm0=in0_4 + vperm2i128 ymm1, ymm5, ymm4, 0x31 ; ymm1=in3_1 + vperm2i128 ymm2, ymm5, ymm7, 0x20 ; ymm2=in2_6 + vperm2i128 ymm3, ymm7, ymm6, 0x31 ; ymm3=in7_5 + + dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1 + ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6 + + dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7 + ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7 + +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [r11 + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [r11 + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [r11 + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [r11 + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows. + + vperm2i128 ymm4, ymm3, ymm1, 0x31 ; ymm4=in7_5 + vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm1=in3_1 + + dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2 + ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6 + + dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7 + ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7 + + vpacksswb ymm0, ymm0, ymm1 ; ymm0=data01_45 + vpacksswb ymm1, ymm2, ymm4 ; ymm1=data23_67 + vpaddb ymm0, ymm0, [rel PB_CENTERJSAMP] + vpaddb ymm1, ymm1, [rel PB_CENTERJSAMP] + + vextracti128 xmm6, ymm1, 1 ; xmm6=data67 + vextracti128 xmm4, ymm0, 1 ; xmm4=data45 + vextracti128 xmm2, ymm1, 0 ; xmm2=data23 + vextracti128 xmm0, ymm0, 0 ; xmm0=data01 + + vpshufd xmm1, xmm0, 0x4E ; xmm1=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + vpshufd xmm3, xmm2, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + vpshufd xmm5, xmm4, 0x4E ; xmm5=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + vpshufd xmm7, xmm6, 0x4E ; xmm7=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + vzeroupper + + mov eax, r13d + + mov rdx, JSAMPROW [r12+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rsi, JSAMPROW 
[r12+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm0 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1 + + mov rdx, JSAMPROW [r12+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rsi, JSAMPROW [r12+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 + + mov rdx, JSAMPROW [r12+4*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rsi, JSAMPROW [r12+5*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5 + + mov rdx, JSAMPROW [r12+6*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rsi, JSAMPROW [r12+7*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7 + + uncollect_args 4 + pop_xmm 4 + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jidctint-sse2.asm b/simd/x86_64/jidctint-sse2.asm new file mode 100644 index 0000000..83fc344 --- /dev/null +++ b/simd/x86_64/jidctint-sse2.asm @@ -0,0 +1,848 @@ +; +; jidctint.asm - accurate integer IDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains a slow-but-accurate integer implementation of the +; inverse DCT (Discrete Cosine Transform). 
The following code is based +; directly on the IJG's original jidctint.c; see the jidctint.c for +; more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1 (CONST_BITS - PASS1_BITS) +%define DESCALE_P2 (CONST_BITS + PASS1_BITS + 3) + +%if CONST_BITS == 13 +F_0_298 equ 2446 ; FIX(0.298631336) +F_0_390 equ 3196 ; FIX(0.390180644) +F_0_541 equ 4433 ; FIX(0.541196100) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_175 equ 9633 ; FIX(1.175875602) +F_1_501 equ 12299 ; FIX(1.501321110) +F_1_847 equ 15137 ; FIX(1.847759065) +F_1_961 equ 16069 ; FIX(1.961570560) +F_2_053 equ 16819 ; FIX(2.053119869) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_072 equ 25172 ; FIX(3.072711026) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. +%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_298 equ DESCALE( 320652955, 30 - CONST_BITS) ; FIX(0.298631336) +F_0_390 equ DESCALE( 418953276, 30 - CONST_BITS) ; FIX(0.390180644) +F_0_541 equ DESCALE( 581104887, 30 - CONST_BITS) ; FIX(0.541196100) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_175 equ DESCALE(1262586813, 30 - CONST_BITS) ; FIX(1.175875602) +F_1_501 equ DESCALE(1612031267, 30 - CONST_BITS) ; FIX(1.501321110) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_1_961 equ DESCALE(2106220350, 30 - CONST_BITS) ; FIX(1.961570560) +F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_idct_islow_sse2) + 
+EXTN(jconst_idct_islow_sse2): + +PW_F130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541 +PW_F054_MF130 times 4 dw F_0_541, (F_0_541 - F_1_847) +PW_MF078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175 +PW_F117_F078 times 4 dw F_1_175, (F_1_175 - F_0_390) +PW_MF060_MF089 times 4 dw (F_0_298 - F_0_899), -F_0_899 +PW_MF089_F060 times 4 dw -F_0_899, (F_1_501 - F_0_899) +PW_MF050_MF256 times 4 dw (F_2_053 - F_2_562), -F_2_562 +PW_MF256_F050 times 4 dw -F_2_562, (F_3_072 - F_2_562) +PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1) +PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients. +; +; GLOBAL(void) +; jsimd_idct_islow_sse2(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13d = JDIMENSION output_col + +%define original_rbp rbp + 0 +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 12 + + align 32 + GLOBAL_FUNCTION(jsimd_idct_islow_sse2) + +EXTN(jsimd_idct_islow_sse2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 4 + + ; ---- Pass 1: process columns from input. 
+ + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2 + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz near .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm1, xmm0 + packsswb xmm1, xmm1 + packsswb xmm1, xmm1 + movd eax, xmm1 + test rax, rax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm5, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm5, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm5, PASS1_BITS + + movdqa xmm4, xmm5 ; xmm5=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm5, xmm5 ; xmm5=(00 00 01 01 02 02 03 03) + punpckhwd xmm4, xmm4 ; xmm4=(04 04 05 05 06 06 07 07) + + pshufd xmm7, xmm5, 0x00 ; xmm7=col0=(00 00 00 00 00 00 00 00) + pshufd xmm6, xmm5, 0x55 ; xmm6=col1=(01 01 01 01 01 01 01 01) + pshufd xmm1, xmm5, 0xAA ; xmm1=col2=(02 02 02 02 02 02 02 02) + pshufd xmm5, xmm5, 0xFF ; xmm5=col3=(03 03 03 03 03 03 03 03) + pshufd xmm0, xmm4, 0x00 ; xmm0=col4=(04 04 04 04 04 04 04 04) + pshufd xmm3, xmm4, 0x55 ; xmm3=col5=(05 05 05 05 05 05 05 05) + pshufd xmm2, xmm4, 0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06) + pshufd xmm4, xmm4, 0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07) + + movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3 + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 + jmp near .column_end +%endif +.columnDCT: + + ; -- Even part + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + 
pmullw xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm4, xmm1 ; xmm1=in2=z2 + movdqa xmm5, xmm1 + punpcklwd xmm4, xmm3 ; xmm3=in6=z3 + punpckhwd xmm5, xmm3 + movdqa xmm1, xmm4 + movdqa xmm3, xmm5 + pmaddwd xmm4, [rel PW_F130_F054] ; xmm4=tmp3L + pmaddwd xmm5, [rel PW_F130_F054] ; xmm5=tmp3H + pmaddwd xmm1, [rel PW_F054_MF130] ; xmm1=tmp2L + pmaddwd xmm3, [rel PW_F054_MF130] ; xmm3=tmp2H + + movdqa xmm6, xmm0 + paddw xmm0, xmm2 ; xmm0=in0+in4 + psubw xmm6, xmm2 ; xmm6=in0-in4 + + pxor xmm7, xmm7 + pxor xmm2, xmm2 + punpcklwd xmm7, xmm0 ; xmm7=tmp0L + punpckhwd xmm2, xmm0 ; xmm2=tmp0H + psrad xmm7, (16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + psrad xmm2, (16-CONST_BITS) ; psrad xmm2,16 & pslld xmm2,CONST_BITS + + movdqa xmm0, xmm7 + paddd xmm7, xmm4 ; xmm7=tmp10L + psubd xmm0, xmm4 ; xmm0=tmp13L + movdqa xmm4, xmm2 + paddd xmm2, xmm5 ; xmm2=tmp10H + psubd xmm4, xmm5 ; xmm4=tmp13H + + movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm2 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm0 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm4 ; wk(3)=tmp13H + + pxor xmm5, xmm5 + pxor xmm7, xmm7 + punpcklwd xmm5, xmm6 ; xmm5=tmp1L + punpckhwd xmm7, xmm6 ; xmm7=tmp1H + psrad xmm5, (16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm7, (16-CONST_BITS) ; psrad xmm7,16 & pslld xmm7,CONST_BITS + + movdqa xmm2, xmm5 + paddd xmm5, xmm1 ; xmm5=tmp11L + psubd xmm2, xmm1 ; xmm2=tmp12L + movdqa xmm0, xmm7 + 
paddd xmm7, xmm3 ; xmm7=tmp11H + psubd xmm0, xmm3 ; xmm0=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm7 ; wk(5)=tmp11H + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm0 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm4, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm6, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm6, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm1, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm5, xmm6 + movdqa xmm7, xmm4 + paddw xmm5, xmm3 ; xmm5=z3 + paddw xmm7, xmm1 ; xmm7=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm2, xmm5 + movdqa xmm0, xmm5 + punpcklwd xmm2, xmm7 + punpckhwd xmm0, xmm7 + movdqa xmm5, xmm2 + movdqa xmm7, xmm0 + pmaddwd xmm2, [rel PW_MF078_F117] ; xmm2=z3L + pmaddwd xmm0, [rel PW_MF078_F117] ; xmm0=z3H + pmaddwd xmm5, [rel PW_F117_F078] ; xmm5=z4L + pmaddwd xmm7, [rel PW_F117_F078] ; xmm7=z4H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * 
-2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm2, xmm3 + movdqa xmm0, xmm3 + punpcklwd xmm2, xmm4 + punpckhwd xmm0, xmm4 + movdqa xmm3, xmm2 + movdqa xmm4, xmm0 + pmaddwd xmm2, [rel PW_MF060_MF089] ; xmm2=tmp0L + pmaddwd xmm0, [rel PW_MF060_MF089] ; xmm0=tmp0H + pmaddwd xmm3, [rel PW_MF089_F060] ; xmm3=tmp3L + pmaddwd xmm4, [rel PW_MF089_F060] ; xmm4=tmp3H + + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L + paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H + paddd xmm3, xmm5 ; xmm3=tmp3L + paddd xmm4, xmm7 ; xmm4=tmp3H + + movdqa XMMWORD [wk(8)], xmm2 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm0 ; wk(9)=tmp0H + + movdqa xmm2, xmm1 + movdqa xmm0, xmm1 + punpcklwd xmm2, xmm6 + punpckhwd xmm0, xmm6 + movdqa xmm1, xmm2 + movdqa xmm6, xmm0 + pmaddwd xmm2, [rel PW_MF050_MF256] ; xmm2=tmp1L + pmaddwd xmm0, [rel PW_MF050_MF256] ; xmm0=tmp1H + pmaddwd xmm1, [rel PW_MF256_F050] ; xmm1=tmp2L + pmaddwd xmm6, [rel PW_MF256_F050] ; xmm6=tmp2H + + paddd xmm2, xmm5 ; xmm2=tmp1L + paddd xmm0, xmm7 ; xmm0=tmp1H + paddd xmm1, XMMWORD [wk(10)] ; xmm1=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm7, XMMWORD [wk(1)] ; xmm7=tmp10H + + movdqa xmm2, xmm5 + movdqa xmm0, xmm7 + paddd xmm5, xmm3 ; xmm5=data0L + paddd xmm7, xmm4 ; xmm7=data0H + psubd xmm2, xmm3 ; xmm2=data7L + psubd xmm0, xmm4 ; xmm0=data7H + + movdqa xmm3, [rel PD_DESCALE_P1] ; xmm3=[rel PD_DESCALE_P1] + + paddd xmm5, xmm3 + paddd xmm7, xmm3 + psrad xmm5, DESCALE_P1 + psrad xmm7, DESCALE_P1 + paddd xmm2, xmm3 + paddd xmm0, xmm3 + psrad xmm2, DESCALE_P1 + psrad xmm0, DESCALE_P1 + + packssdw xmm5, xmm7 ; xmm5=data0=(00 01 02 03 04 05 06 07) + packssdw xmm2, xmm0 ; xmm2=data7=(70 71 72 
73 74 75 76 77) + + movdqa xmm4, XMMWORD [wk(4)] ; xmm4=tmp11L + movdqa xmm3, XMMWORD [wk(5)] ; xmm3=tmp11H + + movdqa xmm7, xmm4 + movdqa xmm0, xmm3 + paddd xmm4, xmm1 ; xmm4=data1L + paddd xmm3, xmm6 ; xmm3=data1H + psubd xmm7, xmm1 ; xmm7=data6L + psubd xmm0, xmm6 ; xmm0=data6H + + movdqa xmm1, [rel PD_DESCALE_P1] ; xmm1=[rel PD_DESCALE_P1] + + paddd xmm4, xmm1 + paddd xmm3, xmm1 + psrad xmm4, DESCALE_P1 + psrad xmm3, DESCALE_P1 + paddd xmm7, xmm1 + paddd xmm0, xmm1 + psrad xmm7, DESCALE_P1 + psrad xmm0, DESCALE_P1 + + packssdw xmm4, xmm3 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm7, xmm0 ; xmm7=data6=(60 61 62 63 64 65 66 67) + + movdqa xmm6, xmm5 ; transpose coefficients(phase 1) + punpcklwd xmm5, xmm4 ; xmm5=(00 10 01 11 02 12 03 13) + punpckhwd xmm6, xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm1, xmm7 ; transpose coefficients(phase 1) + punpcklwd xmm7, xmm2 ; xmm7=(60 70 61 71 62 72 63 73) + punpckhwd xmm1, xmm2 ; xmm1=(64 74 65 75 66 76 67 77) + + movdqa xmm3, XMMWORD [wk(6)] ; xmm3=tmp12L + movdqa xmm0, XMMWORD [wk(7)] ; xmm0=tmp12H + movdqa xmm4, XMMWORD [wk(10)] ; xmm4=tmp1L + movdqa xmm2, XMMWORD [wk(11)] ; xmm2=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 01 11 02 12 03 13) + movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=(04 14 05 15 06 16 07 17) + movdqa XMMWORD [wk(4)], xmm7 ; wk(4)=(60 70 61 71 62 72 63 73) + movdqa XMMWORD [wk(5)], xmm1 ; wk(5)=(64 74 65 75 66 76 67 77) + + movdqa xmm5, xmm3 + movdqa xmm6, xmm0 + paddd xmm3, xmm4 ; xmm3=data2L + paddd xmm0, xmm2 ; xmm0=data2H + psubd xmm5, xmm4 ; xmm5=data5L + psubd xmm6, xmm2 ; xmm6=data5H + + movdqa xmm7, [rel PD_DESCALE_P1] ; xmm7=[rel PD_DESCALE_P1] + + paddd xmm3, xmm7 + paddd xmm0, xmm7 + psrad xmm3, DESCALE_P1 + psrad xmm0, DESCALE_P1 + paddd xmm5, xmm7 + paddd xmm6, xmm7 + psrad xmm5, DESCALE_P1 + psrad xmm6, DESCALE_P1 + + packssdw xmm3, xmm0 ; xmm3=data2=(20 21 22 23 24 25 26 27) + packssdw xmm5, xmm6 ; xmm5=data5=(50 51 52 53 54 55 56 57) + + movdqa xmm1, XMMWORD [wk(2)] 
; xmm1=tmp13L + movdqa xmm4, XMMWORD [wk(3)] ; xmm4=tmp13H + movdqa xmm2, XMMWORD [wk(8)] ; xmm2=tmp0L + movdqa xmm7, XMMWORD [wk(9)] ; xmm7=tmp0H + + movdqa xmm0, xmm1 + movdqa xmm6, xmm4 + paddd xmm1, xmm2 ; xmm1=data3L + paddd xmm4, xmm7 ; xmm4=data3H + psubd xmm0, xmm2 ; xmm0=data4L + psubd xmm6, xmm7 ; xmm6=data4H + + movdqa xmm2, [rel PD_DESCALE_P1] ; xmm2=[rel PD_DESCALE_P1] + + paddd xmm1, xmm2 + paddd xmm4, xmm2 + psrad xmm1, DESCALE_P1 + psrad xmm4, DESCALE_P1 + paddd xmm0, xmm2 + paddd xmm6, xmm2 + psrad xmm0, DESCALE_P1 + psrad xmm6, DESCALE_P1 + + packssdw xmm1, xmm4 ; xmm1=data3=(30 31 32 33 34 35 36 37) + packssdw xmm0, xmm6 ; xmm0=data4=(40 41 42 43 44 45 46 47) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 01 11 02 12 03 13) + movdqa xmm2, XMMWORD [wk(1)] ; xmm2=(04 14 05 15 06 16 07 17) + + movdqa xmm4, xmm3 ; transpose coefficients(phase 1) + punpcklwd xmm3, xmm1 ; xmm3=(20 30 21 31 22 32 23 33) + punpckhwd xmm4, xmm1 ; xmm4=(24 34 25 35 26 36 27 37) + movdqa xmm6, xmm0 ; transpose coefficients(phase 1) + punpcklwd xmm0, xmm5 ; xmm0=(40 50 41 51 42 52 43 53) + punpckhwd xmm6, xmm5 ; xmm6=(44 54 45 55 46 56 47 57) + + movdqa xmm1, xmm7 ; transpose coefficients(phase 2) + punpckldq xmm7, xmm3 ; xmm7=(00 10 20 30 01 11 21 31) + punpckhdq xmm1, xmm3 ; xmm1=(02 12 22 32 03 13 23 33) + movdqa xmm5, xmm2 ; transpose coefficients(phase 2) + punpckldq xmm2, xmm4 ; xmm2=(04 14 24 34 05 15 25 35) + punpckhdq xmm5, xmm4 ; xmm5=(06 16 26 36 07 17 27 37) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=(60 70 61 71 62 72 63 73) + movdqa xmm4, XMMWORD [wk(5)] ; xmm4=(64 74 65 75 66 76 67 77) + + movdqa XMMWORD [wk(6)], xmm2 ; wk(6)=(04 14 24 34 05 15 25 35) + movdqa XMMWORD [wk(7)], xmm5 ; wk(7)=(06 16 26 36 07 17 27 37) + + movdqa xmm2, xmm0 ; transpose coefficients(phase 2) + punpckldq xmm0, xmm3 ; xmm0=(40 50 60 70 41 51 61 71) + punpckhdq xmm2, xmm3 ; xmm2=(42 52 62 72 43 53 63 73) + movdqa xmm5, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm4 ; 
xmm6=(44 54 64 74 45 55 65 75) + punpckhdq xmm5, xmm4 ; xmm5=(46 56 66 76 47 57 67 77) + + movdqa xmm3, xmm7 ; transpose coefficients(phase 3) + punpcklqdq xmm7, xmm0 ; xmm7=col0=(00 10 20 30 40 50 60 70) + punpckhqdq xmm3, xmm0 ; xmm3=col1=(01 11 21 31 41 51 61 71) + movdqa xmm4, xmm1 ; transpose coefficients(phase 3) + punpcklqdq xmm1, xmm2 ; xmm1=col2=(02 12 22 32 42 52 62 72) + punpckhqdq xmm4, xmm2 ; xmm4=col3=(03 13 23 33 43 53 63 73) + + movdqa xmm0, XMMWORD [wk(6)] ; xmm0=(04 14 24 34 05 15 25 35) + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=(06 16 26 36 07 17 27 37) + + movdqa XMMWORD [wk(8)], xmm3 ; wk(8)=col1 + movdqa XMMWORD [wk(9)], xmm4 ; wk(9)=col3 + + movdqa xmm3, xmm0 ; transpose coefficients(phase 3) + punpcklqdq xmm0, xmm6 ; xmm0=col4=(04 14 24 34 44 54 64 74) + punpckhqdq xmm3, xmm6 ; xmm3=col5=(05 15 25 35 45 55 65 75) + movdqa xmm4, xmm2 ; transpose coefficients(phase 3) + punpcklqdq xmm2, xmm5 ; xmm2=col6=(06 16 26 36 46 56 66 76) + punpckhqdq xmm4, xmm5 ; xmm4=col7=(07 17 27 37 47 57 67 77) + + movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5 + movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7 +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows from work array, store into output array. 
+ + mov rax, [original_rbp] + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; -- Even part + + ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 + + ; (Original) + ; z1 = (z2 + z3) * 0.541196100; + ; tmp2 = z1 + z3 * -1.847759065; + ; tmp3 = z1 + z2 * 0.765366865; + ; + ; (This implementation) + ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); + ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; + + movdqa xmm6, xmm1 ; xmm1=in2=z2 + movdqa xmm5, xmm1 + punpcklwd xmm6, xmm2 ; xmm2=in6=z3 + punpckhwd xmm5, xmm2 + movdqa xmm1, xmm6 + movdqa xmm2, xmm5 + pmaddwd xmm6, [rel PW_F130_F054] ; xmm6=tmp3L + pmaddwd xmm5, [rel PW_F130_F054] ; xmm5=tmp3H + pmaddwd xmm1, [rel PW_F054_MF130] ; xmm1=tmp2L + pmaddwd xmm2, [rel PW_F054_MF130] ; xmm2=tmp2H + + movdqa xmm3, xmm7 + paddw xmm7, xmm0 ; xmm7=in0+in4 + psubw xmm3, xmm0 ; xmm3=in0-in4 + + pxor xmm4, xmm4 + pxor xmm0, xmm0 + punpcklwd xmm4, xmm7 ; xmm4=tmp0L + punpckhwd xmm0, xmm7 ; xmm0=tmp0H + psrad xmm4, (16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + psrad xmm0, (16-CONST_BITS) ; psrad xmm0,16 & pslld xmm0,CONST_BITS + + movdqa xmm7, xmm4 + paddd xmm4, xmm6 ; xmm4=tmp10L + psubd xmm7, xmm6 ; xmm7=tmp13L + movdqa xmm6, xmm0 + paddd xmm0, xmm5 ; xmm0=tmp10H + psubd xmm6, xmm5 ; xmm6=tmp13H + + movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=tmp10L + movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=tmp10H + movdqa XMMWORD [wk(2)], xmm7 ; wk(2)=tmp13L + movdqa XMMWORD [wk(3)], xmm6 ; wk(3)=tmp13H + + pxor xmm5, xmm5 + pxor xmm4, xmm4 + punpcklwd xmm5, xmm3 ; xmm5=tmp1L + punpckhwd xmm4, xmm3 ; xmm4=tmp1H + psrad xmm5, (16-CONST_BITS) ; psrad xmm5,16 & pslld xmm5,CONST_BITS + psrad xmm4, (16-CONST_BITS) ; psrad xmm4,16 & pslld xmm4,CONST_BITS + + movdqa xmm0, xmm5 + paddd xmm5, xmm1 ; xmm5=tmp11L + psubd xmm0, xmm1 ; xmm0=tmp12L + movdqa xmm7, xmm4 + paddd xmm4, xmm2 ; xmm4=tmp11H + psubd xmm7, xmm2 ; xmm7=tmp12H + + movdqa XMMWORD [wk(4)], xmm5 ; wk(4)=tmp11L + movdqa XMMWORD [wk(5)], xmm4 ; wk(5)=tmp11H + movdqa 
XMMWORD [wk(6)], xmm0 ; wk(6)=tmp12L + movdqa XMMWORD [wk(7)], xmm7 ; wk(7)=tmp12H + + ; -- Odd part + + movdqa xmm6, XMMWORD [wk(9)] ; xmm6=col3 + movdqa xmm3, XMMWORD [wk(8)] ; xmm3=col1 + movdqa xmm1, XMMWORD [wk(11)] ; xmm1=col7 + movdqa xmm2, XMMWORD [wk(10)] ; xmm2=col5 + + movdqa xmm5, xmm6 + movdqa xmm4, xmm3 + paddw xmm5, xmm1 ; xmm5=z3 + paddw xmm4, xmm2 ; xmm4=z4 + + ; (Original) + ; z5 = (z3 + z4) * 1.175875602; + ; z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; + ; z3 += z5; z4 += z5; + ; + ; (This implementation) + ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; + ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); + + movdqa xmm0, xmm5 + movdqa xmm7, xmm5 + punpcklwd xmm0, xmm4 + punpckhwd xmm7, xmm4 + movdqa xmm5, xmm0 + movdqa xmm4, xmm7 + pmaddwd xmm0, [rel PW_MF078_F117] ; xmm0=z3L + pmaddwd xmm7, [rel PW_MF078_F117] ; xmm7=z3H + pmaddwd xmm5, [rel PW_F117_F078] ; xmm5=z4L + pmaddwd xmm4, [rel PW_F117_F078] ; xmm4=z4H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H + + ; (Original) + ; z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; + ; tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; + ; tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; + ; z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; + ; tmp0 += z1 + z3; tmp1 += z2 + z4; + ; tmp2 += z2 + z3; tmp3 += z1 + z4; + ; + ; (This implementation) + ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; + ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; + ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); + ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); + ; tmp0 += z3; tmp1 += z4; + ; tmp2 += z3; tmp3 += z4; + + movdqa xmm0, xmm1 + movdqa xmm7, xmm1 + punpcklwd xmm0, xmm3 + punpckhwd xmm7, xmm3 + movdqa xmm1, xmm0 + movdqa xmm3, xmm7 + pmaddwd xmm0, [rel PW_MF060_MF089] ; xmm0=tmp0L + pmaddwd xmm7, [rel PW_MF060_MF089] ; xmm7=tmp0H + pmaddwd xmm1, [rel PW_MF089_F060] ; 
xmm1=tmp3L + pmaddwd xmm3, [rel PW_MF089_F060] ; xmm3=tmp3H + + paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L + paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H + paddd xmm1, xmm5 ; xmm1=tmp3L + paddd xmm3, xmm4 ; xmm3=tmp3H + + movdqa XMMWORD [wk(8)], xmm0 ; wk(8)=tmp0L + movdqa XMMWORD [wk(9)], xmm7 ; wk(9)=tmp0H + + movdqa xmm0, xmm2 + movdqa xmm7, xmm2 + punpcklwd xmm0, xmm6 + punpckhwd xmm7, xmm6 + movdqa xmm2, xmm0 + movdqa xmm6, xmm7 + pmaddwd xmm0, [rel PW_MF050_MF256] ; xmm0=tmp1L + pmaddwd xmm7, [rel PW_MF050_MF256] ; xmm7=tmp1H + pmaddwd xmm2, [rel PW_MF256_F050] ; xmm2=tmp2L + pmaddwd xmm6, [rel PW_MF256_F050] ; xmm6=tmp2H + + paddd xmm0, xmm5 ; xmm0=tmp1L + paddd xmm7, xmm4 ; xmm7=tmp1H + paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp2L + paddd xmm6, XMMWORD [wk(11)] ; xmm6=tmp2H + + movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=tmp1L + movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=tmp1H + + ; -- Final output stage + + movdqa xmm5, XMMWORD [wk(0)] ; xmm5=tmp10L + movdqa xmm4, XMMWORD [wk(1)] ; xmm4=tmp10H + + movdqa xmm0, xmm5 + movdqa xmm7, xmm4 + paddd xmm5, xmm1 ; xmm5=data0L + paddd xmm4, xmm3 ; xmm4=data0H + psubd xmm0, xmm1 ; xmm0=data7L + psubd xmm7, xmm3 ; xmm7=data7H + + movdqa xmm1, [rel PD_DESCALE_P2] ; xmm1=[rel PD_DESCALE_P2] + + paddd xmm5, xmm1 + paddd xmm4, xmm1 + psrad xmm5, DESCALE_P2 + psrad xmm4, DESCALE_P2 + paddd xmm0, xmm1 + paddd xmm7, xmm1 + psrad xmm0, DESCALE_P2 + psrad xmm7, DESCALE_P2 + + packssdw xmm5, xmm4 ; xmm5=data0=(00 10 20 30 40 50 60 70) + packssdw xmm0, xmm7 ; xmm0=data7=(07 17 27 37 47 57 67 77) + + movdqa xmm3, XMMWORD [wk(4)] ; xmm3=tmp11L + movdqa xmm1, XMMWORD [wk(5)] ; xmm1=tmp11H + + movdqa xmm4, xmm3 + movdqa xmm7, xmm1 + paddd xmm3, xmm2 ; xmm3=data1L + paddd xmm1, xmm6 ; xmm1=data1H + psubd xmm4, xmm2 ; xmm4=data6L + psubd xmm7, xmm6 ; xmm7=data6H + + movdqa xmm2, [rel PD_DESCALE_P2] ; xmm2=[rel PD_DESCALE_P2] + + paddd xmm3, xmm2 + paddd xmm1, xmm2 + psrad xmm3, DESCALE_P2 + psrad xmm1, DESCALE_P2 + paddd xmm4, xmm2 + paddd xmm7, xmm2 
+ psrad xmm4, DESCALE_P2 + psrad xmm7, DESCALE_P2 + + packssdw xmm3, xmm1 ; xmm3=data1=(01 11 21 31 41 51 61 71) + packssdw xmm4, xmm7 ; xmm4=data6=(06 16 26 36 46 56 66 76) + + packsswb xmm5, xmm4 ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + packsswb xmm3, xmm0 ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm6, XMMWORD [wk(6)] ; xmm6=tmp12L + movdqa xmm2, XMMWORD [wk(7)] ; xmm2=tmp12H + movdqa xmm1, XMMWORD [wk(10)] ; xmm1=tmp1L + movdqa xmm7, XMMWORD [wk(11)] ; xmm7=tmp1H + + movdqa XMMWORD [wk(0)], xmm5 ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + movdqa xmm4, xmm6 + movdqa xmm0, xmm2 + paddd xmm6, xmm1 ; xmm6=data2L + paddd xmm2, xmm7 ; xmm2=data2H + psubd xmm4, xmm1 ; xmm4=data5L + psubd xmm0, xmm7 ; xmm0=data5H + + movdqa xmm5, [rel PD_DESCALE_P2] ; xmm5=[rel PD_DESCALE_P2] + + paddd xmm6, xmm5 + paddd xmm2, xmm5 + psrad xmm6, DESCALE_P2 + psrad xmm2, DESCALE_P2 + paddd xmm4, xmm5 + paddd xmm0, xmm5 + psrad xmm4, DESCALE_P2 + psrad xmm0, DESCALE_P2 + + packssdw xmm6, xmm2 ; xmm6=data2=(02 12 22 32 42 52 62 72) + packssdw xmm4, xmm0 ; xmm4=data5=(05 15 25 35 45 55 65 75) + + movdqa xmm3, XMMWORD [wk(2)] ; xmm3=tmp13L + movdqa xmm1, XMMWORD [wk(3)] ; xmm1=tmp13H + movdqa xmm7, XMMWORD [wk(8)] ; xmm7=tmp0L + movdqa xmm5, XMMWORD [wk(9)] ; xmm5=tmp0H + + movdqa xmm2, xmm3 + movdqa xmm0, xmm1 + paddd xmm3, xmm7 ; xmm3=data3L + paddd xmm1, xmm5 ; xmm1=data3H + psubd xmm2, xmm7 ; xmm2=data4L + psubd xmm0, xmm5 ; xmm0=data4H + + movdqa xmm7, [rel PD_DESCALE_P2] ; xmm7=[rel PD_DESCALE_P2] + + paddd xmm3, xmm7 + paddd xmm1, xmm7 + psrad xmm3, DESCALE_P2 + psrad xmm1, DESCALE_P2 + paddd xmm2, xmm7 + paddd xmm0, xmm7 + psrad xmm2, DESCALE_P2 + psrad xmm0, DESCALE_P2 + + movdqa xmm5, [rel PB_CENTERJSAMP] ; xmm5=[rel PB_CENTERJSAMP] + + packssdw xmm3, xmm1 ; xmm3=data3=(03 13 23 33 43 53 63 73) + packssdw xmm2, xmm0 ; xmm2=data4=(04 
14 24 34 44 54 64 74) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76) + movdqa xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77) + + packsswb xmm6, xmm2 ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74) + packsswb xmm3, xmm4 ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75) + + paddb xmm7, xmm5 + paddb xmm1, xmm5 + paddb xmm6, xmm5 + paddb xmm3, xmm5 + + movdqa xmm0, xmm7 ; transpose coefficients(phase 1) + punpcklbw xmm7, xmm1 ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71) + punpckhbw xmm0, xmm1 ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77) + movdqa xmm2, xmm6 ; transpose coefficients(phase 1) + punpcklbw xmm6, xmm3 ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73) + punpckhbw xmm2, xmm3 ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75) + + movdqa xmm4, xmm7 ; transpose coefficients(phase 2) + punpcklwd xmm7, xmm6 ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33) + punpckhwd xmm4, xmm6 ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73) + movdqa xmm5, xmm2 ; transpose coefficients(phase 2) + punpcklwd xmm2, xmm0 ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37) + punpckhwd xmm5, xmm0 ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77) + + movdqa xmm1, xmm7 ; transpose coefficients(phase 3) + punpckldq xmm7, xmm2 ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + punpckhdq xmm1, xmm2 ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + movdqa xmm3, xmm4 ; transpose coefficients(phase 3) + punpckldq xmm4, xmm5 ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + punpckhdq xmm3, xmm5 ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + pshufd xmm6, xmm7, 0x4E ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07) + pshufd xmm0, xmm1, 0x4E ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27) + pshufd xmm2, xmm4, 0x4E ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47) + pshufd xmm5, xmm3, 
0x4E ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm7 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1 + mov rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 + + mov rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0 + mov rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW] + movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2 + movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5 + + uncollect_args 4 + mov rsp, rbp ; rsp <- aligned rbp + pop rsp ; rsp <- original rbp + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jidctred-sse2.asm b/simd/x86_64/jidctred-sse2.asm new file mode 100644 index 0000000..af64fdc --- /dev/null +++ b/simd/x86_64/jidctred-sse2.asm @@ -0,0 +1,575 @@ +; +; jidctred.asm - reduced-size IDCT (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; This file contains inverse-DCT routines that produce reduced-size +; output: either 4x4 or 2x2 pixels from an 8x8 DCT block. 
+; The following code is based directly on the IJG's original jidctred.c; +; see the jidctred.c for more details. +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + +%define CONST_BITS 13 +%define PASS1_BITS 2 + +%define DESCALE_P1_4 (CONST_BITS - PASS1_BITS + 1) +%define DESCALE_P2_4 (CONST_BITS + PASS1_BITS + 3 + 1) +%define DESCALE_P1_2 (CONST_BITS - PASS1_BITS + 2) +%define DESCALE_P2_2 (CONST_BITS + PASS1_BITS + 3 + 2) + +%if CONST_BITS == 13 +F_0_211 equ 1730 ; FIX(0.211164243) +F_0_509 equ 4176 ; FIX(0.509795579) +F_0_601 equ 4926 ; FIX(0.601344887) +F_0_720 equ 5906 ; FIX(0.720959822) +F_0_765 equ 6270 ; FIX(0.765366865) +F_0_850 equ 6967 ; FIX(0.850430095) +F_0_899 equ 7373 ; FIX(0.899976223) +F_1_061 equ 8697 ; FIX(1.061594337) +F_1_272 equ 10426 ; FIX(1.272758580) +F_1_451 equ 11893 ; FIX(1.451774981) +F_1_847 equ 15137 ; FIX(1.847759065) +F_2_172 equ 17799 ; FIX(2.172734803) +F_2_562 equ 20995 ; FIX(2.562915447) +F_3_624 equ 29692 ; FIX(3.624509785) +%else +; NASM cannot do compile-time arithmetic on floating-point constants. 
+%define DESCALE(x, n) (((x) + (1 << ((n) - 1))) >> (n)) +F_0_211 equ DESCALE( 226735879, 30 - CONST_BITS) ; FIX(0.211164243) +F_0_509 equ DESCALE( 547388834, 30 - CONST_BITS) ; FIX(0.509795579) +F_0_601 equ DESCALE( 645689155, 30 - CONST_BITS) ; FIX(0.601344887) +F_0_720 equ DESCALE( 774124714, 30 - CONST_BITS) ; FIX(0.720959822) +F_0_765 equ DESCALE( 821806413, 30 - CONST_BITS) ; FIX(0.765366865) +F_0_850 equ DESCALE( 913142361, 30 - CONST_BITS) ; FIX(0.850430095) +F_0_899 equ DESCALE( 966342111, 30 - CONST_BITS) ; FIX(0.899976223) +F_1_061 equ DESCALE(1139878239, 30 - CONST_BITS) ; FIX(1.061594337) +F_1_272 equ DESCALE(1366614119, 30 - CONST_BITS) ; FIX(1.272758580) +F_1_451 equ DESCALE(1558831516, 30 - CONST_BITS) ; FIX(1.451774981) +F_1_847 equ DESCALE(1984016188, 30 - CONST_BITS) ; FIX(1.847759065) +F_2_172 equ DESCALE(2332956230, 30 - CONST_BITS) ; FIX(2.172734803) +F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447) +F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785) +%endif + +; -------------------------------------------------------------------------- + SECTION SEG_CONST + + alignz 32 + GLOBAL_DATA(jconst_idct_red_sse2) + +EXTN(jconst_idct_red_sse2): + +PW_F184_MF076 times 4 dw F_1_847, -F_0_765 +PW_F256_F089 times 4 dw F_2_562, F_0_899 +PW_F106_MF217 times 4 dw F_1_061, -F_2_172 +PW_MF060_MF050 times 4 dw -F_0_601, -F_0_509 +PW_F145_MF021 times 4 dw F_1_451, -F_0_211 +PW_F362_MF127 times 4 dw F_3_624, -F_1_272 +PW_F085_MF072 times 4 dw F_0_850, -F_0_720 +PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4 - 1) +PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4 - 1) +PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1) +PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1) +PB_CENTERJSAMP times 16 db CENTERJSAMPLE + + alignz 32 + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 
4x4 output block. +; +; GLOBAL(void) +; jsimd_idct_4x4_sse2(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13d = JDIMENSION output_col + +%define original_rbp rbp + 0 +%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD + ; xmmword wk[WK_NUM] +%define WK_NUM 2 + + align 32 + GLOBAL_FUNCTION(jsimd_idct_4x4_sse2) + +EXTN(jsimd_idct_4x4_sse2): + push rbp + mov rax, rsp ; rax = original rbp + sub rsp, byte 4 + and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits + mov [rsp], rax + mov rbp, rsp ; rbp = aligned rbp + lea rsp, [wk(0)] + collect_args 4 + + ; ---- Pass 1: process columns from input. + + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + +%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2 + mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)] + or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)] + jnz short .columnDCT + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + por xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + por xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + por xmm0, xmm1 + packsswb xmm0, xmm0 + packsswb xmm0, xmm0 + movd eax, xmm0 + test rax, rax + jnz short .columnDCT + + ; -- AC terms all zero + + movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + psllw xmm0, PASS1_BITS + + movdqa xmm3, xmm0 ; xmm0=in0=(00 01 02 03 04 05 06 07) + punpcklwd xmm0, xmm0 ; xmm0=(00 00 01 01 02 02 03 03) + punpckhwd xmm3, xmm3 ; xmm3=(04 04 05 05 06 06 07 07) + + pshufd xmm1, xmm0, 0x50 ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01) + pshufd xmm0, xmm0, 0xFA ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03) + pshufd xmm6, xmm3, 0x50 ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05) + pshufd xmm3, xmm3, 0xFA ; xmm3=[col6 col7]=(06 
06 06 06 07 07 07 07) + + jmp near .column_end +%endif +.columnDCT: + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + movdqa xmm4, xmm0 + movdqa xmm5, xmm0 + punpcklwd xmm4, xmm1 + punpckhwd xmm5, xmm1 + movdqa xmm0, xmm4 + movdqa xmm1, xmm5 + pmaddwd xmm4, [rel PW_F256_F089] ; xmm4=(tmp2L) + pmaddwd xmm5, [rel PW_F256_F089] ; xmm5=(tmp2H) + pmaddwd xmm0, [rel PW_F106_MF217] ; xmm0=(tmp0L) + pmaddwd xmm1, [rel PW_F106_MF217] ; xmm1=(tmp0H) + + movdqa xmm6, xmm2 + movdqa xmm7, xmm2 + punpcklwd xmm6, xmm3 + punpckhwd xmm7, xmm3 + movdqa xmm2, xmm6 + movdqa xmm3, xmm7 + pmaddwd xmm6, [rel PW_MF060_MF050] ; xmm6=(tmp2L) + pmaddwd xmm7, [rel PW_MF060_MF050] ; xmm7=(tmp2H) + pmaddwd xmm2, [rel PW_F145_MF021] ; xmm2=(tmp0L) + pmaddwd xmm3, [rel PW_F145_MF021] ; xmm3=(tmp0H) + + paddd xmm6, xmm4 ; xmm6=tmp2L + paddd xmm7, xmm5 ; xmm7=tmp2H + paddd xmm2, xmm0 ; xmm2=tmp0L + paddd xmm3, xmm1 ; xmm3=tmp0H + + movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=tmp0L + movdqa XMMWORD [wk(1)], xmm3 ; wk(1)=tmp0H + + ; -- Even part + + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + movdqa xmm5, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)] + movdqa xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)] + pmullw xmm4, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm5, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm0, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + pxor xmm1, xmm1 + pxor xmm2, xmm2 + punpcklwd xmm1, xmm4 ; xmm1=tmp0L + punpckhwd xmm2, xmm4 ; xmm2=tmp0H + psrad xmm1, (16-CONST_BITS-1) ; psrad 
xmm1,16 & pslld xmm1,CONST_BITS+1 + psrad xmm2, (16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1 + + movdqa xmm3, xmm5 ; xmm5=in2=z2 + punpcklwd xmm5, xmm0 ; xmm0=in6=z3 + punpckhwd xmm3, xmm0 + pmaddwd xmm5, [rel PW_F184_MF076] ; xmm5=tmp2L + pmaddwd xmm3, [rel PW_F184_MF076] ; xmm3=tmp2H + + movdqa xmm4, xmm1 + movdqa xmm0, xmm2 + paddd xmm1, xmm5 ; xmm1=tmp10L + paddd xmm2, xmm3 ; xmm2=tmp10H + psubd xmm4, xmm5 ; xmm4=tmp12L + psubd xmm0, xmm3 ; xmm0=tmp12H + + ; -- Final output stage + + movdqa xmm5, xmm1 + movdqa xmm3, xmm2 + paddd xmm1, xmm6 ; xmm1=data0L + paddd xmm2, xmm7 ; xmm2=data0H + psubd xmm5, xmm6 ; xmm5=data3L + psubd xmm3, xmm7 ; xmm3=data3H + + movdqa xmm6, [rel PD_DESCALE_P1_4] ; xmm6=[rel PD_DESCALE_P1_4] + + paddd xmm1, xmm6 + paddd xmm2, xmm6 + psrad xmm1, DESCALE_P1_4 + psrad xmm2, DESCALE_P1_4 + paddd xmm5, xmm6 + paddd xmm3, xmm6 + psrad xmm5, DESCALE_P1_4 + psrad xmm3, DESCALE_P1_4 + + packssdw xmm1, xmm2 ; xmm1=data0=(00 01 02 03 04 05 06 07) + packssdw xmm5, xmm3 ; xmm5=data3=(30 31 32 33 34 35 36 37) + + movdqa xmm7, XMMWORD [wk(0)] ; xmm7=tmp0L + movdqa xmm6, XMMWORD [wk(1)] ; xmm6=tmp0H + + movdqa xmm2, xmm4 + movdqa xmm3, xmm0 + paddd xmm4, xmm7 ; xmm4=data1L + paddd xmm0, xmm6 ; xmm0=data1H + psubd xmm2, xmm7 ; xmm2=data2L + psubd xmm3, xmm6 ; xmm3=data2H + + movdqa xmm7, [rel PD_DESCALE_P1_4] ; xmm7=[rel PD_DESCALE_P1_4] + + paddd xmm4, xmm7 + paddd xmm0, xmm7 + psrad xmm4, DESCALE_P1_4 + psrad xmm0, DESCALE_P1_4 + paddd xmm2, xmm7 + paddd xmm3, xmm7 + psrad xmm2, DESCALE_P1_4 + psrad xmm3, DESCALE_P1_4 + + packssdw xmm4, xmm0 ; xmm4=data1=(10 11 12 13 14 15 16 17) + packssdw xmm2, xmm3 ; xmm2=data2=(20 21 22 23 24 25 26 27) + + movdqa xmm6, xmm1 ; transpose coefficients(phase 1) + punpcklwd xmm1, xmm4 ; xmm1=(00 10 01 11 02 12 03 13) + punpckhwd xmm6, xmm4 ; xmm6=(04 14 05 15 06 16 07 17) + movdqa xmm7, xmm2 ; transpose coefficients(phase 1) + punpcklwd xmm2, xmm5 ; xmm2=(20 30 21 31 22 32 23 33) + punpckhwd xmm7, xmm5 
; xmm7=(24 34 25 35 26 36 27 37) + + movdqa xmm0, xmm1 ; transpose coefficients(phase 2) + punpckldq xmm1, xmm2 ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31) + punpckhdq xmm0, xmm2 ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33) + movdqa xmm3, xmm6 ; transpose coefficients(phase 2) + punpckldq xmm6, xmm7 ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35) + punpckhdq xmm3, xmm7 ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37) +.column_end: + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. + + mov rax, [original_rbp] ; NOTE(review): value is never read; rax is overwritten by "mov eax, r13d" below + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d ; output_col (32-bit write zero-extends into rax) + + ; -- Even part + + pxor xmm4, xmm4 + punpcklwd xmm4, xmm1 ; xmm4=tmp0 + psrad xmm4, (16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1 + + ; -- Odd part + + punpckhwd xmm1, xmm0 + punpckhwd xmm6, xmm3 + movdqa xmm5, xmm1 + movdqa xmm2, xmm6 + pmaddwd xmm1, [rel PW_F256_F089] ; xmm1=(tmp2) + pmaddwd xmm6, [rel PW_MF060_MF050] ; xmm6=(tmp2) + pmaddwd xmm5, [rel PW_F106_MF217] ; xmm5=(tmp0) + pmaddwd xmm2, [rel PW_F145_MF021] ; xmm2=(tmp0) + + paddd xmm6, xmm1 ; xmm6=tmp2 + paddd xmm2, xmm5 ; xmm2=tmp0 + + ; -- Even part + + punpcklwd xmm0, xmm3 + pmaddwd xmm0, [rel PW_F184_MF076] ; xmm0=tmp2 + + movdqa xmm7, xmm4 + paddd xmm4, xmm0 ; xmm4=tmp10 + psubd xmm7, xmm0 ; xmm7=tmp12 + + ; -- Final output stage + + movdqa xmm1, [rel PD_DESCALE_P2_4] ; xmm1=[rel PD_DESCALE_P2_4] + + movdqa xmm5, xmm4 + movdqa xmm3, xmm7 + paddd xmm4, xmm6 ; xmm4=data0=(00 10 20 30) + paddd xmm7, xmm2 ; xmm7=data1=(01 11 21 31) + psubd xmm5, xmm6 ; xmm5=data3=(03 13 23 33) + psubd xmm3, xmm2 ; xmm3=data2=(02 12 22 32) + + paddd xmm4, xmm1 + paddd xmm7, xmm1 + psrad xmm4, DESCALE_P2_4 + psrad xmm7, DESCALE_P2_4 + paddd xmm5, xmm1 + paddd xmm3, xmm1 + psrad xmm5, DESCALE_P2_4 + psrad
xmm3, DESCALE_P2_4 + + packssdw xmm4, xmm3 ; xmm4=(00 10 20 30 02 12 22 32) + packssdw xmm7, xmm5 ; xmm7=(01 11 21 31 03 13 23 33) + + movdqa xmm0, xmm4 ; transpose coefficients(phase 1) + punpcklwd xmm4, xmm7 ; xmm4=(00 01 10 11 20 21 30 31) + punpckhwd xmm0, xmm7 ; xmm0=(02 03 12 13 22 23 32 33) + + movdqa xmm6, xmm4 ; transpose coefficients(phase 2) + punpckldq xmm4, xmm0 ; xmm4=(00 01 02 03 10 11 12 13) + punpckhdq xmm6, xmm0 ; xmm6=(20 21 22 23 30 31 32 33) + + packsswb xmm4, xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..) + paddb xmm4, [rel PB_CENTERJSAMP] + + pshufd xmm2, xmm4, 0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..) + pshufd xmm1, xmm4, 0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..) + pshufd xmm3, xmm4, 0x93 ; xmm3=(30 31 32 33 00 01 02 03 10 ..) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4 ; output row 0 (low dword = 00 01 02 03) + movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2 ; output row 1 (low dword = 10 11 12 13) + mov rdx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW] + movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1 ; output row 2 (low dword = 20 21 22 23) + movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3 ; output row 3 (low dword = 30 31 32 33) + + uncollect_args 4 + mov rsp, rbp ; rsp <- aligned frame base + pop rsp ; rsp <- entry rsp stored there by the prologue (points at the pushed rbp) + pop rbp ; restore the caller's rbp + ret + +; -------------------------------------------------------------------------- +; +; Perform dequantization and inverse DCT on one block of coefficients, +; producing a reduced-size 2x2 output block. +; +; GLOBAL(void) +; jsimd_idct_2x2_sse2(void *dct_table, JCOEFPTR coef_block, +; JSAMPARRAY output_buf, JDIMENSION output_col) +; + +; r10 = void *dct_table +; r11 = JCOEFPTR coef_block +; r12 = JSAMPARRAY output_buf +; r13d = JDIMENSION output_col + + align 32 + GLOBAL_FUNCTION(jsimd_idct_2x2_sse2) + +EXTN(jsimd_idct_2x2_sse2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 4 + push rbx + + ; ---- Pass 1: process columns from input.
+ + mov rdx, r10 ; quantptr + mov rsi, r11 ; inptr + + ; | input: | result: | + ; | 00 01 ** 03 ** 05 ** 07 | | + ; | 10 11 ** 13 ** 15 ** 17 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 | + ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 | + ; | 50 51 ** 53 ** 55 ** 57 | | + ; | ** ** ** ** ** ** ** ** | | + ; | 70 71 ** 73 ** 75 ** 77 | | + + ; -- Odd part + + movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)] + movdqa xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)] + pmullw xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + movdqa xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)] + movdqa xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)] + pmullw xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + pmullw xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37) + ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77) + + pcmpeqd xmm7, xmm7 + pslld xmm7, WORD_BIT ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..} + + movdqa xmm4, xmm0 ; xmm4=(10 11 ** 13 ** 15 ** 17) + movdqa xmm5, xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57) + punpcklwd xmm4, xmm1 ; xmm4=(10 30 11 31 ** ** 13 33) + punpcklwd xmm5, xmm3 ; xmm5=(50 70 51 71 ** ** 53 73) + pmaddwd xmm4, [rel PW_F362_MF127] + pmaddwd xmm5, [rel PW_F085_MF072] + + psrld xmm0, WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --) + pand xmm1, xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37) + psrld xmm2, WORD_BIT ; xmm2=(51 -- 53 -- 55 -- 57 --) + pand xmm3, xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77) + por xmm0, xmm1 ; xmm0=(11 31 13 33 15 35 17 37) + por xmm2, xmm3 ; xmm2=(51 71 53 73 55 75 57 77) + pmaddwd xmm0, [rel PW_F362_MF127] + pmaddwd xmm2, [rel PW_F085_MF072] + + paddd xmm4, xmm5 ; xmm4=tmp0[col0 col1 **** col3] + paddd xmm0, xmm2 ; xmm0=tmp0[col1 col3 col5 col7] + + ; -- Even part + + movdqa xmm6, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)] + pmullw xmm6, XMMWORD 
[XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)] + + ; xmm6=(00 01 ** 03 ** 05 ** 07) + + movdqa xmm1, xmm6 ; xmm1=(00 01 ** 03 ** 05 ** 07) + pslld xmm6, WORD_BIT ; xmm6=(-- 00 -- ** -- ** -- **) + pand xmm1, xmm7 ; xmm1=(-- 01 -- 03 -- 05 -- 07) + psrad xmm6, (WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****] + psrad xmm1, (WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7] + + ; -- Final output stage + + movdqa xmm3, xmm6 + movdqa xmm5, xmm1 + paddd xmm6, xmm4 ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **) + paddd xmm1, xmm0 ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7) + psubd xmm3, xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **) + psubd xmm5, xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7) + + movdqa xmm2, [rel PD_DESCALE_P1_2] ; xmm2=[rel PD_DESCALE_P1_2] + + punpckldq xmm6, xmm3 ; xmm6=(A0 B0 ** **) + + movdqa xmm7, xmm1 + punpcklqdq xmm1, xmm5 ; xmm1=(A1 A3 B1 B3) + punpckhqdq xmm7, xmm5 ; xmm7=(A5 A7 B5 B7) + + paddd xmm6, xmm2 + psrad xmm6, DESCALE_P1_2 + + paddd xmm1, xmm2 + paddd xmm7, xmm2 + psrad xmm1, DESCALE_P1_2 + psrad xmm7, DESCALE_P1_2 + + ; -- Prefetch the next coefficient block + + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] + prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] + + ; ---- Pass 2: process rows, store into output array. 
+ + mov rdi, r12 ; (JSAMPROW *) + mov eax, r13d + + ; | input:| result:| + ; | A0 B0 | | + ; | A1 B1 | C0 C1 | + ; | A3 B3 | D0 D1 | + ; | A5 B5 | | + ; | A7 B7 | | + + ; -- Odd part + + packssdw xmm1, xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3) + packssdw xmm7, xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7) + pmaddwd xmm1, [rel PW_F362_MF127] + pmaddwd xmm7, [rel PW_F085_MF072] + + paddd xmm1, xmm7 ; xmm1=tmp0[row0 row1 row0 row1] + + ; -- Even part + + pslld xmm6, (CONST_BITS+2) ; xmm6=tmp10[row0 row1 **** ****] + + ; -- Final output stage + + movdqa xmm4, xmm6 + paddd xmm6, xmm1 ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **) + psubd xmm4, xmm1 ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **) + + punpckldq xmm6, xmm4 ; xmm6=(C0 D0 C1 D1) + + paddd xmm6, [rel PD_DESCALE_P2_2] + psrad xmm6, DESCALE_P2_2 + + packssdw xmm6, xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1) + packsswb xmm6, xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..) + paddb xmm6, [rel PB_CENTERJSAMP] + + pextrw ebx, xmm6, 0x00 ; ebx=(C0 D0 -- --) + pextrw ecx, xmm6, 0x01 ; ecx=(C1 D1 -- --) + + mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW] + mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW] + mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx + mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx + + pop rbx + uncollect_args 4 + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jquantf-sse2.asm b/simd/x86_64/jquantf-sse2.asm new file mode 100644 index 0000000..4600eec --- /dev/null +++ b/simd/x86_64/jquantf-sse2.asm @@ -0,0 +1,156 @@ +; +; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. 
+; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). +; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_float_sse2(JSAMPARRAY sample_data, JDIMENSION start_col, +; FAST_FLOAT *workspace); +; + +; r10 = JSAMPARRAY sample_data +; r11d = JDIMENSION start_col +; r12 = FAST_FLOAT *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_convsamp_float_sse2) + +EXTN(jsimd_convsamp_float_sse2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 3 + push rbx + + pcmpeqw xmm7, xmm7 + psllw xmm7, 7 + packsswb xmm7, xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) 
+ + mov rsi, r10 + mov eax, r11d + mov rdi, r12 + mov rcx, DCTSIZE/2 +.convloop: + mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] + movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] + + psubb xmm0, xmm7 ; xmm0=(01234567) + psubb xmm1, xmm7 ; xmm1=(89ABCDEF) + + punpcklbw xmm0, xmm0 ; xmm0=(*0*1*2*3*4*5*6*7) + punpcklbw xmm1, xmm1 ; xmm1=(*8*9*A*B*C*D*E*F) + + punpcklwd xmm2, xmm0 ; xmm2=(***0***1***2***3) + punpckhwd xmm0, xmm0 ; xmm0=(***4***5***6***7) + punpcklwd xmm3, xmm1 ; xmm3=(***8***9***A***B) + punpckhwd xmm1, xmm1 ; xmm1=(***C***D***E***F) + + psrad xmm2, (DWORD_BIT-BYTE_BIT) ; xmm2=(0123) + psrad xmm0, (DWORD_BIT-BYTE_BIT) ; xmm0=(4567) + cvtdq2ps xmm2, xmm2 ; xmm2=(0123) + cvtdq2ps xmm0, xmm0 ; xmm0=(4567) + psrad xmm3, (DWORD_BIT-BYTE_BIT) ; xmm3=(89AB) + psrad xmm1, (DWORD_BIT-BYTE_BIT) ; xmm1=(CDEF) + cvtdq2ps xmm3, xmm3 ; xmm3=(89AB) + cvtdq2ps xmm1, xmm1 ; xmm1=(CDEF) + + movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm2 + movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0 + movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3 + movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1 + + add rsi, byte 2*SIZEOF_JSAMPROW + add rdi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT + dec rcx + jnz short .convloop + + pop rbx + uncollect_args 3 + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; GLOBAL(void) +; jsimd_quantize_float_sse2(JCOEFPTR coef_block, FAST_FLOAT *divisors, +; FAST_FLOAT *workspace); +; + +; r10 = JCOEFPTR coef_block +; r11 = FAST_FLOAT *divisors +; r12 = FAST_FLOAT *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_quantize_float_sse2) + +EXTN(jsimd_quantize_float_sse2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 3 + + mov rsi, r12 + mov rdx, r11 + mov rdi, r10 + mov rax, DCTSIZE2/16 
+.quantloop: + movaps xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm1, XMMWORD [XMMBLOCK(0,1,rsi,SIZEOF_FAST_FLOAT)] + mulps xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)] + mulps xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)] + movaps xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)] + movaps xmm3, XMMWORD [XMMBLOCK(1,1,rsi,SIZEOF_FAST_FLOAT)] + mulps xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)] + mulps xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)] + + cvtps2dq xmm0, xmm0 + cvtps2dq xmm1, xmm1 + cvtps2dq xmm2, xmm2 + cvtps2dq xmm3, xmm3 + + packssdw xmm0, xmm1 + packssdw xmm2, xmm3 + + movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_JCOEF)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_JCOEF)], xmm2 + + add rsi, byte 16*SIZEOF_FAST_FLOAT + add rdx, byte 16*SIZEOF_FAST_FLOAT + add rdi, byte 16*SIZEOF_JCOEF + dec rax + jnz short .quantloop + + uncollect_args 3 + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jquanti-avx2.asm b/simd/x86_64/jquanti-avx2.asm new file mode 100644 index 0000000..b7243e4 --- /dev/null +++ b/simd/x86_64/jquanti-avx2.asm @@ -0,0 +1,164 @@ +; +; jquanti.asm - sample data conversion and quantization (64-bit AVX2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, 2018, D. R. Commander. +; Copyright (C) 2016, Matthieu Darbois. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_avx2(JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +; r10 = JSAMPARRAY sample_data +; r11d = JDIMENSION start_col +; r12 = DCTELEM *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_convsamp_avx2) + +EXTN(jsimd_convsamp_avx2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 3 + + mov eax, r11d + + mov rsi, JSAMPROW [r10+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdi, JSAMPROW [r10+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq xmm0, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE] + pinsrq xmm0, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1 + + mov rsi, JSAMPROW [r10+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdi, JSAMPROW [r10+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq xmm1, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE] + pinsrq xmm1, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1 + + mov rsi, JSAMPROW [r10+4*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdi, JSAMPROW [r10+5*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq xmm2, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE] + pinsrq xmm2, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1 + + mov rsi, JSAMPROW [r10+6*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdi, JSAMPROW [r10+7*SIZEOF_JSAMPROW] ; (JSAMPLE *) + movq xmm3, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE] + pinsrq xmm3, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1 + + vpmovzxbw ymm0, xmm0 ; ymm0=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17) + vpmovzxbw ymm1, xmm1 ; ymm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37) + vpmovzxbw ymm2, xmm2 ; ymm2=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57) + vpmovzxbw ymm3, xmm3 ; ymm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77) + + vpcmpeqw ymm7, ymm7, ymm7 + vpsllw ymm7, ymm7, 7 ; ymm7={0xFF80 
0xFF80 0xFF80 0xFF80 ..} + + vpaddw ymm0, ymm0, ymm7 + vpaddw ymm1, ymm1, ymm7 + vpaddw ymm2, ymm2, ymm7 + vpaddw ymm3, ymm3, ymm7 + + vmovdqu YMMWORD [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)], ymm0 + vmovdqu YMMWORD [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)], ymm1 + vmovdqu YMMWORD [YMMBLOCK(4,0,r12,SIZEOF_DCTELEM)], ymm2 + vmovdqu YMMWORD [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)], ymm3 + + vzeroupper + uncollect_args 3 + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). +; +; GLOBAL(void) +; jsimd_quantize_avx2(JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m, n, b) \ + YMMBLOCK(DCTSIZE * 0 + (m), (n), (b), SIZEOF_DCTELEM) +%define CORRECTION(m, n, b) \ + YMMBLOCK(DCTSIZE * 1 + (m), (n), (b), SIZEOF_DCTELEM) +%define SCALE(m, n, b) \ + YMMBLOCK(DCTSIZE * 2 + (m), (n), (b), SIZEOF_DCTELEM) + +; r10 = JCOEFPTR coef_block +; r11 = DCTELEM *divisors +; r12 = DCTELEM *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_quantize_avx2) + +EXTN(jsimd_quantize_avx2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 3 + + vmovdqu ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)] + vmovdqu ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)] + vmovdqu ymm6, [YMMBLOCK(4,0,r12,SIZEOF_DCTELEM)] + vmovdqu ymm7, [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)] + vpabsw ymm0, ymm4 + vpabsw ymm1, ymm5 + vpabsw ymm2, ymm6 + vpabsw ymm3, ymm7 + + vpaddw ymm0, YMMWORD [CORRECTION(0,0,r11)] ; correction + roundfactor + vpaddw ymm1, YMMWORD [CORRECTION(2,0,r11)] + vpaddw ymm2, YMMWORD [CORRECTION(4,0,r11)] + vpaddw ymm3, YMMWORD [CORRECTION(6,0,r11)] + vpmulhuw ymm0, YMMWORD [RECIPROCAL(0,0,r11)] ; reciprocal + vpmulhuw ymm1, YMMWORD [RECIPROCAL(2,0,r11)] + vpmulhuw ymm2, YMMWORD [RECIPROCAL(4,0,r11)] + vpmulhuw ymm3, YMMWORD 
[RECIPROCAL(6,0,r11)] + vpmulhuw ymm0, YMMWORD [SCALE(0,0,r11)] ; scale + vpmulhuw ymm1, YMMWORD [SCALE(2,0,r11)] + vpmulhuw ymm2, YMMWORD [SCALE(4,0,r11)] + vpmulhuw ymm3, YMMWORD [SCALE(6,0,r11)] + + vpsignw ymm0, ymm0, ymm4 + vpsignw ymm1, ymm1, ymm5 + vpsignw ymm2, ymm2, ymm6 + vpsignw ymm3, ymm3, ymm7 + + vmovdqu [YMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm0 + vmovdqu [YMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm1 + vmovdqu [YMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm2 + vmovdqu [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3 + + vzeroupper + uncollect_args 3 + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jquanti-sse2.asm b/simd/x86_64/jquanti-sse2.asm new file mode 100644 index 0000000..7ff7275 --- /dev/null +++ b/simd/x86_64/jquanti-sse2.asm @@ -0,0 +1,189 @@ +; +; jquanti.asm - sample data conversion and quantization (64-bit SSE2) +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2009, 2016, D. R. Commander. +; +; Based on the x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" +%include "jdct.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Load data into workspace, applying unsigned->signed conversion +; +; GLOBAL(void) +; jsimd_convsamp_sse2(JSAMPARRAY sample_data, JDIMENSION start_col, +; DCTELEM *workspace); +; + +; r10 = JSAMPARRAY sample_data +; r11d = JDIMENSION start_col +; r12 = DCTELEM *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_convsamp_sse2) + +EXTN(jsimd_convsamp_sse2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 3 + push rbx + + pxor xmm6, xmm6 ; xmm6=(all 0's) + pcmpeqw xmm7, xmm7 + psllw xmm7, 7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} + + mov rsi, r10 + mov eax, r11d + mov rdi, r12 + mov rcx, DCTSIZE/4 +.convloop: + mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567) + movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) + + mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) + mov rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW] ; (JSAMPLE *) + + movq xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm2=(GHIJKLMN) + movq xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm3=(OPQRSTUV) + + punpcklbw xmm0, xmm6 ; xmm0=(01234567) + punpcklbw xmm1, xmm6 ; xmm1=(89ABCDEF) + paddw xmm0, xmm7 + paddw xmm1, xmm7 + punpcklbw xmm2, xmm6 ; xmm2=(GHIJKLMN) + punpcklbw xmm3, xmm6 ; xmm3=(OPQRSTUV) + paddw xmm2, xmm7 + paddw xmm3, xmm7 + + movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3 + + add rsi, byte 4*SIZEOF_JSAMPROW + add rdi, byte 4*DCTSIZE*SIZEOF_DCTELEM + dec rcx + jnz short 
.convloop + + pop rbx + uncollect_args 3 + pop rbp + ret + +; -------------------------------------------------------------------------- +; +; Quantize/descale the coefficients, and store into coef_block +; +; This implementation is based on an algorithm described in +; "How to optimize for the Pentium family of microprocessors" +; (http://www.agner.org/assem/). +; +; GLOBAL(void) +; jsimd_quantize_sse2(JCOEFPTR coef_block, DCTELEM *divisors, +; DCTELEM *workspace); +; + +%define RECIPROCAL(m, n, b) \ + XMMBLOCK(DCTSIZE * 0 + (m), (n), (b), SIZEOF_DCTELEM) +%define CORRECTION(m, n, b) \ + XMMBLOCK(DCTSIZE * 1 + (m), (n), (b), SIZEOF_DCTELEM) +%define SCALE(m, n, b) \ + XMMBLOCK(DCTSIZE * 2 + (m), (n), (b), SIZEOF_DCTELEM) + +; r10 = JCOEFPTR coef_block +; r11 = DCTELEM *divisors +; r12 = DCTELEM *workspace + + align 32 + GLOBAL_FUNCTION(jsimd_quantize_sse2) + +EXTN(jsimd_quantize_sse2): + push rbp + mov rax, rsp + mov rbp, rsp + collect_args 3 + + mov rsi, r12 + mov rdx, r11 + mov rdi, r10 + mov rax, DCTSIZE2/32 +.quantloop: + movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm5, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm6, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm7, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_DCTELEM)] + movdqa xmm0, xmm4 + movdqa xmm1, xmm5 + movdqa xmm2, xmm6 + movdqa xmm3, xmm7 + psraw xmm4, (WORD_BIT-1) + psraw xmm5, (WORD_BIT-1) + psraw xmm6, (WORD_BIT-1) + psraw xmm7, (WORD_BIT-1) + pxor xmm0, xmm4 + pxor xmm1, xmm5 + pxor xmm2, xmm6 + pxor xmm3, xmm7 + psubw xmm0, xmm4 ; if (xmm0 < 0) xmm0 = -xmm0; + psubw xmm1, xmm5 ; if (xmm1 < 0) xmm1 = -xmm1; + psubw xmm2, xmm6 ; if (xmm2 < 0) xmm2 = -xmm2; + psubw xmm3, xmm7 ; if (xmm3 < 0) xmm3 = -xmm3; + + paddw xmm0, XMMWORD [CORRECTION(0,0,rdx)] ; correction + roundfactor + paddw xmm1, XMMWORD [CORRECTION(1,0,rdx)] + paddw xmm2, XMMWORD [CORRECTION(2,0,rdx)] + paddw xmm3, XMMWORD [CORRECTION(3,0,rdx)] + pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,rdx)] ; reciprocal + 
pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,rdx)] + pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,rdx)] + pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,rdx)] + pmulhuw xmm0, XMMWORD [SCALE(0,0,rdx)] ; scale + pmulhuw xmm1, XMMWORD [SCALE(1,0,rdx)] + pmulhuw xmm2, XMMWORD [SCALE(2,0,rdx)] + pmulhuw xmm3, XMMWORD [SCALE(3,0,rdx)] + + pxor xmm0, xmm4 + pxor xmm1, xmm5 + pxor xmm2, xmm6 + pxor xmm3, xmm7 + psubw xmm0, xmm4 + psubw xmm1, xmm5 + psubw xmm2, xmm6 + psubw xmm3, xmm7 + movdqa XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0 + movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1 + movdqa XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2 + movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3 + + add rsi, byte 32*SIZEOF_DCTELEM + add rdx, byte 32*SIZEOF_DCTELEM + add rdi, byte 32*SIZEOF_JCOEF + dec rax + jnz near .quantloop + + uncollect_args 3 + pop rbp + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/simd/x86_64/jsimd.c b/simd/x86_64/jsimd.c new file mode 100644 index 0000000..1e5698b --- /dev/null +++ b/simd/x86_64/jsimd.c @@ -0,0 +1,1076 @@ +/* + * jsimd_x86_64.c + * + * Copyright 2009 Pierre Ossman for Cendio AB + * Copyright (C) 2009-2011, 2014, 2016, 2018, D. R. Commander. + * Copyright (C) 2015-2016, 2018, Matthieu Darbois. + * + * Based on the x86 SIMD extension for IJG JPEG library, + * Copyright (C) 1999-2006, MIYASAKA Masaru. + * For conditions of distribution and use, see copyright notice in jsimdext.inc + * + * This file contains the interface between the "normal" portions + * of the library and the SIMD implementations when running on a + * 64-bit x86 architecture. + */ + +#define JPEG_INTERNALS +#include "../../jinclude.h" +#include "../../jpeglib.h" +#include "../../jsimd.h" +#include "../../jdct.h" +#include "../../jsimddct.h" +#include "../jsimd.h" +#include "jconfigint.h" + +/* + * In the PIC cases, we have no guarantee that constants will keep + * their alignment. 
This macro allows us to verify it at runtime. + */ +#define IS_ALIGNED(ptr, order) (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0) /* order = log2(alignment in bytes) */ + +#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ +#define IS_ALIGNED_AVX(ptr) (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */ + +static unsigned int simd_support = (unsigned int)(~0); +static unsigned int simd_huffman = 1; + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ +LOCAL(void) +init_simd(void) +{ +#ifndef NO_GETENV + char *env = NULL; +#endif + + if (simd_support != ~0U) + return; + + simd_support = jpeg_simd_cpu_support(); + +#ifndef NO_GETENV + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCESSE2"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_SSE2; + env = getenv("JSIMD_FORCEAVX2"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support &= JSIMD_AVX2; + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; + env = getenv("JSIMD_NOHUFFENC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_huffman = 0; +#endif +} + +GLOBAL(int) +jsimd_can_rgb_ycc(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_rgb_gray(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_AVX2) && +
IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_ycc_rgb565(void) +{ + return 0; +} + +GLOBAL(void) +jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_extrgb_ycc_convert_avx2; + sse2fct = jsimd_extrgb_ycc_convert_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + avx2fct = jsimd_extrgbx_ycc_convert_avx2; + sse2fct = jsimd_extrgbx_ycc_convert_sse2; + break; + case JCS_EXT_BGR: + avx2fct = jsimd_extbgr_ycc_convert_avx2; + sse2fct = jsimd_extbgr_ycc_convert_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + avx2fct = jsimd_extbgrx_ycc_convert_avx2; + sse2fct = jsimd_extbgrx_ycc_convert_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + avx2fct = jsimd_extxbgr_ycc_convert_avx2; + sse2fct = jsimd_extxbgr_ycc_convert_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + avx2fct = jsimd_extxrgb_ycc_convert_avx2; + sse2fct = jsimd_extxrgb_ycc_convert_sse2; + break; + default: + avx2fct = jsimd_rgb_ycc_convert_avx2; + sse2fct = jsimd_rgb_ycc_convert_sse2; + break; + } + + if (simd_support & JSIMD_AVX2) + avx2fct(cinfo->image_width, 
input_buf, output_buf, output_row, num_rows); + else + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, + JSAMPIMAGE output_buf, JDIMENSION output_row, + int num_rows) +{ + void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); + + switch (cinfo->in_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_extrgb_gray_convert_avx2; + sse2fct = jsimd_extrgb_gray_convert_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + avx2fct = jsimd_extrgbx_gray_convert_avx2; + sse2fct = jsimd_extrgbx_gray_convert_sse2; + break; + case JCS_EXT_BGR: + avx2fct = jsimd_extbgr_gray_convert_avx2; + sse2fct = jsimd_extbgr_gray_convert_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + avx2fct = jsimd_extbgrx_gray_convert_avx2; + sse2fct = jsimd_extbgrx_gray_convert_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + avx2fct = jsimd_extxbgr_gray_convert_avx2; + sse2fct = jsimd_extxbgr_gray_convert_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + avx2fct = jsimd_extxrgb_gray_convert_avx2; + sse2fct = jsimd_extxrgb_gray_convert_sse2; + break; + default: + avx2fct = jsimd_rgb_gray_convert_avx2; + sse2fct = jsimd_rgb_gray_convert_sse2; + break; + } + + if (simd_support & JSIMD_AVX2) + avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); + else + sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ + void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_ycc_extrgb_convert_avx2; + sse2fct = 
jsimd_ycc_extrgb_convert_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + avx2fct = jsimd_ycc_extrgbx_convert_avx2; + sse2fct = jsimd_ycc_extrgbx_convert_sse2; + break; + case JCS_EXT_BGR: + avx2fct = jsimd_ycc_extbgr_convert_avx2; + sse2fct = jsimd_ycc_extbgr_convert_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + avx2fct = jsimd_ycc_extbgrx_convert_avx2; + sse2fct = jsimd_ycc_extbgrx_convert_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + avx2fct = jsimd_ycc_extxbgr_convert_avx2; + sse2fct = jsimd_ycc_extxbgr_convert_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + avx2fct = jsimd_ycc_extxrgb_convert_avx2; + sse2fct = jsimd_ycc_extxrgb_convert_sse2; + break; + default: + avx2fct = jsimd_ycc_rgb_convert_avx2; + sse2fct = jsimd_ycc_rgb_convert_sse2; + break; + } + + if (simd_support & JSIMD_AVX2) + avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); + else + sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); +} + +GLOBAL(void) +jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION input_row, JSAMPARRAY output_buf, + int num_rows) +{ +} + +GLOBAL(int) +jsimd_can_h2v2_downsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_downsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_AVX2) + 
jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); + else + jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); +} + +GLOBAL(void) +jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY output_data) +{ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); + else + jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor, + compptr->v_samp_factor, + compptr->width_in_blocks, input_data, + output_data); +} + +GLOBAL(int) +jsimd_can_h2v2_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_AVX2) + jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); + else + jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + if 
(simd_support & JSIMD_AVX2) + jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); + else + jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width, + input_data, output_data_ptr); +} + +GLOBAL(int) +jsimd_can_h2v2_fancy_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_fancy_upsample_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_fancy_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_fancy_upsample_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_AVX2) + jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); + else + jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(void) +jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + if (simd_support & JSIMD_AVX2) + jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); + else + jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, + compptr->downsampled_width, input_data, + output_data_ptr); +} + +GLOBAL(int) 
+jsimd_can_h2v2_merged_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_merged_upsample_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_h2v1_merged_upsample(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + + if ((simd_support & JSIMD_AVX2) && + IS_ALIGNED_AVX(jconst_merged_upsample_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && + IS_ALIGNED_SSE(jconst_merged_upsample_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) +{ + void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2; + break; + case JCS_EXT_BGR: + avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2; + sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2; + sse2fct = 
jsimd_h2v2_extxrgb_merged_upsample_sse2; + break; + default: + avx2fct = jsimd_h2v2_merged_upsample_avx2; + sse2fct = jsimd_h2v2_merged_upsample_sse2; + break; + } + + if (simd_support & JSIMD_AVX2) + avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); + else + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(void) +jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, + JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf) +{ + void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); + + switch (cinfo->out_color_space) { + case JCS_EXT_RGB: + avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2; + break; + case JCS_EXT_RGBX: + case JCS_EXT_RGBA: + avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2; + break; + case JCS_EXT_BGR: + avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2; + break; + case JCS_EXT_BGRX: + case JCS_EXT_BGRA: + avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2; + break; + case JCS_EXT_XBGR: + case JCS_EXT_ABGR: + avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2; + break; + case JCS_EXT_XRGB: + case JCS_EXT_ARGB: + avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2; + sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2; + break; + default: + avx2fct = jsimd_h2v1_merged_upsample_avx2; + sse2fct = jsimd_h2v1_merged_upsample_sse2; + break; + } + + if (simd_support & JSIMD_AVX2) + avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); + else + sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); +} + +GLOBAL(int) +jsimd_can_convsamp(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if 
(DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_convsamp_float(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, + DCTELEM *workspace) +{ + if (simd_support & JSIMD_AVX2) + jsimd_convsamp_avx2(sample_data, start_col, workspace); + else + jsimd_convsamp_sse2(sample_data, start_col, workspace); +} + +GLOBAL(void) +jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, + FAST_FLOAT *workspace) +{ + jsimd_convsamp_float_sse2(sample_data, start_col, workspace); +} + +GLOBAL(int) +jsimd_can_fdct_islow(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_ifast(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_fdct_float(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) + return 1; 
+ + return 0; +} + +GLOBAL(void) +jsimd_fdct_islow(DCTELEM *data) +{ + if (simd_support & JSIMD_AVX2) + jsimd_fdct_islow_avx2(data); + else + jsimd_fdct_islow_sse2(data); +} + +GLOBAL(void) +jsimd_fdct_ifast(DCTELEM *data) +{ + jsimd_fdct_ifast_sse2(data); +} + +GLOBAL(void) +jsimd_fdct_float(FAST_FLOAT *data) +{ + jsimd_fdct_float_sse(data); +} + +GLOBAL(int) +jsimd_can_quantize(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_AVX2) + return 1; + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_quantize_float(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) +{ + if (simd_support & JSIMD_AVX2) + jsimd_quantize_avx2(coef_block, divisors, workspace); + else + jsimd_quantize_sse2(coef_block, divisors, workspace); +} + +GLOBAL(void) +jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, + FAST_FLOAT *workspace) +{ + jsimd_quantize_float_sse2(coef_block, divisors, workspace); +} + +GLOBAL(int) +jsimd_can_idct_2x2(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_4x4(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 
0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(void) +jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col); +} + +GLOBAL(int) +jsimd_can_idct_islow(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(ISLOW_MULT_TYPE) != 2) + return 0; + + if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2)) + return 1; + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_ifast(void) +{ + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(IFAST_MULT_TYPE) != 2) + return 0; + if (IFAST_SCALE_BITS != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_can_idct_float(void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(FAST_FLOAT) != 4) + return 0; + if (sizeof(FLOAT_MULT_TYPE) != 4) + 
return 0; + + if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + if (simd_support & JSIMD_AVX2) + jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf, + output_col); + else + jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(void) +jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, + JDIMENSION output_col) +{ + jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, + output_col); +} + +GLOBAL(int) +jsimd_can_huff_encode_one_block(void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + + if ((simd_support & JSIMD_SSE2) && simd_huffman && + IS_ALIGNED_SSE(jconst_huff_encode_one_block)) + return 1; + + return 0; +} + +GLOBAL(JOCTET *) +jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block, + int last_dc_val, c_derived_tbl *dctbl, + c_derived_tbl *actbl) +{ + return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val, + dctbl, actbl); +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_first_prepare(void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (SIZEOF_SIZE_T != 8) + return 0; + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(void) +jsimd_encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *zerobits) +{ + jsimd_encode_mcu_AC_first_prepare_sse2(block, 
jpeg_natural_order_start, + Sl, Al, values, zerobits); +} + +GLOBAL(int) +jsimd_can_encode_mcu_AC_refine_prepare(void) +{ + init_simd(); + + if (DCTSIZE != 8) + return 0; + if (sizeof(JCOEF) != 2) + return 0; + if (SIZEOF_SIZE_T != 8) + return 0; + if (simd_support & JSIMD_SSE2) + return 1; + + return 0; +} + +GLOBAL(int) +jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits) +{ + return jsimd_encode_mcu_AC_refine_prepare_sse2(block, + jpeg_natural_order_start, + Sl, Al, absvalues, bits); +} diff --git a/simd/x86_64/jsimdcpu.asm b/simd/x86_64/jsimdcpu.asm new file mode 100644 index 0000000..38e1a7b --- /dev/null +++ b/simd/x86_64/jsimdcpu.asm @@ -0,0 +1,79 @@ +; +; jsimdcpu.asm - SIMD instruction support check +; +; Copyright 2009 Pierre Ossman for Cendio AB +; Copyright (C) 2016, D. R. Commander. +; +; Based on +; x86 SIMD extension for IJG JPEG library +; Copyright (C) 1999-2006, MIYASAKA Masaru. +; For conditions of distribution and use, see copyright notice in jsimdext.inc +; +; This file should be assembled with NASM (Netwide Assembler), +; can *not* be assembled with Microsoft's MASM or any compatible +; assembler (including Borland's Turbo Assembler). 
+; NASM is available from http://nasm.sourceforge.net/ or +; http://sourceforge.net/project/showfiles.php?group_id=6208 +; +; [TAB8] + +%include "jsimdext.inc" + +; -------------------------------------------------------------------------- + SECTION SEG_TEXT + BITS 64 +; +; Check if the CPU supports SIMD instructions +; +; GLOBAL(unsigned int) +; jpeg_simd_cpu_support(void) +; +; Returns a mask of JSIMD_* flags in rax. JSIMD_SSE and JSIMD_SSE2 are set +; unconditionally; JSIMD_AVX2 is added only if all of the checks below pass. +; + + align 32 + GLOBAL_FUNCTION(jpeg_simd_cpu_support) + +EXTN(jpeg_simd_cpu_support): + push rbx ; cpuid overwrites rbx + push rdi + + xor rdi, rdi ; simd support flag + + ; Check for AVX2 instruction support + ; NOTE(review): leaf 7 is queried without first checking the maximum + ; supported basic leaf; the leaf 1 tests below also gate the result. + mov rax, 7 + xor rcx, rcx + cpuid + mov rax, rbx ; rax = Extended feature flags + + or rdi, JSIMD_SSE2 + or rdi, JSIMD_SSE + test rax, 1<<5 ; bit5:AVX2 + jz short .return + + ; Check for AVX2 O/S support + mov rax, 1 + xor rcx, rcx + cpuid + test rcx, 1<<27 ; bit27:OSXSAVE + jz short .return ; O/S does not support XSAVE + test rcx, 1<<28 ; bit28:AVX + jz short .return ; CPU does not support AVX + + xor rcx, rcx ; rcx = 0 selects XCR0 + xgetbv + and rax, 6 ; XCR0 bit1:SSE state, bit2:AVX state + cmp rax, 6 ; O/S does not manage XMM/YMM state + ; using XSAVE + jnz short .return + + or rdi, JSIMD_AVX2 + +.return: + mov rax, rdi + + pop rdi + pop rbx + ret + +; For some reason, the OS X linker does not honor the request to align the +; segment unless we do this. + align 32 diff --git a/structure.txt b/structure.txt index f69c9d8..c0792a3 100644 --- a/structure.txt +++ b/structure.txt @@ -89,12 +89,12 @@ nonetheless, they are useful for viewers. The compressor and decompressor are each divided into two main sections: the JPEG compressor or decompressor proper, and the preprocessing or postprocessing functions. The interface between these two sections is the -image data that the official JPEG spec regards as its input or output: this -data is in the colorspace to be used for compression, and it is downsampled -to the sampling factors to be used. The preprocessing and postprocessing -steps are responsible for converting a normal image representation to or from -this form.
(Those few applications that want to deal with YCbCr downsampled -data can skip the preprocessing or postprocessing step.) +image data that Rec. ITU-T T.81 | ISO/IEC 10918-1 regards as its input or +output: this data is in the colorspace to be used for compression, and it is +downsampled to the sampling factors to be used. The preprocessing and +postprocessing steps are responsible for converting a normal image +representation to or from this form. (Those few applications that want to deal +with YCbCr downsampled data can skip the preprocessing or postprocessing step.) Looking more closely, the compressor library contains the following main elements: @@ -141,22 +141,22 @@ allow such merging where appropriate. Note: it is convenient to regard edge expansion (padding to block boundaries) -as a preprocessing/postprocessing function, even though the JPEG spec includes -it in compression/decompression. We do this because downsampling/upsampling -can be simplified a little if they work on padded data: it's not necessary to -have special cases at the right and bottom edges. Therefore the interface -buffer is always an integral number of blocks wide and high, and we expect -compression preprocessing to pad the source data properly. Padding will occur -only to the next block (8-sample) boundary. In an interleaved-scan situation, -additional dummy blocks may be used to fill out MCUs, but the MCU assembly and -disassembly logic will create or discard these blocks internally. (This is -advantageous for speed reasons, since we avoid DCTing the dummy blocks. -It also permits a small reduction in file size, because the compressor can -choose dummy block contents so as to minimize their size in compressed form. -Finally, it makes the interface buffer specification independent of whether -the file is actually interleaved or not.) Applications that wish to deal -directly with the downsampled data must provide similar buffering and padding -for odd-sized images. 
+as a preprocessing/postprocessing function, even though +Rec. ITU-T T.81 | ISO/IEC 10918-1 includes it in compression/decompression. We +do this because downsampling/upsampling can be simplified a little if they work +on padded data: it's not necessary to have special cases at the right and +bottom edges. Therefore the interface buffer is always an integral number of +blocks wide and high, and we expect compression preprocessing to pad the source +data properly. Padding will occur only to the next block (8-sample) boundary. +In an interleaved-scan situation, additional dummy blocks may be used to fill +out MCUs, but the MCU assembly and disassembly logic will create or discard +these blocks internally. (This is advantageous for speed reasons, since we +avoid DCTing the dummy blocks. It also permits a small reduction in file size, +because the compressor can choose dummy block contents so as to minimize their +size in compressed form. Finally, it makes the interface buffer specification +independent of whether the file is actually interleaved or not.) Applications +that wish to deal directly with the downsampled data must provide similar +buffering and padding for odd-sized images. *** Poor man's object-oriented programming *** diff --git a/testimages/test1.icc b/testimages/test1.icc new file mode 100644 index 0000000000000000000000000000000000000000..d0245c813cfb52a7260186c8a9ba5ae6b8160124 GIT binary patch literal 557536 zcmbT;cT^Ky+c5g5C@Nw>5fw#jsE7gz3RnO|6jY=rh$u+!J-tqP@0D~&NJ8kn7b!Ln zD;Dg%_wH>udEVzf-}=s4-#LFA0$Eq~%rCR|o|$C!l^sA+@%M9*bBia=27wBSz(uFk`_`k)TAduecKjk|& z26})d1aAXP^l7rho}fwC_y5KlL6ckc|B2HRbHO0%_eVe=9zP{FF9kFecx`!pASE}& z6613$G5Ejru|yz{+o9BAuya9Cep+Tus+)6OKG?Y!k_k>s%1L!j%P(>+N=?jhh7_m% z&&vPatp^~BK{=K(H%m#LATWCHC0(GT>Kmw24 zG_a-LK_IJ1(=5B_zf1qu{r};yrD&q%ccs;m$>Y|UQ*Tb+Y(t)L+V<}(&)M;Fn0Bpm zAKKf@-{=st0I^WEsM+!I;&)D~mc}|$mo+ROU-5U<3YVbOX=^BJjjpHHJzqa{!y5O! 
z9+{r>jfPGAUU$9!*|NaL+c&~5cPo8c`S$)Dcm2Ny&e^$k*Uq40yCK2cJr#QgLhgiq z+i!QkEo|4p*zo)aN~AKX;n3N`_oBZZv5j4E)GsdLSlV%TJR?DIqB^lB=|b}Tln-eW z)9o@=WNymZnH`xEpPQann2#(V7P5*&#Y(UaVusekTH)P@0puCfdGsjeJaz;(44lQE zCJYh#NxkGwN-MRIRzt5~=$UGkjLqjzxF}u$KSgjvxL*_?-Xw96E|krX{Vn+@f2_Es zJfrGVS8Fs{PAR%9OBbWxUB1cSY@BKQWqMh0-8@v;SXElht-;i0*TvTFY1rJjvdOL) z)cmGpy!A|5bGxpC*NN@Q?vCx*)9c;0{N(JDfBIh!j1Qh3YCfeq%|C-Zn{zI9IA~ojxm ztc9~3=FGF3J=bQQ)x2N!pXNVxxUpbl;k89~9Um-yu;iZ8t)iV{RR}8+-rg zqqt|sULJoF{~_VaiSLQOl0hkxQYWPTP5YJpGvj;am#k0O?{nVfzRG)^|D@nS;oYL~ z;%ngZkO62rtQKC5P#}3IG8&1=$EM(p00;1a1aIP6(qi&#@*m0@>MhzidK<&Q6tjrz zLe2?p7;ih@P2eb;F8nEaA-*X&EA5aOON4Sjk*hqa+N<8IS*4vdEUbZ>`?hy?6fL_R;6l zuFqj#V!tMT%le-8BmZa4KPkVWe+T_>|LgFd{XcQ?q#Khnrkt8OW!lE+kv6F_ifrLC zk+YDqk#i7skhul(GVJ5$M>_0Yuw~)eMP82E7Y8iyck)~6#qxzKX05bZ`D4{v zmq)8_tQlE*(zRt>rJH8Ga0AsH?NQ*FvN3kk0k1&s&70S4S>`j}*TxUz_jT)=ZI8C! z+Hu8yIAAcaduQvehM=n5hTzgY%DvJMQ7CU8XFu}*BaD8K7EX(xN7ABbhiHds(exO` z5oRp=C^wFOOn6)pFSEQZs1h|v+T_v{U8+9KkZ#N{WmaUFv&}h`xs`d9`IQ9~g{GqN zVjWluQ9(;!VmKecLQ+u#G#UfN=HoJfMEp@g1aU8E2icplj=GHIK%c>w$oR&5!+OZR z!8y+z;I;E>1?57eNFb(504ZEnP?9B2R>Uh~RfpA)nsDvG(t~B;x=8)u@>oNHG0l`$ z0XGvWIaSK)%9_sFk-B^JpBkq%IXC;XM6~9#QQP$${hbfGC-f}q-PM@ z{LcQ7^wa(4=YQ&drTw1syYbI~KM(#E{?GY;a}s;G&6*@gqa1i z5@zq8v&C-N+-Y;)&%0*dIbY*IT!3GQSp;@WTYPv)pp)BD2WOD;%Vk%Vcdsz4ulHG>t4EDUq85^-d*j%@O`_S*o*1>Jf z+YLKp{)_-jVE)deU5A50c5e&z*t24?)is?lW);-%l_R*`y+J z3}q|TnKqgBl74~F$ds`#>?F<}?s}db?>qmlU{Gil@x^e-3F#i0d&vU%1o=zFRb{Wr ztd?kq+QQO=vV%H*y+`?SgS~O8@we$y#Y^+U%3D=q)fa1qYfsk=*7rA@Y&_Z2-#pZE zruBT=mG)a5k2>FUeebdAozv%h(xX3MAaXEm2ziQo+Hj`l?6q?rMrNFM9o=&w`66&h zbGi4*{jo{cR$t$Lqxhy`eDK!0JB#iH-z&af`ryK&iH{FFDS7(r`Q{gtmp5Lodjq_^ z{@(Kg@8i4A2fmoUS$#kHqw?q1Uw*$)e>(qK4gkP%d(wnEk2^Htk1NLxG_Y_YY^Dr{ zQ)BmI9^p=6lam5*f3Sj}L;#1Iyfg*>2Uz^84GU%>9?!zY&>PO(!XBi}tKWe=Mro3| zV+mvhT8Ql-b)0yCn??M*3xtyp;Y-SZ$HaR-b(mGssSo#J7K=R39>J{QBWp)7`?yZx zYZwaK78QoM&fJ#Jgxx}?2M%NVXzLeS;V#hTeD6g&l-1oQpdHj_PdA{~DdyGeM#o8Y zqGEKV$bg)QnZbV=FUG*Q2LoC#f7$C7v9N>8v)`tn_S8V`j-!Gqkf$6`$Mu6%mr)XJ 
zns7GirSd3z933a?JWfMj5FhYw$3zO43%6lTao&F+Bh4Mh@5CZCO?wB8$i^CGB^vp@ zVmtpKYQH`V)`05MbR1)%w=1|iuAsYQ&;<_YTfC6ZK1ladMYnB{j=ft4l8~$0ip>X* zDGi%>_mF3+3Za)#LB`o}HK@Kaj~z46TQ##B7Nf1X+D}UmmoA?f|A1&Zm)IYIXzOQJ z_#@tTCUT!4W1B{yQ;>IRPQ^_@#aB?bC!;RvugniYMRMnSbVac4k#7D*q>WuUc^GkG zEr9GpQd+H#W};@)-`+NkLRPG@S0m4GRBztU=VI4g!_q*Q`kwRD z$(R^JGsOe5iL;1I!t4cKB>lp$j%Je-nCII$Bn_6ifJs3D6JDLA*3hSpfhamErkhP} zrz|VSlD?BCu`ZHQNUp^g;%6c)b{5e@wBF`RY$M#C|AP!8O?vSUr9k-oN)#E!hjh&* zDY*;v^N3&AbD4A^gc)6QnCL(si}_2qK|^nKBfO%D>}ezv)$}})T&(_nsewdP;5%@{ z2HDoKQsPXpAH#>B6_ggPC4}>KMSmkWau)k}5jLubrT&D&Dn0Ej{*m0Szy`0AJd2))hY0`rM&T)f8FTFka@Mma$)p+0Nu$?@OX?=I zHWMPvFSV2L?S>81F??9*sl4C#X{xTnOMyr7O+Iwslg!6%DZYyp_}HDu>>EE{MNoDS zS_JsfCZzf^esj$|%5&gKMO~gJAk@D(bPPz<-q>;n0IQG8IfS3YY<;wrxN<~1QcKu8 zw6$4=Pwm;Io&=n09i~hJGVAZ;!hlUx?5I<~R8#bp1Hg3MvDr014s*}LFv7(fox|h! zrx(MT2Jv&xB&xgt@X2}PFu<+@nKOpF*K|H=8?K{n@8(OmuJYNlU2&foZ|`gqB%)=f zvw2ALuKH+B1A4anGe?4XR z)MV~XGO5m$O(f-)G%$Y=c?2S(n(#4;MQ0Oe5%Dw~A;23+3n84CZN-ogDK{H=ApXih z6=yl;P0epsGV7iUV0>V@;1AOq=?$4#Gy@GC9!ediLcO3=1a1QpOIghr6PP+Dj6Kb$O zd5HxtbE#U%xT-D|U#4d$remMdHcDTn@1yPzuMfje@&zLsZ&DWXPuhl2SJD2C8QJx9 zr~4GlhgBf+SH=z_Ow>fXrVGS8r0O-t(vDD!itz)@Bku+%03$RDv!OX zv%FWpOmCT7@sZwNXAt(#;wtr+=~NHn)KoQPbJ?>4Gsp>=T2B?(M&&=li7cggUeU3J zPo3^*WKQY}shCfX@0cfeM*Z9rg5F6PskKV^O1^D|?Wd9_8&f@3lFCZ`Y--3>)Tfv4 zvx+XqbpOMsJNLnKoNhbtK;TK$bw{C86l5zpC6ml-IKAJN)LoPA(Mmd|eQ$G^#GzJ< z{G<4Yifa>=>_IKleU-GKrg83yPM};6_JZ9gcv2=W74;*uh%=5Bx+inIG1%!?-cnrO zu%G-P>33_IYz6U|Zi2Xk@QeLZc!01EzK*{OesyTd zrJ{z`NKMt zZbN!I@KBOBf~ZRuo*TNMT*Hs9F>*I?o^mR(C{I=5gS;B+CTT#W?2HnXp{K9@B>aT_ zZDkhXaE8uC{X?Rt!nO1$K`F6UPsd**oRI&<@5sF+^8;QTNf!45qk#lr7jSE}m(T_e zvC0rkBFH;vx>u}zlY=&cIYGQqWlzt;XUiREYjZwGk5gC2FhuVtX#rh=x8&z89Dxrd z&dQzFM7Y|H);Ws57=CMXLZtAoGM4WPB$R}4H)Vg3sM!4IYLOSqGhm{?i|M=S2Oq|~ zIq5f7Kxk-_lpWS&7^bUVC^rfID9k1PxM-O{!pyc3-w9(oXpLE~_mb~mtK$lD2xA zoX$zFGaD^=C^!Hz2UeX^Hp#|KvXXwvC%s7O0ETEUi24!_sP+l!4(ya~=dan)D{bRt zJ3*xLc!?kzz5uYU;8nSmkI3AOzsfu*dv!n54~x%fy%q0Hn3WAB$M)BjaHZK>4oIg< zo;a5wFmbHmMm6p-TYMo 
zs4z=DiO+}vKx=pl0Y}r$%EUIXbfM9)5k=mmv#ohnbVjq+{5YXgam`qUr*UDO23yYG}UmJcVBr7Pj9s86rNcGaq zDDfTrwIxRJG?6W68`l|Fqp9m4A;6rm=1PPg=2G1+#KxSIDkdWRsE;WHaW(jzz6P1) z`Kt6O%G%+5SrMuS1mdp&Kx#vK3trFJ+0+koW0LA71Gd?pDp@#nY@0C!R}`G1tHBXH zbF_DHCmrfaw*noOjA?q(KL~b}_{Y1mX@`R7UG3!EDy|##!8F=tVV=?0>S%6{D==Bbf3^=K9^SrC%8_ z9%9X0hMU7N&0`+JYTpf0(H*?xRAkLsgZEfi-d&|68e@P=i{;Zy*-Ax2>ns%wIGaEASrPF zu2M_%4y#q^T$4q`10cnjmd%w~W?mA?}09 z_sZ1{_DU01VNug9Dzy1}6N|aL?rQBVWK7kh%F2uvMuf>T`kroIdEKsG8nIsOu2fRA zo)+csAH^O5K=PZ-lhQ0Jhv&M~9 zbW3iS#j+^3lh0)b0Z~_+xVb>k@?Ho&VN>797l%Esy2f+aHeeEPGuK=%Pv$w!wkm(X zxN1?B{lz_fGF+Pk?Cfk-Z7Dd=5+E;5xKKY$<`z~{~N`OT=BX*XbWI z{!>FAi%RPoGl_7&I$iY_3+6YQOQ{KEb@}RhVf0F&%6c2d=%=h&)26#G&zl{lpTeAC zQ41}~r7xoH3vQ&Nv`U!&wRx#&IDuXJsQgA4ta6%u(YAa;SJ~e+jk+iWd3KI2i0KFd zG5-P}&eh)c?PqZ{9qXGE`EQyOb?XzTwGmZ6!%*ge%46FAOP=@3n%iY(6x!K!Wo^v8 z7WE1Mk(w@Q;GZDZ&&9%tX*GlS@VJ;cJ=frv;Kp_#eB9f*c`D+F^VbF#qQ?4F!%fs~ z5QzT6qLMCXfINueEEboZRxrTAR!5I^U&96k7q)@1YrSqZj$p%`zt;W1YNn>wy~I5O zff(~GDs+@g9fIVaK~n!x=G7|{ag1D)m}Xfd?J3CO|6*ted^QNHN-y_ zV^Q$RmNoRfUd{Dsv_H$-YC`CbryZ|WP!3zv zZj16Akx9sqBd6dZ>$D|(1^oBXSshYdNbs^|d+tiF!Mekod&`biJ9AOfMyhsFv=(*5 zqLvI_RWE~t59t+)($@5HWV~o%d#$7-*r&-|ywB@V?QwDFGC|b>QNVPsN=M35i^>2% z#6QDso;R&JTEB65YQ-&Spv@jr9rZu$q5+_f z6jU&RG5!suM3ndcMtwzGGGow3kc;^R=m8`H)`{7Qnt$voW&maOzlMdQk2%iAUBb+I ze+JK28uvC;8Wr{&<*(MJEWL&;OasMHUYTx zHVf{;17FX8S8*bGuOqgwB8&pW2t&i^LT1ueLAIdgP{)q)Q5_WVb|^ZVOn3N-en(#L zS_b{DoHC|?k!4qUoZ*Yamkbl&T|!%SBqE<@DE@+6&B>01AaAh_Y|BBB8HD+A6peQ8 z<$LI4qwXpQD%9n4F<|R8`Fb+!x}u%=4$hYa7xf^HirtQ+BOL_3TOr6+?wEZFGKMwp z1p%_JG4gT=WUOX+=UQk^#TuPAY@r^@=!T7Jmlw+5M&;a?97MJ(%};`yD6W|ojYwzr zJr4)B_b$1V3)$ZGx&04hwBc{rTqv?Sh29R^WpXQMgwNN7Mf2e|)USN6A<~qtbBo{x zw#%~^@cv<&iwN+!ftT%5Ad%gm(uI&8El}DBbg(WX|2a%;esK61JjJlscN6?k=_R{% z_*9nZDIYxP`q~R^V8n$PZGPadr#-d1A)?+h)W6VUZQt_UVLlDd4+q0RHS2v0aG2qM z-6|N974+Z{X#?!Z$R?r=zP0%yaRXeY{zJHgm_a>CU?Nm`?+M3|`wpKV97Ij@bs?0X zuFSnhQlUBbmlCvu?sMM=Yw_Qj?%;>;2UPd)2w;S=0>29Y=QiTk0s)7d@E(BEmbZ8! 
z;ARIWIs=BgBz!2l@@yFJfO)qu0-!S7mFYkPZGdzY*hppNP=E~-eAEmefXvza3}BNU z&e?`o96y{3rpSUXe zgW2l<2=(Nx6S(aT%4DT2JXV%mTABuma;$o}ZD3@h@F{5RFM!u{O0~ zN)&0fH!uVAV>f1F$V0LHuP{q{XI6bgpJ}@;2}hSSJi-N`wbi-lD)g{P95#;rroXw# z4dbXiVjG27L!Wp(1>=3mp?@3t;@M|a9_YOOQ(^+ztMeO{g!XAZpY{!%S|<%VgqBuG zHg=#}%l6NBj;^M4Tsp~)fC{>snXh0|Dkd>1U|1oR&Vf(Ec+#ZshP27lAq4V(nz{xV zveAdS3gtQD8hs@iad8{-18}+v%_sqO7{}?!xKTj{?Jy3DUP{fw#iaa0sli3;7g3yW z$2`3$i*PcV*Hk8Obo3#kg6Y_~jGjPO8megPXj}O1R1c~x@+0L4Wp46Aas@efAD8Sx zl6VA>myv>OoG2@a{pYcCt{Bo@PYV$`l)F$@@y5C5DH}K!5cT97woTG$(g+h8sw71) z?A`N7!SvYa*`(K$))5z4oo2i(hMJ?S)QwWYO4J-Y`M9JXP9n)fPKo`bsr*YJeMBMm z;)Yrxmh)lSW#R*h{@hm@$ehvYPU$vem6^%vG8#LBRHq(?5s2>;`6sRrkI3Tp{zG^v zezd`wa8YbKErJN5>_7X1TGYI-Wedf#F08bkJgZX5f{>hzi=akgTG_V~4us2^J$nxm zjwm7PQ3M}($keBVV2agQH|nI`zs5DC!`b74EjB% z@o#iL)?48_)CE(y_!{!rGki+T@b;$jWboh(tve~C2g!Iz+}D-?@g{H@LGfdRDK*V| zLh(ZLH8&|fUhOh96z@$|^jk~1A*9+^5d&(Yv=tDcQtEqN4QzAKcdj+;)A1>65qx-e zJF5s`ST~6khGb2tVEsia`?|y!oLfzlFdRFqn8%O8VyP3jG;CDSH1-?p!DEYAG;D6r zC^H{h;<}o-3;T158#4!R=;erFXg8`C2)wAOl7D!cDG>5w4xCIc_{(}pIu*BuDImG; z`oO>wJJu#M4iMWX_b{dtU3&6`fdXUYWBwXmiL9Nwg;Pp8!!BSw%74ZD%uGA#!7wov z?^;M#)90))(^F~BCMVFY6HB}I2-1~g^95d5NrSY6lPH};6tm1Ca^6km8bM%e0R0s& za%U9nG3WG}sWdI;++DEltqvAZ->f|O;g49$Jdpq4}&$)yKK zIs=WgZK8Xt(bV^%tyX6!pNQK!tx$+p+fWb7daSsVZAAMHJ?}#21k+|tcMC}LgLSTc7T$_EvnoCZK^K`kV%E@N z^w$Ga)PSnX2IgD!h1_>zZKTgmQ(I}HO`BzT~7uZji<<8G6mhCD=P$hJdS8TH~9u&Ai# z!g2VD*k#W!^RhVRyS-KJvk^WkA1CtiD zQg9ig+IEW{fO)XIoxcftXo4GO1b3&dQyom5sX-`?kw3G!G6m@evPI%d+LS(7c!#Kr z4Cg-}dTv|7iz2*T?!@yZZk;fPoe#9vcB!Ixp6U#FF6RU*Puj$4L9~iJn48k(3Vzam zMqv4qX`i;{ap5%IWnyk9tr7H-6#!J!j42CB7?zJHg;FbKk>sJs2yYan3W`%b1&%yI zcnfb6=dIs4j*)$PnFA-6;|_Yt+zu$KKPg6ZQXkcoGaL^g8QIrjAVdcy-{(XN~?G+%Qu~*KNq*^|G-uW{YtUP9lXEl zw1auv>531&{p>8oMCZfo`QqE4GxQpOTE&yUYROWB%WUf((f5iYsv%IaV6-AS*_kIS z9}auUG3r8nW7(k6IOiEGp12a!O?L!k)%mq9NY|@cObO4bQjw zd>E3mzw)@xFIJ5qb!iW4hZqWKqLl+k{WLQl>?_n5UW2;`^*S`(}jsYdv@Yj}+lo~fj=TRX+KN8xMKSL`BE6WPd#RX7}7rNm1xY8Fb 
z4P9QMK;QN1l~!ZcFS;o6#>gyVha3P@R%~LBX##)!V-gvkQ<_H9=QpT534e|+Rcs^B zLJ%??A;`;;Z6mB(lq+3GIAs|HoB%+jbw)AAou91x$|3?7Ex`DbcS{vSZ#Z^X9!sm- zJ1iZdKG;+$zC?vDnk!jE^RZl2!T^x=k>RT3DNm~N7RhlHntlN;Z=#aM)5IxD=$x;6 z-6X%+y_;Ob6WQ(y&xqHv-dM&CwHD=PSgyIteN#qPe!~9J%$IxTMk~gpEl0o0#>JF9 zT8Xdl$;M96Rv~>MNfga{X;IbyNTVt*HGSl!mrX8D$HLT%vMD)d6v66GNAqN1%D6qQ z;s&{JW0FuMpS>_#SjJxh0@2VGHNSjUgBRy%slH}5=CgW***<5leB6MH{UW`u69xB( z_m|DwI7hfa177GNoXxjPAfnk=G8Jb1kIuUsOzEN4Wf+p`Vgob#x|~>pjO9rwmBip2 z(YuPBo}+>rrBw^Q37UBiKp<)z01|&_o?1Le!PK>Zw?KJSW#InQccvjo&Y|@3bx_a1 zva+eLNp9~o-(gjDd1Z@XRhD*PE$VR7a`Xhs(%KBbPTH+gPobQ%v`Ywpgq`(KA_3`8wUwX%9BaDGqa`=!UvTb4hL#4fN&aWl zOIfAs@QPb3lbxgTE#sj@S;nwJ*Sc2a58`lDntWgJRg;f&EZJQjD6Wne)ry2q{L@si zf`N4b3LD{EyV;75tj!=0r4axLdg@k{lZhuP$IA8mS}QI**iv+ z+qE68<0a>mKkTgK$61#7dlX9z`Tx`+TXP8KDjgc<6pa}!)_h2EE$gjBhu_vrt0>-~ zR$MilckL=kQ9iMoQu35#nGHyO2!LqN-Y3NohzA{CimS73w(J70i(6fP4m=Xds8&O$ zK0@<6XziGD^dn|#m?c+9nPE{*y^~QJ5bYgPkyo>7nztdVj()DIM7D$;tja^a z^+_;|p?0nGGMq-?r)x|b&`FkUwE!UM-R_Ts7l@>`-}uj2@l7uH4M!!lT%bARYvq0b z?6b&N1>~%pTaE`r)9)FGco&PxwkT=WGlmQ?q4f*hG4omDa_ZHiZZ&wy^$?Lckg|Tu z1%rw_w&J5cj7rVNq)>YDd>}!3lVI>wI2PCaEEiV~FjmRGL!Q02-}qW~Ym2Ym zO=e!vQ7V(@Y?P%U>P?H1ThxJ$B(oSc)_m0HlyRyqTlY0~R%K&p>R!1qUh`zjJe@%^ zeT7;3OSZ(OPuoQOXi+GOvhA4DxC7SL^u5+MqqX)^CGp6Y$`vNlUYeoX=(}ZZ*>C-< z75&;1vh_9}v`c7nEh-TJq4I`3i+<;p47C(*NjTDnE0%{BcM`y>1Jo_|z{Bpl8<#;+ z9H-QOhh$I6uR8#vgFwWU7L{=>07=WO9PCHVNr>;wM%)Z9=m#Wb13QIt6Hy;$pP_=6UYz=UmqH7Q7hH6N?JNG!4#1a!N6K8`FCn#zm52stX<;%F2h+t+PzT|ft?p<~ zgk-)7vkdY4@hmVEn=zUVp<^9ee?rliC)#0HD&`#R4tyWxe8C39Cd}jLIHW!1ho3d_ z9oEj?8FdJA@}XzZd-~-OId~#1r6mC}ooc5EhE6B{rea}J$#e2P!KaXR9^L`}N<{el zKnxNF=MEyq2sZaK3x#5zbLWd{1iekl;yzxEsvdlav!5~t@{|S5ZGe7d^dEACeWWkj z@(12PZM74@ndH}Z&lW(msu~a0Mb9-zA`CoB(Ug4z;w{@0xd9p| zp0#-=^nqY#b`zAu+;=-H?^P@7)X4&?hPXOrp?$SyNl%f7X&a%a__$7u^7C>BoQ9s{gJKhu;T6yZI58)Hm!a(>~^C!hY4R@{R)nUD=MZX z4kA|TTSJ~8l%?4lej!e(!lv&;uBW^oISg|;yQ3`>7BR3wmjHux-DeNOnp^r|XW%RA z-4mVRbycX4Lx|O;j~g(EKu!L%_lR0b+bJw?%j7j;u%k5)h_DjH`wk}nm%g|?FS 
zKx|GNBi)92?`j;KUW+tWeJKxew$AU2@;rn-~N7|5U|HWG{IH;Hb9%U~*@ zgK#K5f{=~>6nvTxgx9S5hd{>XS<8sNc$@xe;%TmHohNZVo2@)cKr_G53JD+vx_Bqv zMBjYe8=p0U5lSAh$wOd z>Uisqc>^TQq1_-LgVp5<2S}`GQ*^*n5~k0?l7;B0Zo>y?CGr@cQk6oP2l&W$7cRy< zlOBzWz_p0_gMx6Sf~K|Cadm>ZQ-W}RNZWgZ@F6u@s)hlLIjkfFm~7ZbcE)ka;tICo zg4AtC1-JzY!Y%{$YsvDp5!mn2os+-f{7F-Lc))1ec-3scrD>zA1V^epK?=i7G;8vU zv8{%%*c(`JS;ejySeUk8%{^?qf-t!St0Ug-nFqiJPF50dS9+4AuW>8eP7;M!VWW56 zDQrMZNbGJ))~U3!PY35if8*m{g>H z_BBQVvLf0{PeZK?m`-a($yPPcexg24yhS5nGMhs%JvrG?_b7?1C)>-(-OSn*jpRXQ`@~hGbNJ$hDNM3NJlz@tm zj;Eia6pG`b?8q{q=e9qjZ~U4SZlu@zWfOi7pWvhGy%{geGc~X1g$4<`k2XtZi|nG@ z)Tq)YlgE^($Sl(DlB?Swq)^$8<$WX%v1-B?aW#H>oq$o)v|1BD`%^cOJ&h`<>_JYX zL>r}P5#(^)gNRckqW1IF_ry_6!tz9-Na{bKjljmQt#f89>S3z8Xy*2QRvvY0(<#Jp z^1Ir%sqLhPm3jKjgEMjz@u#EThpP)NbRs5*2*4^s;n29H3km&xGFr zKNi{|_9VXIZ$%6pxX7(ShWjk!h>;^sQ@Gy$`Mi>L6Du^llf~e-N-jyJ0q4jsgtvgy z;y3&!z~aQoyd}5~2PSZ2xO-cY*?8QwB{23M+$YQErU(F)f01rx1c>`ZFX>B3rv#^I zwnblg=c%(!ICAGw0{8o}C1mmDJ{FejxcC?A4mk}3qHMJ&8_5P?p=eb2ny(`c^DlBA z7JlX4=4?s`=FDd`?!&UmnV8M4OctYNF_(Ff@yK#z?g3c7Yb4HBo)Xy#-Q^L)>HL|} zyM-&ci^P<882hL&BlH~e9>2z0#u(zAUc7?wk;}JS!5#!aI=pDyut69pQ0o>FLU~Bd z&w>mNUis_zS=N9&Ds(e5Smy7&iZNd*a=gTF6I`%d5s59?gAbz2+D<{Yz_ZE*U&*sJ z1?QjUSm}+&*Rz6386gJ71x<=q6J4gd>Byv01sV{D@Y|voqWSG|!CwCT<~aNcZcW{% zd>?jcRqrt~^F~Ev$QH&4<6f^wx<}b{$1U^)0?T)M2qep=(vkXGc_ikx(vJLERIK_= z!R0KJ(x>qA5pl_>V#nQi(ps>;N0B5Ivc~?e>;Pnqr4OAgS^oxICu{?wNvnsEkprre zuosz+6?5S6F~4LA__Lrp5&{D0{!;9Ucw=vnTt(cr^dSfUmDrR~a3|?6H5gnxa=KE2 zv&~#u@&HFBJb_>y+cI{*k6Ki)W^oypxw5~Q+C z^OZ2<;6o~ry(}>BFaMY1?D{5Nu7o@= z=8BrACZVfwW}rLYuH0|^OfE(Zoi~v;gH5$4CIDh7YrXP7*qBOGzC-?T)1(6T1cbh? 
za9LjD)i>};TX$_+@sOoAa2DlO;|mSMPB9~)oP3bs4fJEeBApnP z6ShuEg#EMix#}~VwVI?Xfj_qOQ$K`Xvb1Xm05LnNZL!BO#T7>|fAi4g{g}>pW?3et z?0``djWKOSC{JTft=^=F!Q8d=S8c{_wJ6I-is@1Ll9Ga1K`W*~-*0?naDhRb2FpEQ zf+^1B%ixw{<3GKD{n7dX{iiU$QG-?`@2*Wp#~<2MnSnkO69YfW{mD%Wp3#u2?{4S6cX!_PStEDe26+{_93M0I%|*+=4~v}>GAKK^kqqe`BPd- zarnpos{PHqES~~>eG%h9Qe5>8dR^4viY>J2UEp#G_5DWAG9gv%#4Gipt()>e^OIEn zUxj(!^hY$M;9y;~ARuXKRUuCrHPMvBb=kE_Z|2x}{wY=pEMH;T=lM_^RR#alGRSc0Wuf2ffR#ZE*H|K6@d0$m- z(~%`zzImFEE3L2d#eR^c2?hAIQT1Mh>*q=8Ulg=~K$UC3sDD-2pyi9TsRO80Sz(dcBFg+%K>? zhZVopWM0N1&tF|}gh`z6q3k6-{9onK8!o$&>fezhxgS&0j1x}}Nv^vfWc&G7@dW*A zTPl2n*7Hx8)^nFl+*8_wXZ)+?_FU1PNZsB(qFxkJ-!!Ip8TXo!aqGw)8^D&?cwERF`Z4jbZbJgYhk6?e!DBzY9rQOG@euL zo^LX6xsek)G@tM{{;O^_7vgiwLo=-#2 zu7wabBN19?pWQ?x0$P2&H9rP+cc7r)HhfP_NKqiXRkp8~i}1pO!Iu#|nWv#MkrCmq zU|z^;-Wl*^s2#IM;r+NI^~!;E;oi;ag9YN2UtO5}k`vlHmdj!B%y07-GTw{*3Z!%hwytO<%`-iw7)rGY z8v!?wA8rC5MWkCZuR%T$JuV}&L?!t>TXO;=BNc~ouZjGHHTm)UF-&B^L#`-oRZ%>f ze_*s&&opf;2k&7lm^l-Cm14T&lF87kx?W}bl&&yM%(PbZ zi2V0IDr(^yJnt9#b4P7ciyzTvUbN3>uf5uNCo8>@VR)7O!?=zgl1naohFXxnK>aku zr9h+Dw0~jYbLl%zzoMg(c{6SnJJRYd?8#W#Y2P_JbGT)KVMcaT-2g8>=USx<>7JKm zIF%fe|Gg}CUqRt6^%)Oyp;W<}kzRzLC5^VHcbz)X0c0ljVagM;#ygU^%A5mDClI;0 z<24tOjQJTA??OilY7F5Xiwa(8-p-g&_>H#e+)>2iY-TG4nUcFtcM|zD?-h3)jV{K8UdGsi-?~4*T!cE=_~KHaTh6?IJ%?^kIVFk^)Mp)HDVCH z9KI8I9DXjb7Wo%}47rIaLoV9z1s#IqP0z;EBTc7Mp{~H=rW)u|fLgj3CI;MC#qeFY z=g^PvFSzy-WJEPiw)X<^D6V*eHS!5AU|I`$6ZYEB6>vEd*=PgFW^B^Hp{r>hnUkQ8 zse_O*SPi8&Aps62U)(c>SWkMl-UFc|c}-i0d`^H3#uXoz%&nJ#U4>uOR^a#in{+9p zm3tnX3Z=6j#P5ZrvDOE_fjcu)ZaMHGI%BF3F+vs&tSpMt_SYp9yQ!`#M~gqo@6s~B zJ<`v`p%AHf)A4;!u%IT`5$45XuIq+nbCFY*!|#xR{&j`iN^EUfQKE4~alUwk?gura z_^ozR(Q)tvWz{hZq`m~YI}4gAxwOs=N)^Xg55gVD0VneczPB8x(HEYszbXG&gs=9b zaEgOW!a^$8MSmc!8!|`B3L1r+RrR^nKu*h7Si@mZvTfh{0@2CMH4cUGU6bUIMbld7 z4&l13mE z8cjS1?1MSvJpp#Y?!*=V;c&xFApl1lU9$oBg*an%hA;zpxqUHiCic9sANvE_DwbjI zVU+~Sj2$c`_X74dmUsk#{f=b^cHmZH1*`LL94y)@3E*P|tz8%qZOm{CL!rJE&BUN7 ze}JF)0C7IX!2>Qt^1iXX@dS|&?s7Wk2yz4d)>KurI|jvjk#bl 
z?X0DI>_3b${c7dkh&_s*GByfJJ1FhJID%o4cUVi-OGy=eLCjN$H$gOi zK=O&0VY6X*lm#0<$j1^u^%JCDiFxwZ(kY}+>Mltf*%5>jtH}4WCWsvW zPxz6gwb?|E+47zX;?Lq@YrEK4v{R}TtrFRh6+*Evr2}<+#`h7&x#XJN^Rl=| zeZk0D*||=j>31~w^7r@b7JlVD>GZ?j(cpV{WZ?!c6#_(PGJkpvyn5kX2J;7 z*lC;x%g|gl_#u2buXQx!W%L=XJsMuBQg6Y0PIXo_;E2nws$B3}JtXQ@{AZiyUFmxp zUK(of#Tr-L2|_w&rq-Xh9Q9q@N19o3L{&)owl+_hM82_nk)od>cmJufqX=v|Y+-vf zxa(&yGSn{GDh7)^Uww?xg|br>GOic@t(?kqPf;tzFv~;P@@l5eovmOpk+vQd+alpT zv^#|X%AcCaf^E#tDz;z+a-!0OKUDNxewAOCd`@ijf9 zRiM&NbzkYi_^K?EKSMamKg$jj1<7_uwc0T zPUT?pcte9!eeYJ~S_lxdnDn}^GK9%Fve3?+e*z}H;FT1rd z3YsC!?$YM#Bo6I0Yp#gLwSHbI7DYCBx*|ktHKTiM?e)fc&@=pN^=WVw;co3%WLWhP z^BdIBY>8O~fYYC?I<33Mh+k5qo6EqtI2e{P7LHKr^R;LAE4g|Lns*Z?GzIZ!mD-vaJikna zK7#vv)e`MC?jMWqXzp`+oG;q4V0uR>JL`6NH|LnSNVW^R-RLPjUa{RUL2_rqRoz_4 z6k8@yvzWe^qCO;E<=n4z6^+^ssyxdvLkh>uMAe&##cD9l>@KoQby&UVO+F-r42%(I$ z%ywR{?30hq0Ln4umHkIphig9e-a_m2wO!q1d~JTaE`7VYq%|w%yz*G{r$t_h&2{sg z=PJI*BSvHiWIZ#W5#-p8&#=Ftv)bk%sD<~NsmNbz6^%}4eE8P-6_}kqWpxj*#g2sy zo3Y-b)by_RSP1D(`&C>Dq`&0}KCqD2Z5`;4Hs!EQ?}IQ(wf8VtPt9-`7bPI=(8LftPpzFC}r1? 
zz|ACTTcf#l;PR#n&R{`Cy^_OEnOp15fvr$j7IVtyx0p$sLdQ*J1;;Q#@qq0Mr748B zEej-6(2mAfaesbO9ZmEu*<|@HDqS(h>?|5Pzu1%@n(4ULcvo;?gyLRrTdyrAK$|l( z9-!ccO4aH7S+y3WCV7TsxQQ-F*1+1xq)e01(i!GfqqbCfA2VQ#9LJ}6JF&wE#ktV3x2F`(YTViR5*S?9*&dqv z-O}IEku=scwK-_{4Z}Z;OmD0Hx@nT*16|C2vdG)F9fELRpSDE8980-%xL5oS!*wLw1?#4t$$*&VW&1P!g`h#Z0y8-OaIoLi@UuF z+fjz!8x-A^N`SjRXzL;*kNMRcK^mQH$WsQG80=oi8YdS1oFL zNev2eX*o_6y1#CDM$H{t*6@u!I*XF}ws^7u&}V%twrdHc`vdECdRRvd>+s6styb2t zz(1SetPAc>nu}R`$M)5IXKx#{H_cGVp^JJ+N__F=&IgLZwDLBZyl~~bW~i(-u)NV- z26eyKFenq)`&tJ1UZYe^KghTe^0?=a;Z!lY(^tPH&Aqi>dnfjCQ?2H8U`RuZc7pp~ z^+t7(eZCpaPZ*^Deb*YsLI!$*>SBsRJH*!f_19Y+%x$qXjW10*0$$V`Ol0>xb;q>3 z>}94LK5&!@?qhejLAH1AZ}TV)ZvUtG&-FtsfX0)tjK;+c9|B&~?XNSqU#NYrJz?Kx zRPvc46g>2RJ$O6d+@5%-Q|5!6CKz^&@AmQVV_{3SJVku;_1e4&+3GZZU@gMt&u zu;rR@Z4LHn|4MWy;Qa0=^!3b7g9(`WHA!13Saev)=3v}1Uqb&nT)oqdO;53p?d%#3 zQWuX>XZJoK0Ras|f04MESGJ!g4XyFp@`~&cmesE%GktX%rQ}V{bNfz^{p{@OWlWz@ zs(TNfi2zjY1T*JlO11$Q&l28mrZDb=1@+Bjy!AEoyk~rMhVdp_=B7obrM)Tv$#LY#f zyafHmbfe?)hh4{O27T*0$_!^*;yQ}tHjk)f?SB;X`%X>s3xMyo8BM8~S(|n=FcVC@ zb@dCEKkJ%YhxQ$4ud(>JFVHl*Z_&wq=$H_F#E{SYP*K{1GhnOwb;f;NS=9z>=Z1= zIEmPaNYRVRI#Bbq%TxI1PW9p-82YjD=p-mkD4BS8u<~d3j4hXe@$Ke@CE&c~@6tWc z+y(@p2cBI!18@Sl-1Iy7IBK9KH}E3rwEn}SvDjVWBY(WAjNLYI%hYPlrX%$^AXRUg z`bxp4|v6%eZMw~s=g0BsoM(t zy3HV22)@551Mdd?tA|hufdA34DoKHiXk8U>3~5*2JrRYSE-v4{nWzR;^p7L)A^U5Q zWE|{`ct52W@e}V%O+fu9wM9E%UMJZO&9X>His5AE*#0Jw5y{m9|`k?s| zzLx$?7)KanBw!8^&ohk0iKK^&mum>*tIW{Fg_I0toCBS9m5JQF7!44f>aN9X6)ra| z!@d-l_}B1r_`lFp!UjIK$e&2%J0*xnXkODI9cd>o*uj}9;w~H7guI|x+~toBRUNK5 zfMF^#c}VP#{4OdP_eCZwoJ|OmX0Prh21#Q4?+_EkwDH?1Kml?m6=Aa0c6>zsHlqw0 zbh^=(y9T4v|Ah>}-q3C?Sd4d7?~K1sn4y~PzY70DnLi#vz9Z-wh)-U3eTj%c>Z6itai|FQA*7 zmNWdZGgNmObR3jHVqC%ZKt3>|2@i5|n43v6S4Oe|$)3JzSSQI%V+J{!sJKm@)GJh7 z!{4-Ynul^fZIGs;uc!Ob?Z6M|^>lJJnBhf#6^mswFw%VLnPVBbV_56}Wg^CEOEAKq@JJrO)gErCpd45^Nw7uL%F&cU_w{8AoI*^+`W$i&{L{a5z!u$yNYk#2PhWBpxsY)9(Q98mOyP@ zYF$WlYuGP&PHd}lB^|Owh=Hokkp42>&$vnE8U~`|m}?srBy!@6mnXJfSys?Oc$ka(JEPrjcz?r-C7r 
zK;I=u!c63I`A@MZ+zNgZZhzTfK7ufAy&J!n7_;IapGAtCb3%Y2frnQ;`mSaL?*{pb z4$PBM6g&Vgh8lpK&a zg}>+*TRoFK=zllQUbdNjtf9$$i1uE)bA+n7pqWyw7r$3EfZkEwDrrz}NRvVa_s^dr z--DdBW~F=y>RvEez8I4-T8l2)LlWT;@32<8TL{RwRWV3`aszb>xXc%bSrxS@x9~_dj;^h_&z%(r$-#jPFWo+ z>S5;vo)(eWxzl%vo7oN{lrBV?CO#u@mgI;F2>*x?qQljnM0$~D_9x*DAuqn(7JD`& zutexBI53?ed@2x+P}%}XjAlPSSG-QWm9R!sp`uj#2t`W&>;-}|^0#sC1&MOIfQ$T} zvc~CN0*Ewvgwk}2t1LTswIZt74KEVPj8Rn(L60Fh3&nr0lgH)pLE3}>4DW~5bJ`>R zJf+7d#C!lDE8QQ}VQ}ylILr zBb0i-D5>`g*DM5e7vQP|RUJu{ar~;*`I&`0bCYFNK5tSZXh|JcUAJNyj+>-7Jwi#C zmWJwCbePEkDncgKJb+M3?-_tFd|ImB8Ic|xuJcBIT6jYji5@dK)?l<%+#MdTWbqZ# zPlTLy-spuIi?AACXh><2?k48I`n_5P&LL{QhKKiGNY#+>Cng8$-0?{xvc%rTwS=40 z*9IxE1Ab9=gJcV()MkWL~#krDPtA)G2G2j~3J`3z?56k5TSrjeeCvj2<8?qAbx42q!?l zYTN}aMaNXNf@P`Q%GLank>eFve93}H`7b_X(kuBp-s=%rg5A0l$*NJRSOe@7l3Z2i+DpKvm2NG^3v_Zw)6x{7 z^h-l_c&y}6J=W*7B+A@4$tX^ijQYOl+SZOLEP6(>AJAB_vEeniIorGL2~?g?SUV4% z5pv)13vqZZ&w2h zhP0X2VK2`Go6qAWjPo|R;6_i|X^5s)d?E66Ljqxbc|+|-!sD#@mJ7tb)qK-?5;P>y zI6(R}H{3Ly^kZDM;V312@}zx2tTPWd)DC;e=@hFDM>b6UP0z6sUj0y)|ct z{uB55IJM>{w_${$i0gLC?!v!Y39?9l!5ktbZHP8bl=R2lH~2^vqRzJRZ{I9LRww~d=jr!xqKO5>Ecc{-5cYH+pYhvDL!5RFo|a@-Z=T*2s< zLPl7xwg$k*npMq=QiajKu{2|@{!D%1s*BnqbsoX~n)TMVb0(XXM^S7Zy4M`WQL z`lBihRrbArYJFaG*G`Zi1=bM*DTqvO^Mro!4{vpY&zZiYoddIZqAZutRU;Hi(B}e+ ztK8i)2d>He*_n*EmlEIBj9L;I(NclV@sDd>hM6-R*BXSjotT@02(VGgt(S-0RvFc0 zz!7pS?c4DY$#+`q31cE&HSHqo_Mg*;Ck9W~H_sqg?CgxqR9lV@*Vd3N-Os6x6`MQl zsW~}6+XAWe$?#@6^-YAVaXD?Z-|hNaG~{%1V<`2LtrAfPvwDH4lyNXVe@g( z!z5ATWKm}L^}3D1M}C;vMZzo7^>y|_o4u?%_CE^Pu~W?|*R@_$ZO*>i^hCKeDY{{v zVo$iZcCTWNUyOC3V%_u(Yq!j1%WH4`k6O^-Y;q~jZS|}9lwIGn+|ZQxp}t&S6drH= zsN1&inWa*!l2Yvf$V5p7g%8)8<-@;>fhR%NHQY}EqYV=Ffw1|1IC)31aqoU89o zgbX>BZfb|vysw5ryv_44toh22ojS9 zgxu};u5TjJHokf{3VMX9cs@88{W6cT^#Den3fPRtCdIDl%fd|vy4rIN*X230XFTrY z#L}J)obAif-ecGvY*Iq}k2dO)a%vMj24qHO*#OEDS zmTi?(ANo&+6~P043Et%%-^3G`*S2k3AXpMp*Y%EnCTJNi{})>=!imuzp0qVt~2F$mDUOgiWp(0WJaJqgshPAs)i%l}8+ z*ixWR&fVH~TE|{%=~=5K$A~(|5K`tuymtBpqQUVNiA}$`s_eyk(3bW0JnvM`D|2ls4^+Xyecf$gJ{Ln=Zkk 
zs89^jA0xuoJrrc_H~YJ_+Lzc%I;gy*+6hc?oXj0dL?!_=W!U zU^!A$tA-vx2a2D1|3TfFNGAWt!_ z+q=R|cFVZ6s#Cgy4+Bk+n9hJI=%x@#BVYB?pb$ zIxm;$4SQ;^l|i~4yk*s`nn6?$*j{zHFcuo2^h}rydmwLK^a#e54NN$MemqB$A zXQNZc_|mE_roO)HSKEG0c_q2o5rM9LQg6-g2M1a;@mC=y&Hnx`!S{_m6A}qS!smmN z3NP>IYiF127>Lu`mnHUfuy1%ca4po}mA~*OskatL@Q-PJ_Ey>p zdh#YcgwOVB^n-!fLCU`n^Vz|4Bl0+VF(eWVV~1qlz`SP1telH;W5@bl#NK1O*q6|A z*a7`6peAWb{Wge2qEH}VF!63$Fv44O4?F{TK{PAt8X75t#IDEa1;>3uFb;wodp~+V zkH7IM&|S}}->}OUW0b* zP6nKJwWq%_X~4^xT%FrrB6$k(=}9LIfNymqRUL)4wzOw_hp%fKjCz4sSO0zf2AJF= z8fz<0$vfK7PMKA9*hHsnuG}pSqm}}R#4g%Q@Q=#3v`a8Zh6BA0u`^P^0HY4fJI#zi zUmBhP@D=S;A`PlC7L#(}YS9?-LL`i!Ab&>tSG=a2##~AFp|;_W5r=7D{PB5{z-N&1_dla@?AP6>~gMiEh7c{Wh=)PGOF z8SX7dFrOG-^eUVia|6E+@5NN&JP7lcyUK=$3z^T>$CBo;+{0IpuCT)Aejrg;_QSH+ zt<6ZZpQv6Ji76Jv@Eoxu;ZDqDoIvOW*p9CeXxHTtngnhun8b8`|6Djh#K(-xY)zTS z&6<3zH|mXg73V8vhVl#gEH+TlP`VG7B+pA#;nQT{VK?#brPXuZ;7&`|k5G*$1lKx4 z6N@}yPT|Z%y*9?7-eM*j{7YVAgLPqRZ{Z>|(y$`jQ8jLk77I`nj!+Fp;HK6~YB}OT za{;>$HNHWIT7mYg-CGilNw5U04Z&ucJeFU;UN(BoiN>tZ(MPFV*tCACGMVAdp;mu| zn>dYNa^ZE(HRzS3W87fG)R0Hq{m476JNPS6VZ*Y>rRE&g9Ee4h%!-1w)5oxY2nkHb zsz;?4ykb2-|4RJ9UX9g+M6vhbPP&G3w&C^+Q*?+aihd2dUpj{2jUPjO#Hb?pL#d1{ z#EAUwOh;05;!-A-ym+aZd6Rs7)=}14vS^rMeK3xv9-%!E@1-rF<0)pEh<+17qupaf z<-5=`80s}R`a#C6r4t!*n3HCK8J>*J5p5VQQ6>nSM4Koa{s*#<@|?c~oKDT=XXklQ zkMn*cm}qNx+k*>edLDV^Gg>+?XN1yUCbcWq3wy|Z3JFO_Hp_LOLW+Y-kQ+^5O9iX{ zrhJxc3OYs&kp#`;QI3g6W$7mn`9_)GCTU8IFL5V{rDp_#vv~Dt@_p64 zpkners+^foWVT{-=b(cVY&MD@f*4YNia43rSWB&bLUOds$i7Ar8$ZOiknL)~fq#)F z7=F$8M53tJBR$mq#Q*G`%im4VbjS%T!pBxabsjORNt2yJJXZfRemNpNw5T1r1N~AI1g|Y82+tv_GIk5)Xy;gskcrVOiWlC+YNlQjr($fo zU)y*U%F2WjFgZ+5K|8h$^-7R{yIDrzf53lFzsui4oE&qVKS2EMzm5NeB$?VMctU(L z(nIM6{!GeK`gh(@su?-NqtccDguGB%ce)*KDt%szFK+^!=O4%mrEi=H=3SrzMkvWT zZa!B*`^lZdZA1LQxyv<`Dma~7bQ*-i;HE}*b6DIUzgrwN*JWxp=NISc2qlJcCdf9> z!r9lQmk?vwbyDAwKiFlG{Pik!gxDPQlN~6Y=$FM_Ai6PiDw{5B8=*u4tPlEWR2u7y z&HEmk>4fmyD~z=fZf50ueUp0h%wrcug=U_AHLx$Q4|JM(|Jk4Tgo`Sk@OVnE+&DC2~Rv0JGL)ng4 zlN7GFg?zMphk}Jp@=`1P(SJL@6`#>>ho6gchq+004Rrw=DdkpGEO 
zCo1Gq@ome4WjzG3SGe4laK?cmYa#}WXoGQ5@rv{elPG^p?ybBd`$oyl5y@sy-mSSN z1yC8GZIaK_C-ZJglc}#9m=Znhzz9WuCSS)MkEY5B*@=}NQUn{Ay+xvC!`EbqPqR^> z;bL0_ka-PaCA-=|DLVGwsWjb3`dyre+9h=r{irCDED=evCyNt=UlVqVm_ppL_rfni z?|Eq9H(`u}P8iF-IUeiG*CnX79B0^PEZ%lxm}6P`o-I?YuF z8SjzMX5(;ejoK>KH>D%9Mf)30mpvB_)g8;^3p%W=tHb$!SQdte`F`d>Pabcx;e!K< zH&a+WLg6;oT`%1VdT0Goo=_BHsi@q(KEw1+^^e#|#=pQ5mfSb8AdRlUW)bAL&61i5 z`&WmYb=WVpPCz9HVo3pG3b&hDq4DeP8GT`2V+IY+5rQR~4QS*>*ASx{vc_(yc0YP> zL>6{~WdpjQ?9f0w{Rjmf*MO)K0;v( zre%Z&)fZ|^#QuT+!vqp2Ri)FAr$<}0CFJhKk2HrU6KCDg-XQnewJH&`#1RU8t)`Bl zt#&g^VcgHh>G({~wSQ_SGS@^;SD$A>7L(M=n9N!A8YgD7-C=nzYxGM&hZ;%*#Z^q* zRl%yfTiUh!A8TXOr}!;VLY0`Gzc@s>pC2)ctOD|(c7MzKx#kgBs6PD$g@5HI?G<@T zUalrd_Aq6e>VRxkl!KBiB`-R!Xpm;jA}Jn7$BnU<+~tnOIiM1B3d5wz0j-;UMc#BZ zO*bY5ro5yXh}^3nsxK|V${RJ~W_ieuE1Sp67GL20>yNT|58i0M);U#{YgpE=xw}-C zE#)aLN^jFZBwjvXys#)rcGT!U^B>tsb;uZhu|4<1NDo0(odqRZil?`GmCZ_j+v-~Z zir?5=Ql(niwi?21q{nN6Q}2Y@4lDgBtZ>0k^fEsrE1GY*ho_r5l=2;4SgU zMg}y0*~R+bumj#7>aW1%Q#LeyhyAqG`T2oAGa?HZ(E1ckDw@(fgcwTqXskiLi~Cw% zi=MZPQ#TDW+xySjLm1-}Ry`8qV9TszP+CW*>V}pHxQ&H~#`}1B8npf-AvTUwdx7w2 z*);1~qRLxqxkEfWMP2Jb%(GRtnaOxGN;x&5sc#GJHRR9=*Wam&q4lqNV2z`_4B2k( zq5FHMn}g`frl>7r=u2!-sTVl)BUF{PF_4>IfU9@qR<0*mzjD{EVwyj2XNTmOGC1G8 z7aChRU#A#N^SIM&S%KUCvqr1@8iFJT3OwrECA9SmEN{hMS1mH#5`&k1H-?IxydKw} z#EDb-YKWq#wpiA4|524!>dQ0%`Dbd^sq5BVvdmTOUwPg5Q+adgrkX@0*-K$yD>qM( z85R!Dx}y32ql|T@O=t5DST~s#tUGG9)dX6(&$!6Yw{)XnodM!CpfAzAnQ}n)Qrcw8 zCH$Y2UF18^UkuEcy*ay7l>qGbDC=EOztO#-(Lb=~L6z?;!^X-gsC`n;V0E!AJM{_9 zHfmg3E5vSrRI<`{_usFcli<4%4&1eZ(-RDi_K)t`2dS9#y!$)^Yk#rx0Mxd-mNb*M zj#7@B^iXK}y1vOU?dm^!Wbo}PBD*3H=ltC}`jEA*lR61V=D4V~4=AclhYTY4UD9S-yH)$8YlUFnL=iWx3z(mQNIv>%>+7W%0N_njEP2Y}#1$pAPeX_w=)T z(=xl3u-3(2X>VX5g=9h9K#f7I^oW?@j;;?C(p z*Z5s+B*F2p)h%NMNIzmzm>|W~vnfa*8NaG-KHoN8{Qmz@qOOaIHEAds_u^ zNLXmoFWGrNK;wMbsad}pr^&*{r&~7)Z0k7R^?y`VSD2oiHl>}e_lfUn*{#b93vTk& zw);UEmTIl8e)XSK568nTSV501ZkGEWwZpG6r)X>2uj&f`dwn=~X+;D_48>KaA@0DN zz{v%Rkegr&S1&vlh?XZj3hCaG&3E&*CvK3cR3+~fit`3JjX0HcrLb$+p zL#Lt^<;3z)V2`}3%R_YKK~=L96>xRM9_&$K&b 
zNBK1H{YoX@G5Z8?8gKJTGUybqaKU?^kT-3716jaD^=0Rf)!@1>`Oj4D@-xLV6$hz9 zfT?mg=v?_j*`ll`Raohy*!{qT5|0H{RU~o4cr^KvpmSqT)_Kd7+UI$Nra{@Cg}NF6 zrPO?6 z$1XCh2;1_tdP`|pqcw9}+4s7mF^eiVwc~w#%92d;#!sbC1n_P*=*^;3OEtWwbhG3< z5>ft`yb$eG^|l&>IR(14frBfDc18Q(HzSsKKgGX8o*9$II*g;fs01V{V;lM%niECDuEHjHmte=@(#8a{#}JOU->H(K)J6{Q2F6Jg z0seud5{96^ahEG2;cp0j=_ce55f;fq{UkBH8c@NcZ)0Y%gQ%dku<~mZso_dhCN)H0 z0@hHEpyaC`3hc?c+x>uL9#P2yZ`t}f9k7`lM61i6Vf zv0_N3$2_Z^C=~!6gW@IpbtfS!#BJegpoOBZp0B|hMSt1dVC%Ssrt+e1Mxr*fbg$tI zcXs(I-4BdIWu0~bU|RKkHD=um&@JVM6$ikN6!$zepydjs-5GW_SKioAxTmSo76>4% z|H3IMn_63fURQxMA1E!Z8ZuU#suoV%Yt@;ShXy7<)xZNT4ZZ5ykv*3D9xcYf< zN9PpwVSsOI7iw!cqbZ~0&&nh9g=;@m|4}<3>@cv^@@Veq>Zt~*-4OdEmsuB%-CFoX zc?EZ-w1Ne}zbn@w_Y&t+DU0us5<$R}Lu3eaM(BGA6kg#`MmdYH&D%Dwh^O@)N>Zg# zw4q;voSDn8t06E%8%_ypDSCvzfY_h>m^cBoJ2aB`2~**Ik$4hwYnURRu!!I-NN;%@ zG9GoDF%7j3lLr@J{IJIgUt2E53usbg#zy5e>r>{l0k|bUeu_Jp?;V zX43=+2qhU7jeJS@R!n~v@hbcDB*bF+xagsnFot#+e za#$HB8S)kWjT4oB3@POVCPtxNaTbNlMmKWixn-jrZ2vX2QML+ljOyo7y*LeYS2~H} z2kDgLgTKIX#LB#@@K>TUYdFXZQNU6u@`><(>oX)?cyNR=XsW98(?uX4SbLqE3=Y?T z!Sf-nRQL0y!t_d3!d3W3MgG!R2)Mk@wE^KPv+ZMtW$Eu!2G_}iTdQNNEhII_$xH&F z!H12}xn)q4VP(Qn*hSseU;{i@x6m~oK1(&a-_V0A(%U}>gR20o5aJKu;wB;R2Iy*i zea>|Vz1D5@A?OWDPOuNm-CQy25iCu8ZbX)vNqtf{!1JeNmrlg*q}?lDRP~E4teTNM zhOrEIIj({622vIf%RCAzak<0JgjEbv}!`WP|G}e^Q;Hm$JO5 z=ix6_-KH6lmzFfqQ_%4)Qicfid6=SmDLn~u;5*p+h!c@CEQ9y}?OE}Nv(1KanlgJL|hp4d#o{N$7IZCjVLJiAIo97gi@5^#ky( z@f3vsQX#Lf#0)vcD=up(e92=~Qq~^g4FTsw*z^6tIG+mwPsmrtC&G5f-C;Q#+e6svUW130`@)}%Q=PEy z3_yrt<|7t^&N9u&>jfUnTWD}fC@TnaeZ_v32{&W@W41eP(D5iojH?@YF1Cf%i93aN zqEE$t0h;Jg;`sbv`X1t!Cy@}hQw9`ipaQ3{?lyv>xW1`5v@`Om9b9MJc9jhr_is+b@!?@$X;*$q4@4 zsN;1DR@e!qu@3QgzU{Rf`9GLiVuW3t4p?l z@ZRjSW(6_T$O|PB=|M9{WZ^ReINev+g8a7f ztB{HY22BvQU~C70A}gkOY=&qCE_|eic&s1}a~IqqaKw2OedZs>N2aagD+#MshVijP z-#`>!O+4jZF9;`Y9SaalBA*?ha4cQ|o`DIPVLas(McN3SS_=e zw|9hs+1GFiCIS95tS$PIwMm~>dNeUhXDqK;LDMa)+_C_#Ypu?6-cX~g_P1G56Jh^; zmF&wES4r%ngulxgrb}m!%k1dG0H)vV?la#Oy_SWr z)@RsBAuPt~iIOIkD)f$I9Lwr+LwttS<~&(4owdW(w>ez9ksbnzrBb1yC|mMaNJ)oE 
zyoF2S$>LnWpP|K~Edsd@S5zpFIWHH@7o?2bKXR?UBjGBOil&O+C?2IR6?-Wb#K(yO zc)o#|c3PCK&xyMy+@Nz<_CP?; zqJ75lUul;(-Q)x0qjTB_Rr+`1yS%&E2@OSsP00uA9ZQm;zSaH!3^j@r`DaV z7;+HSud5tqvk==*siTxb{cV6}R$pCFd0uja^@VH&G9CL0$uzmpRJ1m6( z8j)2z#gvBUZO|LX5lRw63_FQ)BaZ2r#GOmV>Nb*sJZkiFN%JQx(=H+18NQzrJVF%_ zYIf32Y;Z8J=xf#t=xt}55n0-2^uvoS+Hv%|9yo0f{mg_I)mO%ak^4ukg>MZA-i!>B zeibifO`7&O&o}&)#>{=WI9T1mJ?G)A?%}?ekg1%@{rmq=QhlkkB4ep8S{jq^MDtj3 zBwVW&OTri3Qn4it?mJZkNy>x(d7JRY|DhsuYR#jJiCVm-E#a_cmik;cMx|BVTy$Sq zp!&o8o6=F)G665k6;1sAsm#va%G~tis%}gE#hCQ2sYNS;Uv@k!fq9ENZj^p;GIzHD z9PR9y9V?i_6mC=-8;*6?lq^X)(s>3D5q+RzUU_J+sO?b27w?ibR8@e}@s6!k6KuIN zNYJcdiZnX2ZCKfvQ~4$-sNGbpj$YmR0^}Y%t%VJydCzE>0Lgc{(0UIt$(GM@2ljfH zqQ{Q(Gvj#sWN24nTWb;QPE>pIAYx2VOH(S+&g)>~0i@39SaTe5ku6$@f(aPDpY?l` zTHiVugG&r(Mqo3dmNy>7O$!QWNW-;zDeF6MpPddi+`y^rR+#Ka|3@3?*P7wviZ!Pi zFOlCyK5SS-$qRf@*GSQM71my()H_|T>!Fm{;S7J#b4R3%T+<0nGgx_RI1K_;Yvjhd z^Qp+{!)!Rhd%q^K3`DKZ?4LX+&e`G4$N3(DFT)yBUL3WjJlr zBcMBB*8G900iuU}KkXE`s_ol`9Q^GXdX6{YFJWwc3-L0+tLPo+M8!S8O!A&|Yx$p) z?x-=9Oe%H$q>6FWZ}vvod-|u=(6o4#r~dEEwX8(`{+xeUP~7GM6iZxwq<9;vH?0vM zWnGVqE}zdH<4p&gWP954=rh@Oo42I4ONw-g^xNVsJYCi<(Qj-+UWO>6>~LX^uxI`C z5}9ybgat5Fu+4i+$wxu6J(eEA4`?E%;B;~=b^RsnRBq#j^J)p^NDfCe3GgNVouYTW zf6;Y0IbwE6yiDWOUQCji?5pT&`KgV5JSP zD}LRIf;jWq@c2TQ$$Q?f!bJUV`*b>jAKM_Q^2_0>+kv2hLiT5{vBUu#4c%ELEv6GSKwizchT_Bi-S?v2)*UHx0T5MX6$2GO)?d~B)hkd5AU5c6i4d|A zN?EIi4Z1glaHwu5Lc5-nsq)5>SSOWtyH+0E}{4YB8L#4>k|5iaDupZ25C$ev-Os z5wm?+fAt*ZBabcBiOhdz56*NWFpDqTA&Jc!BLq?d3YQ7KLuZvN6z~hC03HaYCtj&o zz~8lOcEv|N&tq+62Y)p4t|n^(R80`a`3a=~vdf79WoYT+ zke>3H(rovCD$=E+tDb?EK4h6KVq~FAiDYT+(VA4SuHdFVJFlU5r`E7WS^8ZQ6cPxa zs+sP)%A@pElW|1piyVivy2jA$B1`jTU)PXtfM-)ObwEi96nQ`ZER95bqxo-CbF z^TWNU46G%OP`W}`Urv>v5s_A~l=uPpw)hNa4q96lk?Vr>t)#B*!&!jbAQHX=R`6Fhw@1*HkI1AFoP*xG}Pp{0V_F8m5QA=?u&; zWvl8ot*Dvr$h=qz=TAVlmc8fQEeBO5^O`sKS3`Kfl?|ZDyqG2Fpas0ZX^){_Y$fIX zrDPrjR>cO!lVX;<5p@*cF56pXD(6T~WB@BSNnXW%uU;=nSp2NoU;KO8Hpmw7nh{D` znrEv9##mPP%HWThR$|nt0WZtOYM!Sbs0ddxVoNH|sc?%im1tG!G(NajJ{mnMxskiO 
z!AkEf*jLwt&r-ADxO4{DNfaU7FsFaK-VVkrXK;19qu9N55aNN#Zo6q z1SX{19>&88E6){o;LZXG>-Yo*@Ti_3F@EJtl2VqsG- z5fD50AhxB@h>XX*U%L;riQp5#LZ2Z9EO0~LBc7g|K`0|`8_^+WHxN(e64rtVlxol| z=qjpDK?UqK^-#)rL@_Nlyd6oUUGhaCqv*3HPr}vFNB3;(<|P63^PX_BzK zP2j0waq>!tMf73?7HSajecB=IBKf4#=<~wSQD#jl(`g@Jw^dx$e69vo{Xe$e0xHVq zd;Awf3@j8y#TG=mK}x#28x~k%=~%YAyLVZ-yF*DqDH~hD?(V|=E#l{#_xJq&49q#~ zxvz7dXJ&Td-kG_VI71<8fg*OgOGWZ-mcQw>lr_w{{rX9>%;#%*GG9>t%~DXzn6M&2 zj&WS0bZ^4V_&SMaFewozig(chJ{H_Cg(Zy$)b^bLCh!B+q-O!C|7O~-U+_19zcUpO z387rTI^?p5F99>i&KNC6H&iNMkKrG*Lqhw`2bc`t0RI4Lxzb>&btrM)8 zbS>s*!RnNcem@IdfL_`UKt|L14JM)JOtYOEq31GhD3l^6vj!IH3;VXfIcG)sT~L0m zRVF%mFwZaRx8KHmmmGlI1F(K>vB5s@-8_j}J$NwhfkFkm3oM(7;baNCIWr7wk?NJz zSzr}aoqY$g%XdZ2Q^*}Vo7@{vmVQwl61GV#JogQ3zd{>qJN#eU$X}nn4OO0!oxw$K zj-1cDh>r9z&U%2x97)TbMwjX@%Lzmu-*G>ih8|Op!fY^vC0%(|AQRHJfXw@OCQ{!lm3N%UZDtPMcnz1+LD|rW+#zScZfLQUqLy-{a#6F zM1Hx=@$_&WPM48R=EZHll-A3kDP+K?gnvH7j7h>=ATpjnxDfI*#GUXdLf7pa(K6ch z@C{;BoSfzy2^O!r=@VrrUMyQlStYeE>Ow6~mtaYez5pKfWX!(cWZcTQOKux+Aqhc; zop1v{*<~{L^knBvGlU<>akACu>a@ST4c0u3gbGSH5SNWQ2aFAZppBDJuKH+RYL*2P zy^yAP@CAmEp0Y`VrKj61YRN_{y%#7Go(x)u^+hP8R|N(j$eDUBUWl(*Qu9${YWDO& zZ{*FKs!f(CZjSq+Joxt#m6`~-o_jSW7|P5091sK3$p7pd0ILRHHa`GYDqw4zh65nW zl|RE53jW$Q;SZLmlzea{d@?$x0ES5Lr$Kfjt~(V&%8(jnSjZnFPy-F!gA^#wKnIY3 zMS0A>`FV1C-f4VK)VciQ_?v!T!0Gr&CnfMhJj?WXfh9gs{cC{%9;n<@FpsA%X<5pX zH|2WJi=#~QeCQ#53Hfrg zcrSck=HBDpbljZh#py9s%ENHv)eZ9Yamwqt6UuA~ z`(<0o6h}TX*uk8MN!q=Ntrh!yHI9=Or?n^#0xjm4oH#B$F#LCj4qZ8F$fKEdCpN_9 zGi?GetY1ZYlQ6aG4E3{dYjq7ZAob;UjLa{J-M@s&$Eq`h+$_DigW9vVI_QMEViqP4ZUQ0JL5YwkqBKu@e?J`;&GqIYDzJfLRFBRE} zgp<$tgOT6J(=M(kH8RXn5&53Kw z=MaBbnTHz?7nlMaO+*3HT`d7V$24B;jnt-i{Fe%@lic#R^<;_P2Zh+t#eSili&2)s zzpGyqL!-8CdMgc#YLp|0>SEZ7ROUZRMNql$SD2m0u+TZ;-jPW`Lv)NWP2e0`qMjn4 z0sJ=2ikZg^x&PM7f#>*ZW9{5;@!)`zBWt*?5;Bcuxm2L#fk)hqB`x6O2r!)u%3hN9|&g4rCr+O$q}ZN z$?svj%cxzHNB$eX0xX%QGeX>)nV?LGO*-Ri)=@(+gPUEx{~?2sW2^j|Ig(PMf4PKwDlF*uYz?3FKDa+?tTz0wO~g10ew#aP2PnP0`pvw zU&{R+P#++gT-Q?jk$TqQ)L>+R0giGVS-4+;B1G0JH&PxWnesU#0y^yfPyk9Ie!_)D 
z3BZ!;Cu0}{!6)sSLgc19l2X&6X0kHs!R#`awwwmP^je(311ZVXpzeK*NeKjRXhp)ZM#tZDhz4@E8h_@smU$th#KDRQeG83 zyQ-lgKPEzUGUXN!vq%-lc8uU;$+e_xT~wK^X{laJtMOP#w(P=*o5kPa)whk89FKpo zs<31_!C!Xn!wK|P9^UkiT3Kw9U}Zxp%1*px^hG+H6nfBJw%?4o%}x3`MR65Q>YXAR z7wC7gP>b?t+!EyiEbLExW_`PGZJL%*tRy_$UZYWbC1Z{14~ch%*Qx^XhKvr`K9(u4 z!IJz^?mQ_L$}aEp6tBr?FnlD6%8k?T7LMdzRqYoo%e}NpAkfMK%51h9;eXqqOx54_ za<&$ZK}xMI3L~MGh6F)7RHD8hkb@4ZrV4gLi&hc1S75RgD5)EL_P@HE&IlaPJgZFp z7xZgGW4<4HL4BM@L(i)^^77HAR|T@)VPLXR(&c|zmRKhoUx74brNNsfeKL5@GbZ(@ z7jYvYj>)g{}s{(i76e9PB`e#SS=td^S)X5ym z0iL4?M-!CIbAgFT_}y`dl9W$!NHi{ed+B9ga`-BaN}zsJHii_^9Xpb>DndE_Lv(hu zQ<9=bOWewoFJ?G^G_7J+8^AaNBF97nvo99Ud$cATVOjf}01lwu29&1k&tQZ&fRIsp z!yD5_+}Tl#%x9*>(I2ztcdd`{%vF~wMtgxDNT0dbWB?iI9zB`gkj6gOvT5mi0tRx| zMa+kU=8@en;lBCrOio9T3ru%nBj+KCa`k9)IJ0n#gE<67JLkFvdKf<935R|L9rxwJ zn!{%Uqv5fxr6H+^jV7w0$%vkvYr?N0wdK0d>S&rc#kL8Dp{6@t!)3rq+?crN)N!v{ zIIpm`ep9$3E?)!h;GB&4L6x|0wU#g~T%+6|S_Su5w8l!p$fbDNZ=lnlT`r2WhEzEZ zI`vwZwU3ax#Ud?={vjqjA4&EHyQw;11?Fxzng$cvFFi`G&Gy(unnkl9iBL2S+I- zMZtBo#49wz(@vD=T;~%ZcxXiTi52+o_!@MP`%dlx>NEbZkQh1Q^M>pc6Cb!25*7C= zRJMIMfe~pCf(L$!`Qc=nq8iUN@JqD^c5Q726(s+)>}6iz!$XR~iip?4ha!I$=tUii z^-RLXvg0AaHStD3m?JJREyYuR02l-6+v=MPNSj}5hk2dcV*f|+9t8Q|%*2`eqOgHv zf1rHi#ng_V(3rZk4-ScO?=v^*T?Uk8&2Q;SytlNHf+n!fdJEHD;@tf6Ga7O~2g_xJ zCiI2{W`79Oij2sG*{4N!ike!Z%2VW!I)hP+~CQYb>#8i{MFHy8AyLY@l$ZqGe+_uaI9)=vxI60q# ztdD-n{l&~nw&wzGF}~^O1-db&RJ3DW(q+BDqAu#6Bg)0QkvpB%O1EZ)x-rC`V<}z& z;RoMJzcm6ETS9;ff0y=s|2F>S&39u#Z2Nz@cE!K)U5gk8t_<9j(Up8KR5hj~)iUCh zPiUHc4BxsoeID>u^FxL-v1z?9=Or+8k;?6*XhwevQ-iz5F(aYriSfT2tZ&754M5KO{QuptNpQ4NEqJEn>w!I?^tDlvvxgPwMA zl{uc4D)Aw?y&B&Dr}K`l>j%M>EM}Mkcw_(cJQ}z;=u>`kibI$dcp&^*~2^n>Zm(1;tT4V zMRybr?WF!C@;sWpE)azJ-y9or-_wWGo5%6BA;FTO0?v}Qhiwa9Aa=T)2&*GTSZt3t zNYqlV59bq|*V%xoh?|#mVK%#cX4mCj@m|9k2R8d1V|Ir^0~Z*jZki#@^nP&=rr}89~Mn;-787Sb@vPseE{z9RTi3ud<=-N0)S=#GriN%{hwc>jHiz3}*C&TnUcP3XdQ;=mBI;8yfD6!nUs{e$2rq4 zWzEVx;1?;Qixj*Kz6; ztN0Wg0wpb#2KZx05PToVF&qRx72O%7gYflJj*%dS?Std!$o+;Z0pUpEEZIaTFpN~T 
zq${t-H%CyMq8IR*Ume*Q{F&$FV-QB*JUAkah-Y8XXGDnD@SXNiW#FqM_!5p=lLoSAZ=sAO}B(QSOb;Zksu8iAaw5h# zYKX!OfX6CPRGlm19+K(i^!Qa|t%JRQc=ACN#iU@evz#INH|}5SO(_r4<4i?QMlR!+ z_nUV-@(|30@Nniv+7_a;u&>kW8Q zJkQRcU_;`J;rD_kNtgEGA+VI)Yf*5AlpTvycIeU*0y;CV1C2w~vICM$Jv(y>Q+FM) z$ZZAPHQb$tNsribJ+CLjZml6CJY(;oJoumIH;$ARo&GFjd4@t}oJVaYC~MeOH;bA5 z(Eyh1m-B6pSN5)4n$qw5OSxH#6#TDsPPU?lq`2mH2P=ZMgKOPK(n<@!HfianAp7+% zW`H4`dzPo)g#1$alv@Pdvq+(@Enor*RD!+<=oyN+ z8v)vdYE_!a9z*{vjsMMlqn{>t5{(0Eftf@HH+1qo;!10$6mLS0UQ0?3fwAjV@(V(r z5+f^`fLzkDlt(83G8wM|4<RkT-v*@3#&L@fKdU{?-v$XTO8j}vvwUr_=k5HQb zONDb)eddOcg1>aeNQxrb5dZFRssH&VdncKjiK(y$jBb= zVAP%%dj}-yOkA_s5p-PqjAjrC`RYqlhz_(m{{eX`{$VgY2JkdMBV_SfS8`&-wzWx+eZmLNAzt%AU`X}b`J zL58_W0_0L=hQPgvwSh|X1 zjup03AsSYV)BLZl0F5*|$_KaX3^~ekn_HO<6qu1^<}~@Ex^gCvd{bp(RxP<-Apmxi z@Yk~RKV9B$*_EC%P5^eAuleEYY+?XN_c;4cu=DN^Zd_>CS{(m&=oi^)lo4&Zs4Jsw zNu!sLel9@dc$oeqm}zm8p&I6|Gs(b4jO;36N~1E?TC%r9-H@#-eF1+vcV_>gQ%eH@qk&r*uWFoRre@h$t zw-^0ieYso(-y{s%P$ABQd&b$&y@U!)E2t&GYlk`{j}WzHT2{aREIj|!WnWa};@)D> zEp799tXCry`Ci-CToe#sy$N1w92m4buxKP0u(+a+V=PiR>y}tM3-Pd@f_bo*6ELT% zD=rF7*y|;+4^39il2(NoEMG0&5@9E^V7CM97v&4&mZ&2Rf~_GiGb_RNu=_f@1xF&~ z_gD#LqAE95ipHb&$Y0@IjMZP11RYq+;ov38Hk*Tv`eo|RsfhWay^V7{u4#7)7YKN^ z;SX;nUR%DIt(a)FD4F%Q2SEQ@-PjZ}RT7F#z)TSEpq4YUEeX7hdhZMM=2z z|Dm>!wQ{tKK9PUtywzNwKy%?c*_6@T-t|u@?YR_r3271>v?#wAQywQvOSu(_Fa%v= z6hpiSMQK7vNznG4`lLc=bhWYdVrIs!GBm4A!f0@Z(W7$J2Q&_%w*)mz({GXrypZ{ea$;*D# z6?UzdUkO?@x5iwFy!O<(KO2lT#ws&6^=*EzMQN+CYS=dTcIl3zYFBo?-L0`FVlQ=H z_x||4spbk+9qf;h!Bq2`mlj4n7ldH}p&Rx`+dj zR#5@b$uY24MqCM?Eq)~7V&XmE`(*i)O{oV!CTWi8ff=!xDOtJMh#c8zo;*>0DY&Mf z8PWyqgB^#DBTgePpr+9mFz2!7aA)wR2$RGy(kOX|(nsx~wbPpzwajvsl+EW*xJX_O zA1DYHdW-DECX$1N+ofxze~La7KPtIVdb(_=yse_5l2e7MPOAy4b*VF{-`lXh;b-HE zrkUo+mY&wCHf}q*BfT@C%dOkIN4o6=k8+X;8v?&{v#c<hhdnWQy8h|2XD6Q*zsP;*{%Y5& zcdy6aNZvx;g}gWUu;IgtkK><8Kj(k(_`3V+$AzhH;_vZ4w0^w&+3+j+x6<$4KaqbZ|=FV_wv4T`zH>JsP}919BkKYK2)z&tzD^8u3M&8tS>g;8?t4sfnq{5 z#hYQxu@>0FI7@;R)*54jvPB(1*`e*R4g^QC6Wy8ZB5;+u6}y*slzCQo)p*zYH2Ze= 
z_4*G6oCrJ>G#z|BWF~Yj>}L3#hb0ADB=hYUbFV0G{k1P@6@A!ObPnY{$(gLfd9 z5w%D=$s5VPDX*zFX=myEjC!V+MP%o409;?5EniolCR`)@E_yDWlbk6Wlr|Izi*Y5H zrIBUs<>nO{l`2&VRiCS$)XdhNtvg=d)llD9(!_73wqRSKZQ1Q99e~cruHbIJ9`9a{ zqprtX`keb+23!Z-hdhUUj|Yx~jmDfvJef9@H;$YjO){o9^r`D0fBoO7B-cXnffAsQdA;CxcIq zKRfY!{Kd7Gk6(R$qwsdqyS?vqKOFw(^vU;g#FxacX$x83vcG5i0R9a9<^0>=kLqIm z%dc2IyCPL_YL((@<28XwNo(`g!PX--AT}bD;hPFJ=cuG^iQO8g>axvZyVed9pup=n3_y1}MWEBcO4FiI1s+ znYp=+#lFK^EtRa6S$(m7ZS&Cf=8>y*=jmAV&th&*{-9B-x% z!eJD(e@@;wus=6RUtVNwvxKDGjNO zpr*9u^yZ9~%$BT{?3SFS+{V1R{2Fj&K^de7Du(glECdxvK%vkOOg0vTi^oS20*LOU zBV;p*4s{<*mA;lC&-luG&3eF|Nwwdx9eli>fXIa4<8HY%jl;L z)DDgf-9IijvTxMsMAAv>So8Sw#QUi&rz}s$o*|sAKX>;0>*+xueQ)&0j0)T{F}5C?&pzK1NeG_*Hr|I&G(JEJ$D-(^s4$TET(rFkz`re^Do`wI2Z)f&(73RGaHj~f$b(VaqE|n^a#xc69RC>Anm7R|jqmJ{)>IyfC6Ls(fPaNy9Pw@xY1XNyHTQ zRQ>6ZGdIq@Kfm^Z_O#o@#7nr#6<0>D-n+j1hUSd-Z2nxy{KU<-w|Crey_i)%t z@{fET7d?6U%=|g!#q2Ac*SI$`?~LB_KD_VcHexRH&SQ%(@KP0Q4JL``jh(`f)5zH0m=gh`xJ0b2$GRCWd;xrx=q(NiLqnNXbrwZJ0TdA1D%QKuqy($952^4y)iG_$S|o(dLre z2S3n;Lc!T{=p(#~-BxH2C$>n4Ze{7>-eOiV_NV${3TT`CPGZ+mmRUGppAlOZ9-{QC z`tHl3#FvepF+uqhz3Uu9ktJbL7t~$h4(vI!ANONQDEcz%sP9`0itb}h#io#>zgi#@ z+m_$0Lnb%8Jk^Y()~x9OAkUP8Bnv2=q6JJhszcnDoP~DcJNW9Of3c8ek1?MpZ$A$s z&i04gF+iN^R+(}_JZe>G-;UI+j~CxU7FEupYfp?u0nO*)&7v@TTHGfn zB5=I<)+>bTu<_(-M9k4Ot=|w0?bf2_$UO}us5#`Z%4fh?6rkv~_iMD7z~A&4#+Kgj zF%katqTJ0{_`@mt@$c{t#~-zPMOYnuC6Y(b zjL8oL@ZR~z`Iqp*OUC0H;1yG%mTmBd$J2z?2w-m-5{g{cVxKsMJYOa8UXAJ~G%>YB zk1&tD38LM^n9oqC6X>xM6_jywP7{m#1RWv>Bx|D!kSj?P^i<*m@jb@On@faYo|_34 zBlNzqrJ`x)Zm7tXK=z~QWFq-tVny9jJA0b!i5)pVA? z!=HSCptuQ>u7Sxuy!qolN!gtI1|{MIE1dh9Xvu)W&Jxbjj>jJ(fT+toFAxrry-iIB zD~Z|9Zj$}VeqXsqiYo>WBZ!7up4;$+v{aL7 zJd>>OG@8V0th}5-EUT5p10-Cl7_5yVSd@s^IQ&?l7&3^535Niy@Rr)==! 
z58zHxFFv|X%s;*`{hdJV3++FOAMSimZGb=6+{moJP1G_9lySuJkk|%XaN%Wl5H5%} zXAHpQ(SAI+Uk$!sfxmG!;@Efm%9F*FG+freX2t=Wa%Xk^JoZ7OVoV=)s{EH* z3-*#Y)aVxW2V>^WCjJ9d=jk`x_o&34F3vWTRk<}Q73D@_Gwz_cd6RSydQFTn^(VT~ z4M{1&2#o{Cme}{Vw0XUh!c!v7NpfJffc1%FQ)bQ#BSuj<^hbnaxmRd3f^D=8)dWB5 zIzWDouQobIs>YYi*Kj#J@suUIkrURH$GputQo4l^#>k<-XbZHtoJ-WB)YK?<3Y7fQ zbp_d#RACfM+Dimpk%ZM`M6U`_}H;%Hs+Dpfxnb4-Pwj%o3+6gIv7FelD^ro&V!DQ_q->9= zb~)!0FR}g@x)A26&et1Qm-=f?Ml=3&ceYm116t<`OQ^RSv*Swr|2uQ*0ZeG9Hq(Dhc7{ zq^(81PA7?){8EF<1Yf$tH3in@^G&1I89o!0EqiD+!*vo(s$TC1-h%wT^-e|#>0a&i zuxrG*(o829;ggVT&_GaQteEyIQA6SetECahfcm%MR-~(JeyWMg$2{XrA?MQ?IRz+a zD2=rrZRseXk74``9I1}j2j|O*H;}Fj;0tYtBlU`+Lc(={sbH@x^5ACfReT$0o-M^E zhDB@Ku{kN2QJmQ>Vkuqmrzq7=JrC<9^SjLFt>+>yW*U@`9->GPjJnJ&W zGh{uZj=aYKOv@py(XXSZ5x$?@C*57h>Dwkw6b;mz6Pyvq@zi)>To!5$)@D0Q?Dem%>Y>(;r(?pRqcF1RSrjqYguN5@Mfs1T5I(SbO5t+W>CKW@)S)ST$@1gddy<5{$0jQG@lJH2SON~H*%|hmV(3O%pXmM zlJ?SnS9J&Op#Ca)WtT%y=8x%Bkap1RNB7oJ5pAt!D!(Jx#n>_wLPAq1-h-Ge$dlYb z%6!hkI@DT!ZC(`Wx$S+{8_ZtaG`cnR>hbEDQesZagNhFXRMB*4KK>{*t7s2i13W2y zi@TDrMsNz(?nmcxarL%YEHm6KT@P9zK4O?y-NZ6)zEz>f$dbM)A=55WSW*XSLHcV7QW7U);F6^j9@VqF%%*2N3T{Jc_Zo zG@*{9BhH7nUoh_jVQ=9)wAoMp&FIs4LFuEo^uMm$+Y#9SC}lK(&(AkPf$o6Y9j6 z`qG?yL3-t**i3Ft$y)CjR)pxI^(nd+hoy6oa)|n}ud)JuqPPA?$=tvlQK&SnXFWkn zY}o3Vvr4eHjvl*%t6g5}UBuceyDzqazLTe-GfDnLqj#3IA3_|gU^HDotmbg)FbFj? 
zuF4&emf=~p53vxnLi!GQ!gIB#64hmy##2WB(YnOkiJj<(ZVMzNmtSj~!xK1)wItjn z)bC1vTx9xasWEPUBwD%wx66YqJc~28+{g983AOGq((#t<(AIQDMY(*#SK3#0U`?4U zGNoHZKE*A4b4d)@J>q$x3n|}Yhfs;we)timgt$@b4gDg4-`3kwBDh$l-(bp1W93&r z#J{OuElBHlC^sQ5P4MclHp`5yp}n0tOAT%&HhB*Li^FQq z^gg5aRu;6AVb@E28XZzsN?og0hrvW%B}d(|cm`sV!>X+19D??4Y7O;a_5Hq|@KZv$ z?(^^(qD@;fyg$F7QG!rTEUG~xT7yQ*-H}9RSLt08#mrN<6a7>Z%bLR;t9sO@hi40* zUC(fxgd45HxV8Dp4K>)tM4f6DmKqdZ7K1H!dSCb*d&5ju;EW5=9A<6D@2s*r=1O6 z@SQu4aO&_)Et^>4+*frU7!wJ5E1%N!0`VnN)GjB0B$r}kwt_cDnm@FUafe8&)a^Yb zrSkeZ1Y!|>doxHdmm5{*&GU<|uQXvl4?Iw^lUd@VDW0a=n=bI2D31=gGvZ0?ijrQ( zN)4Vxhe9bFSJQN(NIvIH?Vxx#-npVyKo7WHjNyhjeiI*NshGazj?zvZ0@1&cvnup^ z1{*rLH`)<3_Bf9w?~3I)rL|_o9{|59G$d~Vs*2?WHyv+?S~$(7kGZalk; zuP3@=JD1(Qw&?5_$yX zV!2Pxnqw(ko3{2&Q|!6M{AR)0#WC|qayf^&?$=};Se zFEv2?>f|ptmTElo0qzguA6O}Sg zuY=~SCxpWy{-{4!pQ17l%w7d0^!PEir@n5pp=U;>HfYe~y)`NyQ;r_N6vvUlM*Tt( zDd-@H1Oa=q!zUzIKoJc`fyf~*FvQxBlt*qQ7g^H=$zmI z#pK{A<`~IQ_~`hrvKTV5?_JR?$mg#6l9JTqmMcO*#Fx6GygIKNvP0J=kHi(NV_Y@5 zEeN4LIry6yNZul(A1BuOlCAnuD>pzUx&lf!rrNbQNWCIv>kLHIUK8a%dGC*eONnef zqgVV}^dwDP#%J=B(C4^Z%VzTWV~^|CKpHwPRJo?=HjkE$Mx3ebkp_AVmh*+zkAz6K zbDfR8^K%&;ngNU^N~CbvaCO&F(uZT&Px4xqE8v4wETJC*pDc$3oHZBP{I+1{dWEmXiV6FptkCkF*K~Wt!7^bU7!xzkD z%el&3SQ~w(!c*8!>L#3@xaV|}vpcB!k%i-FI-(;vj>TTFz5f z|5_iGhm}ET2ff8$k7$6pSK}m$Oq!&zY+_l&fjwU%H)n+Q3U>NrP+rVeKcU(Jo8{H~1rSN1@e_tabf_vIRhM7p~}RPZtt~CG)FVm|I8Z)nuKhnA;FSQvXb4T7cP_s1pzE|4xK`IF3#&AOo?a5wGlmpXDt_z)&ygb zDe@A&8AU@u@Q%RyXnov6pAgJOT!pzD)(V&U<~%HdCqJ78d(56cSpkPJuQbOZwlg*g zULelU&`2PXNnMfn2o*=Bc%#u##NTG#m=c2BYelF@iP4RA=;^|@qYJQ9(QM;A_;&sW zz7&3+8;o#346()&J|G(yJG}Z)15`KDHq2V$ua_W*TYboNHHa*_!$=;KTz01c3UerW z#q)u0mF$8aLMRII64oIVIT9~9ffGNDj(ksxllh-Zvo|2 zUE~_TvPz%8_P{}fzv3+sD8Wn5IAk|F)1(6}yEXsWe8Gxgzr%%D9r*bcM7HE#X8QNGd){m^U zfDpQL*Z^p5(*mRfMyzg+`v7k){^=2m_#zlG{)qz9Sx+^=Ay*tOeF4v%DH-%GNIUVT zCJUn8k7HFsH+OC+_yp5wpv9%Zqslq%-w<7*@5aL@a|ZrF5lI=Qc7aVK!(jb8iR)qa zs}B=e;6T=DLNsD+!7yPDVlEDce}^3R2*E!(O1hSDYodl#qK%fn6=_dQI?oczPZaM%oxksNDUi6;mke5$oCmfW}s`kFrCrp 
z*p^epkI+u_`^d5A$Vz_p6SScCLu5JnzF5@-kBR17HoS!SMjfA9hb}xRosgid1|M`_ zQLlTq7AvAIwE2+usHgQM*_LR%ikZkAXsUGFWgli0U&m00>7Y%|CZj#iy%~RldNZNf zK8cbJn-w{uvU{zG_fWLf@T_{&^;$qgE84nL#rZ1ww9wWt5d)^TU3trT37H!eFj3I3 zmg5XP=xq~o`*hapCbah6O-lM7vBS||j>6Z(bBHYOlAj1r2 z(_BOQguN_@q+Z8T@Ew#Z*rbeSYk*zm=+awvuvcctc+) znQ=_6b{Q$LLw1u9F|DZz`IFFG15H&TsFWE8gYZ;Ior4m7H@87Q6gN%Vadr)*VDduW zaV}Qd(;Re-ar1sWex@%#o865y4#d6KMutcEUv<_R*;=eRFV+ScAu z$ALAiE6Qgq3~O<_vG=-W~ng zoX4ITph)$ZC%SKvCG^Lws`Nj06^7W{Cw}DK)ioxFX_`mZSJ)O9 z*Cv)8Dwq}~6j?wJ`1g_|s7sEnum)xo>&m+ikM%C&C?guJ&6&C=XYDrHZcK3R%5p9A z&zhPN6?C7VU8;%Z;GM-$Xht?hP>b%4so{RW$a$|}8)MR}t}y^uCG9e56825ct}<21 zaCKYpI&vlda^YT5B@QS`AYRVu<_{3HqCaxC6DqtgED&Lj6`J0JAJXPhPU6$MwMw

    u~a{qX$F`z8IWheD5?Spm>*^|7jW2jia z30@IUXjW&%0f|B?KcY4GWu@Ke)7;h4f=D5YE;{HjN&m%dwIonUjBnZ?(g(__j(tVb zUBAnZORlxPWM310Xc$2W`R3KMbazf|S#ab5mcDeQ2ZCNFpjt*zbyySHiKJ%AmGjY}JY*YI#cM1LDIYi<+Oeb@Dy%`S zaS(j65L0`rAd1>wwHvY)T2fX3d5~gRG!1)iP@`gm-m-8A=R{vmc8LQp>t z$h8V}%HlLpeU+VHf%K+~%&P-GEqchw1)i38vOI$a1g9C+F1xrIv~A{xm_YK2Lu_ge zv85)lHnEH=T3IPA29W$p?-V+LeT!^FmOvYEJ>MeOQ?Q+L*!dcV$ka4n#h9ahK2%DT zBehmvta)4~5i%-Ot7?hYN<+(f^Djy}il!5%#QVh`f?o401go73Ir6fLb>1>k=ob$) zQR>O`YTp`K8$zgD(bsr_7*q10=5v0G)U`q zONpX5Rz0h+JL)cIuSo0YCwwWXYWkD6Q23$tNaAWyQaLp!jQ2)5>TJS}6U>-R&@Zyi z$|5T4q0AMvc4vS=Oq;e-;P0riCaZ$B%#qq22rIU;atE}`uet;Pdtm=t0zvp0-{L!? zR5ZRY`!F^|JGz)C8^+_-TPQh{XQLzXQzodU3$-mav_ch?=NC|%i27kaD#oE}j4$(| zFsy?%jEC4`(yC56@dYEM?q591=1?AoV}ay8cET3 zlyil+=U@k&MsO>XwoeO}(`}j!c!P-2xP-)+OyYlvrj}geI{LgTG-L7YUI@_i zS>qmdId!9^JdHu}lC-qdRlTD{HYt|7z{hISiW}3uRm>LdiauQ8DJ=5om$dVg>@M)1 zu+oe>**j^2n)_(lWL1epo$c7Ivf4r{R< zE|wy+4GcvGQ1a?>Y!8efYjg;PBw*3UgB?YoKMzI%IZeW?csIuKQF)8H#M( zQp7}i8w3guV8Yb1SU_w!i!_);=*H^z{=uKhZR#N5*Csx1md6!`9;{8o#kh;gOL0Ed zQmGghY>*?c!?DyaFw1bySq=jk^bySUUOAdsu3bBWGM~t4k|z&_%+zF&F1g2*b(7?* z$%TDHI|By4fKaNjhw+}E%zWD~=8a$oJ=z>;&eOJTR%N1bqYmRrh^U%JKjdyy`j9HM zhDbh96b-6*QzW7WkP%1R!Yu52U)YZ^?Z%4URXPZySjGQU(GRU!B@>Ekn3)ieh-naw39ly z%@tP(dDdy7tDHLq?>H|R1_xDWFUgnj>!yJDU%-qLreI*o@4*!XN>QMm)`E||x7t=h zcQ_L2m0(!&@$z28dY!Y9$H>q7T{y=u8u*oySj0DQ(WoDyAyr{O5m6qM(#=QQ^qp7k z=3`i%BV88ZtIqs)DHwZ07a+QTvpx{QdV!n7Zy0x`S%7nn@1+c;OdY#I21jXi`jf!E ztxeU$UPp3uA90Jt))F*4QsP%QaX>$FfK-|?Fgmo z`Bpa8QwJRBRTn5p76!%tkD;@Qs%q`Ruzl=fcLO3NAqvusbcvMGUAudA?C#!lcPMsu z=druH^|$lqa$U@eHNG{zHOD*O`MfRUEXS)l7yPO9Jee80irhY^H!NgE?atOhMO|A* zsK;bX=tC8UVjJ48%FIFfhF=ogBhvCz#B%yaD<=ik#j+WA7qa)j_v&0`z^+|ptD>$g zNrsUbQ+v;8C&hNQJE&F$wKSy5^F0bI!4i|x3hhwg9qV>!53wGpKL9n-nX7lLsFfG> zZ$4z%oiVR>rEy~H<~EZS4H~Gwru^Xnm`}-gPN^ETc%t=9X)^H+5gnM_9?$gLd7x=? 
z(XP!Qbqg|_dIl_8Vo$bBH_(I5*Kg4HdQ_S76jPnJ8ehq1n_<$)WWm$z>`jc~|B^Yn z!FLmKsVw(aFWwrqy|R&S%~{QPC-|3JkP`>|%j=3f5Bdt0x%Wf6fxnNpGOt05^P5=@ zActN5vhRaGn%{74fzK3F?k(^W`zG%tG$%VrFcFH3_zGk}M)y(RPw3afCyW@0^y~*_ ziFnQqk_8KoG=Xdb*&%n}Y$p0y`?(MC(^>iarP#v=cYy)@yn+fwBaa^}qy5q=&a7Zq zX-99n%nVbtHvDGM6lK!u>>gPuvw`zf!pH=9DWZ}!2>$`;dK$n`BqQZ}Ef0S#Es5n#jBxNS)f1d@_yZ z-+FD!FnVQUPF*acr*@Q>${JxlKo_w&dSLxE?kM%F@G#yU**dq`f?o2`9S2Iq*4ZZ_ zspkIHfp@e+U5jc18N*sSMKH6VKAg6j_0)1@y)6gT+k~CqMJlJeo)m6_Z_XKHEgig^ipi87tuHyx6b>h#&v6D}>wyeqU(@ z3jxa^Iw%9G`ijBLK(l2B_z*Zq)Ike?pOg})0*p&ffc^sqLJQ#ykcaCKWHI#kvX$U} zqG<=N0n3CVdfoup<|=iXBR$fgiA0Av+*5N_Ek|EM&{YI{tu$ zxj%&ePF2#KEm*0jG=3JyWpZpeFjR86A{mf~ZBpw&8)0F{eDE0|b8&`lV@3Zh=XF#c z+k1!i(tNh_4}Z1cj-iQPtNn$J6wFsUm1hVJD3~dC0EW~QtOlb+sPj!I8UK3zJNHK0 zls!GX`Asez7kL$RL3*5by(%3U#?Lo`gL&PPv^6PCf*Z=Z;Hkg~X@s*Dd`RY;`^{as zF?#oAF25UWzs5b@Cf6x>?u~lbmUp_Qt@I$DFtx5T3)X2Xg1Uhe`3>h35Ea&+Tg}%96d%wfFM8fNDxSHI;ud?_A{uON0EMNW?cqcZT z7$dq!8@bI(_?#Zryj{4E!B(?L3bTFi#Llta7vCeiIqQ>c@J-y8fg(JZzh}7s`wo0R zc3T+Ef3nq`gaiwl+KH2bbQP1Z6SNA>;?=;|;-UCrK(;mu+X5~Mc#9Q+x0l;uUeM{I zn~4X+sVzeYCt_G*6OQ6R%5>~AR>vRAM8M`0%^g(IdXnnVQ)oy)4qA&mb-Ifx5&TFx z-l|j%Sg=p>#|=xcG?|ZLJbGKw&Z|dZ@v1^w)Khpa@gX{#%=EvDyv46O)goW81&1eM zE#`ntvFI1WzPkHpfo_%T7V>dWoHrnK%2fqX2u*e=(Hj}GmHJm8Nx}xFLWG7J4t_>& zHGbacjk?uU*Iq=Ls;#BB5O-4xXCwSVZ}4oOSPrjH_xf#zGi9|-li&v7m;?F9 zv90!fZHUdr2{lfztgBJ73ZBtAhV2I3sORQ|LuV}4;&(t7^+Wuq@G!+8$Mb{&`_mO%a8Su7(&Y3a{B1(no(jL(2T<1arqXt%1?@ftiubY&+; z-A4`UoT7Y6)0)01qUjfe3uKp>lNtX>OIVdT>5_lgALC|=TDjG}!-esDl|vi(5AeSo zkSc^{=x|ij@Lrlm%BS&zNe^i|zmNV^5-C`fZ4mtu+=(q0ZUm})BqR$IJ3vG*G_ako z_>P@!A0f}f{0%>)572Fdql84G=;OrR$oH(Xq7lfxm^0)BMD8=6=tD{!(s3QKc3ZLh zsC-E4E!k>mt^Tm&gk%-IR7@4Upv@8a3b$pA63!u2G48})BG-F2{vCI8@WQ^M*S5yW z&KQ=qT$2WCzvy<0Z>SGqQ$#AIis~rLmltRHkr~ps=msKDJl{J9&m>PcEW#N0sx9fV z33U<8_a&0*80`u1BJ*n0NqEaJkCIJp(+=IBCwi3+qOI}mvQF>m*bT9}!x(G`QL$Mo zWw$3c&k}!a7HR&9R^)PCGe`NQivRwymD|AID>z`z&j zu#Ib)IAU>ki)O88cH2()0C}ddt#TT%sYbIN!4H^D#%u@rT%RbG`bI@KEn&*p4i3S5+v9*v8WJGw_^hm#8rqsz2gYh8|Ilv%iT>6)o&e 
zw;Z7CuCX#X(8kJR^~dQ6f{R*|(Ny|M9n6}Xc27Bs-5D`QwvAipIT)C7+}t)>}=a-Ydmx^`ZwV2-AMcem6_<;*A20_A@MQET+lLx7{_8ZiL|+5ID230XQ1 zn}SitDp-FV*(V*)Vn`<6OC5*IDWNOv;Qyss?M9O@V%u@W z=pflL2;e>{7D-w)d{F@}L$yX2SKO{xMW&^EmDv+z;dPQ3IOwroG#pdfjUn!#y7n&y z2W^zOT6;mgLA+P3Qqs6gWucrXdMA&QwxwiC)5KT8SBg`GA3emv4a9f5u|zUdKnijCUNA|F|^>hZb@l3vC9u=Aol(m;;{^0LU&ZY+Ku|I-HR z9yBMJR%ulAk3{LJh?)q_M8zU=ZDG4?w*Kw9REeF&H4GB1R3v&VBQqs_cH?lE_}cna z$Lwx3c4=(d%Z104x0*h(L3wxG+QLQBHp@Wr6Y)X)uP|rfMfD<&k;HG==)o#y@>Huq zXSMmR(MNr)@2W6DDe5d`{gTHvpDO5=hSb?6E5x~GM(8UcsPLM(I1rBqcFCBojOW%vuz(1=W z)(?n|DQasV;dbbyC4<~SxnhhUE@v}zYlw*ni`2n*YLK5|Db}*$r{pvG)AnCsBf7SF zN!@wX7WsqfI>mR$(TvE~P^K9e(&nst+A_(A_-++f#0~r;FCp))xG#w&R@>SOCu5Q- zvTn3#lRT*Ep&a|%ZngPX~_%+HMvf{w4vR&e5D^7~H3fI^MkY?Ou z`BlrSSIN#)t*fa7XPDe9b`^j1^9+|W?`Rfi@wgYt<%7ADfXq?0W5pKnYO$4V61jqa zEb`jP?IE&s%e&?hpx$`2KE6VxYpb4}$xwF~AIB*bhqO(BfzqSO@D)v>%aS9uxx@o9 zz~WhRr0=Hmucf}r2Uu;aYz?Te)~#unojFGxR`oe9Qjudg7C1>tQ)4S+BBgAxZ6(n_ z-c!HrQBp?Y+dCj?5PLz(;2KKt+=eRpr?i_j(!rsR*_^^&?R!f1ojb<`R~7JMtx>53 zxTPk$tJyd4@^%R)n02aI#5rAbvVL%?l_ssG@#3RWOd#doOUU8Ai_oFnM1o0|# zuIk?owjvch)WVY;X3lMZrT&GkHMsa}s)rdD(IV6IxX{LTqB@PZJXkex@PQE3W4=md z`(gb~G`#tw7H727pH@c|?5#eloSSmNbXN8;a)$1ZWc+4tC~zdQ#_0f`sER|+8%Y5PruS27rLmWfY@yFR+vW| zRp{F2T~mc5S&yUUkfy42XpCpI{u4UciK5CtK3biX z)FIFC-oa00GAFpVSW;J7)1eTDZ8+0hB^s45x=u|#3t?NB#9dFde$imJCRf&B_pN7$ z|Dr$fv`s9{YxdWkZq?J$r1n#aCmV!Kmu3HpKV5rTIyEH8+$j$56zlSZ6sH!&H=@xx zQOw7lX~62QaY#IK^MOvw#ixD6mP1vR}K$ZWu71__7rLNi_bXiQj`dt ztqJi2d;q(hw@v zEpD?uE2!KeD4RDe-aeoPmQeoQpm~}Ac*_IXTgmp{$!Z`~fvpS}6ZT)UFR`ZJ7hgMzZ zpSB;RI|R@cE97Fxk2h(bAF!YT-LX?}KIiJdU%}RO<9o*lUPLChT?Z2Vo;H*Muickf zwt~&}$=aQeY{__8Gpytd-#Z*rSFGQTqC0aMH=RWPO@7vW4QYz>Y2AcW_&uplL{i;D z%^Q(4d$Z;|0xjW5?;$(6&vt(nzp3!w#t<2Dkd3>9Im!FGPLc_cPA#oOncvg8G`!6{ z+jJ6pX@5~O3|q70s?-zR&poz#sVbzxX{$w1l*8=5F0&+W>3kx26uG4Ns5sp3buCYL z*qv|uO-4D4R43w3tgIwGSOd3X7ilQ3u-@`QH!6qPKTmB;-q>lQbcl+Q&Ht9@b}!MoW?dtYIHtc{USd1SzW*+NaJUBr4xdm-AwabZa4 zOSvQ~X#G~+|2WgbD+GF;m0K?;73i*Squk=oK01?D#+7V5Mt{T`S^bz9$I}Y+tbKfU 
z+7ga0|5e5r?qxxLSTDa0(7V|HJHeK#?J;`gM4JAR^B>|H`kS{O{^Z&#_zBluaW7XX&L6C;43$sl)lyzd^DIMX8u3+P3?o>? zs{F|sE1Z}9FZ%_tEi|3`4fk{n5hS4eOHF0rX7Iq2iqVFnJ!32P>gJl{RJxkPPSCxT zBP-r87t3nW{Mid6(?X7NBZa9hFZg?Kw~MKz5zVvqoiG2lp51k#;$-y+<64T?Y=w5y zvh?!uPmFcyFR2KNq6iQ1=WLZky43K!iG%;em!Q3-y{F3}Iwy4QuNc;nYKW;kT3?Io zqjpujEq9_17~N9tFmGytgU@qT$w#gNinybExk+IQNMrPMaFM)QSiTPwz|prH_uU=bovN?cDeOV6ThzFW7}TAGD?Te zQLv4+7G45GGsc&`1U@kDt-A_#vo8d_fKXV$KV# zE#HM(3oYY6f}`BQX;kqDo{0FemdWqLF9j|Ye8*~+n?V9ydGtHulEHTKd*(*%$Hpj@QoT*N zfXz{Y{5Fn29#=Gt+ah&M+RJ+&`WnzFh#}R>Q-P1z#UtnFx9e9A9B1sWv2Iw(Y&3sR zj9}>v+j#Blb}dxs#kr#jPMXDADSIE_#or}PUcMM$5n+e#(C2izZMw!7(&|;eg7Khn zoO}lJdd+d}f2?n2qCmp&)4L=pxlO7*|GRuY>3ye9z&KKI*p?>QJaW@~I;C$}T^S>) zEB$>oy01v3(J4P-fO;@IF0M7bM;s8t}E=F&Iz^%+Ya^7?Dw>8G}6a!yHR_V zetTn$w2^VF%a%i7zG?2u-@uNpU7XOwIcH4r_vMi)Yo|?uN|AbRHYO-b?74$2sPM0r zptmXGB$;R*^#r>Y9o#U_JArbUaKdDi&X)O!PzCpn<7V`_pn6XPvW+#j`y?V_52?yS zve-ApQAikP8*2dZ<~HTNMM8M(@fpYl{z1PHNEI-`k%x`~k=;w-nb3_cIXntFW=VxU zKpRD?pm$I+b0_o@>du`9FM^K7ZGqFFr@m@<4?N8=42ggvc3uK|#mvrCV5^8UJq25Y zQt~Ufh14)=!3RWtjtgXiUyl6?39wJTbD$sC2*=rQ6Y_C~7JxOs+P?s>dbP0|kSVi? zGeDPIKyLybOKY+-!O%gTdM>zCc-Lne6i1wK_y|wN*xQ8yT{YVFT2N(vX=oF48g225 zf(yDh+CgB3nvvBEAPQ~Fb#Sb-+Ghi}N0jGq5i;Nxw)OL`x4dne!hhTd>re7O)}F ze?=Ak^I5c0cjgf|@?5M9X+tUn+)~FH{>u1nV}W z7uK^cSHzJgI1e%&kwbW6B6kvj{K(Ze@LIucyJz@Ru%cHd9t-@fTO*nZS}8MxHlQD< zA=iPqS~48*r{I%FCp-l5UX5Yn!U{E#dpw__k;DkQo#+f~UKg9FHGQaFjUO-^kojYub(#ErELeS~ zg(BpY!g!K`Vcdo)wvCS*>JpyjZd#VD-~dCs-(QPsL3?D zWHmZb*PLob+SNL9BspFC5`eot&gI}nGo`LX1smZPmzC{*v9KlBo%(UcVKE0qM6wU5P=59vJO}C2| zAQNkIQ##;%raR#sute+bIR(b$!FFcYD70&QqMue8t9z#%R_;$ashuikv1*iT%IgA? 
z{2=X0a)oRR<7uc-63t$)YK+LlZL?*QF9g+%E45XOp*lbHR_17;K>3z6j(K0<#h#u2 zQl{Yeue~XG$|XY=ihX%cR~{GE3m{uDIT}oBDA(8t6xs+?w%{l(R5T0zFo(+i0-^cK zr1?PeTA#s0O)HyUv&$Ar&*(%jngrvyb^oMu$ErLO7rq1 z@#xQ_R?&6T7!oYZLfu!U5nqujwk@~@q1PFdTjZ(g_42mynxMrFt)olnhZSb4Q9~ z^7V;op^MZ$m?R#HUb_Frdq~W77skgv*V-wZYlkU+$)r^oh?C^GiB7#MPSfY+P=&8G z(TQ_Ni_#(3hlr7Wbiai=h$Oa$uu1s0nofCt>pUe(HmPwNTq`NA{X|U^{b%`}{YF@5 z_?*y2F3~&;dX3*xY;)g-9h4;5oUc*+Be_8@5S40K%(b*{Z zsVX5MiP&O@21DgJHpFrL=+p~NJJw#gCR0z-A%t5CkD&|rGS$GC3|8T?D74cglR@AZ?^ z8G?N=fZ_(Q-alOy4vu#{Bkl+9*{FqMp$pod7B6fjdBoUuQXeczZq0j zByu2Txcm;H@t+{gL5f|sitZ!PHap3+h^N-e>?pZUjxn@}Z*mfKPNGvKj_SR_ZRyXI z@nl=HS@xZ1^E)fi;B~GoqGg!M<`%IX4jPtQ}J z(z574nXOprXAxf(Hn|#vLZZp$JFx=OX{H++EqVB7y^Coa+g~%FXBN*>d1-d0y^!Bf z{*HPmmC1emio~80imPytlYuOqj32`Zb+^&B;VCZE?Wn!SI;C;10*bnn1IEW`h&)jj z8AV7ZsoMSQL`P&_TzRBgykx00UQWcRHyF-!_~XvH%oZW5Nc~?!R8g2RwEA?Kz3jdz zA}U1Es6FWWQC+=lohM%v0t+JS_oZ)GT@xxTexLoT}}f)buQ(tL^* zA8s(%F-WgQO(E;ya&HC9Ra-5VAOf9uN^1(OT=1*$Ih{cXufrIMT&wCC%)@KMdBXk*S@k6Hbhg~InRu1N(9Ck&MFUk@5B0YOe7dYnWAb<$TLT)1oY@0XF%nB}P0MKs!aEAbA$K~Aa9X|CbEt(o2sRqk$XuHj@k7_7~?iN2bB z`jk+!lA%sq-7Z@wk6R86Qdh#P&k?Nz9oX0Zs>Gib)qSmOT!F0pQ2FyTpc$*Y7qg~* z1oc;-b5%4WVr9D_kG0?aoH~R{EKZS)7bt*={^8VGnoZYrT5UmbTP^)yT5eMib6ku^ z?QRwt=wunov01rW@5p`SFiy3Q|90^?=}>SXVAH#w(?dPcY2l76SlF7%1=8j;{Nh>1 z466Zo9|NbDZu6h7JfQnm@ZG^zIR{+1WR+w#BomzLp+N*Svm*hD$?s^H4cVt2sy_lb zM(?anhLQqa7>_}^m3y@lpeGK63O9J|5|ua}z9^t}CzBD>A?-edG(Ws~G=3>{L)|vq zHkw}*iJ1cq7|vr0S8mtXp*;>Fc@Y}1L$x)J@I-~Zw zcucgng(BP?(5U}Nf-5(xGl@`#X4y7;veg{X4D=2ELgzKDk-}@;tv;N0tdXxgm~y*z zp}aSGgn75LIRMv(iknt$SM>;69rnuR6Kz(p!ba>MzrHih{EiaU>THze=^CEv?xb|q zh&AJ*u9&TrK>;bcJu+Zr$DlLqfWtLujc}wDR~U%X_{@&)^-C!+EhlSM=cyXX&Bs%e z)gKKrqYj%e4HmFYw{Xy1wprOB)i}J628ises>sj85E{C(x%hTr_tp`m+Ki(cUCS%t zPj&sRq=sH-Eul7fPpC&3<6J&kQdu>&&Ds}S{=!3Yx zAL1W(b}?c@KeUWyKJxahvtjqRt~8J0T9%E~^zn}@N|B|26KKn}H?bLo$iOM~$&6jS z8#&SOXFB4zQ$t@hZRU;jj;*=NcXiD(UKFsGWvLCo^F`OBgTYv|f3^h!=L@4ZQGlil zUQY&KiEnNn4jc}>*vJGIddF1{fYq*~VGrcItW8x5Jzg9tQN!)jyIZH>(+Zbv48o`x 
z3Ej@<{di{UEwm`~U_%l*%R9cR4SDX`tUrbvUiL_N1lhj0S$r4SMcuK*QIc3VwLeb0 zDkG?CrD$?|UdsdWbLhtUY~sClvSm9l(siHi6>e`gQ#l&TT>M_V25qOBHuF^3h2#2U ziW3=rorUr-@fj_Xq=BKGbtbXgJKg+E_`>yu&V@|23saQh7nit*&S4ZPH1OHrQ8=OZ zy>4+vP{$tif%vrMEM;hDeeDa`ZSS?_^%BzcskT8BY*!|KMow9hFDk{SQ!56HRW}M} z_R`J188ID>2G{uYO~*CALYr%ol()UJO&4UBUEgbM#ZT-&IU$_6L?N1oSD$OFm{s^= z_w&lvCB7Y7sl8=1{Y!dgMGdl*IfZhrd>!j1eP-$+&VD8wGM{&kW9xDfNaOpR*;l%S zc63*1`9b>Q_Uy`D<|y4hs*L3Vx6muuMP<3nBF^TNh{62sDZxzcd49D^u7C^Vp88(g z4nRA$lwJhT*5~Dq0fjcR@+q(lqEbHs|0`Y3mWZm)h zf_RN!E3K$aC2QJN@K3;`&@#LcEIwwOql_t=q;uv!s9>t2i}ERd6s2od&M{1Dd98{ycK2NYIp_TDtk=1NegKb?bHAhvq4T-exLJO zn0#bT&exrprr>v~$pMr`?yXp&x?X>BJ^#T6O0Hu8S z0nSEI`T>mjx6r*`#riXN@jhYyE&VLxa)(y9aHYJ76h=WAe>v?!A|NPX#`_-uu5zY1 zU4qp7sQtF|6I4MT%J@sWT=R%!#aJLE*kOZb+st9I_U6}e_i^kK&hu@!eg2_>Bm7(^ zCD;$7?(L`k$FJ{sNw?*HtY$Du1p$&}%znY37S0|C49{!k@BldBAKqASgx@ayC9uXR z5qJ!(*|Ur?ldyEBQ8E0F-FJa-)G;}^*LiEMM4 z54a$+cI~dHR|-16QRXT}m=#o+j4xEshe)0?{TVuOX3iN_g79Hn8^?iM>wB5!hj%!> z6X?)GJGJF3b5h6UieJVdCJKe66Osbz5A}COIK4qxo-=eX5&BNtBzBp^-{0i}>R2b_?|heXr;9&0 zcJgZp)%LHY*SgK^?&U@8V#C3T2hH>F%an>bjJBWVVtJCalVNXI9lMAXF*vh$$Z3_W za#ZmH$o}oZ(wr^N+cuZI>3?Iet61L!;BJ-oTh7z$sIB#jvV7=Wmh6~q%$vG$9|I>+ zVdu!_eHPXYTmcssv^2B8kYY#OY7i}zVDrEW<>#qXXgOtJrWUHDDWl)Ri{-Yk14J4;t-x-8{Nx0^geVeb8ys&4(%tf)K&94I>3V1bd=(0jB}F*J1E1 z=x=`l9s}9*hjI8=RQ(0+Wb7YRBexapgU<6p(1Z$-KO7yl-j;s{*%LKga1r5nWdh$2 z7yCYFEqtqQ1be!yu};O_B~>a5IoXo+U<-GOcxJhQ`&f7-Ba`=ltck?=pNVx|{{TMN z7kd-99WCqm!ThFwUn^#H=q4%(+3VEbfQ_6P%420aIN#*T^o_ic(!|ISd=K$ItM>@1 zNxA)CPl;XVS<0MNx3Fdt^GtP_T*U&-<$?$75JPC`AC8B1M*19XvhsRFEl)24RwII` zqG$H3K_|{X-6BSDyLHVNrggJLc87VXzFXkNvQ(3$X>6luU7Cz@Qa3%qgEw2*w>pa7 zA{}a93>1))x);(v_J62WGWxnDvKZ#twkCcJYjPv2AyyC}rH(su1qo-@F|pcB01JiV{(pR>*-&LAZ!*>oH7A@!gddPi5}$q@~}b= z@V+lAL!SeF%^T4JoKQnCO686b!l)H@H~Rqcf=4N=NA~lb*QF!9g5hDsNSEO0s$0le z;PGJ93}|RmH8Kma(yxKw)jSiUTKzGPmdCjPGIJ8eSn&U*ZZs z$MjrYTEUaPmo28u0Vhd5Qu``l@g;g_7GJc7@h(11*w5M-XeK*2jVr>49lWDU7Z2{_ zxtL~3x3KO>K1;Ohoq(&D$&psHiORTHnTLgCys)^_B%dD=xQfsSl2^>f4+5g4e)vD& 
z9pe&-6*NcuT|5a27c3QxhqB6V2q!_b%qiq@NEH`JWI#Itp5hwlrQ2`p5p1(G6^noi z4J$>bg(>1m!V~00emHrNbSNK2JRvGK0K|BFQ|v8#9X>pu4(rC^-R@%xQQgu?v<*3- zFB6_rsziC@e~NTo194aOx-0_!B3-xsJZ>l66Pu0!BD;Wa^d+fu+llh<%S#b-9yUaG zocw957j7pf=(lk{6SKAa(l&gBdhB`+jHb}VJVGzXW(AB!gT-pM2IMt)XQ>M5#K-Hx z$*ek5=tl6WA99Nahp3lI7h*aCl(7@NtX&*aiH0fn`0qfD$s*mP2v^j(v<8X7-)UbH z@7lb`lf=ZPC!BZqgu1UKt=QD6{){9v&X^x#g&fyT^T&}i#a6cpWP!wKX&W+x=#(!r z4bSgI7U(Y&En=i;FPGp2_tekJ9;J>@zN$=#oGpJq{o>OgJTWVypd*_1ED3#*dK8ovqBaJO`U zHU-!KzEA@|9nDtR4qVCmBEJkySyv((2WGB$DG3Ajcs~5d)5ES zx&TZWEA6D5kV_YYqliL9LS@m<7Y>nB(?Rx8@697N#> z>J&rGo|O+|t_Ct^mn2iGSxbugRaN1D&{p2)J)3Bfv^sCV2Pco#zp)`WMO3T|YpfH5 z$qjV_m9TVAwOdY__?~IQT6dA7P81$QcB(dceIQoIt~0tWp;8L}e`Ot%*KsaL^t)&$_#u86a7BB@~RqMvc zV|=K>nMuW7rsJ&GjBkcv?3_4>){&zLc2LD|A9-ZRGx)KNev&-FlO=K?9jt-ss<-m1 z84;GL{Qnh+jKzZJ461IUU~Ak=^+&)x=)TecIO*XnO9Cm5abhmyvg8Wc0{w+XRI$;6 z^i8JwsJQT*!50lmzpYguUt%|_ZXmmZj0$^1?J-AMf|NS07j+`>R#S-2h&KpYAn_f# zyYYi~N+DMtCt8wTtZ5J~jEzygCl>@2$ODLF9{)&8xTj-@@G}-@6-MNsvEUSQpXwa# zxY1r&UN}xCk)KT;qP`^?AM;u1CRr2YB$JDT9>c`<$m5PYVH*D1ii$tORs(y@^Nj|Y z$xx^7D7dA4r;SZJrnXmYjoGCD6-xr&O5aG$tF8}1OQ$;`*B@RfKi^Hpbj-t*G9 z)<=cDS=mkW;>NWL>n@ZnShKV0ZN)xcitz?j?Y2)_&yX)Wpj^Ygv1p0p8$XxH>R4H3 zRXV9`cK;|Q@}d{jiEjbJnbX_$ z(5IEuH%BlgWZtOX%AA+9srol7X${kC#qRaZ(l6(_xV=)(;hnP!l6?_WFCHrz%=~2Z zwEA%WE3s|7%=6Dosf*_AOj=jf#E)C!WV|gH>FcHaBe?4JRy6|HV@H+#1`jSS5{-i{ zF#?*uz(DbV1`}Mg;eG84I4<#xg${d#zcp-w1AV7y-ocq}uay&F(#|NghW9PrDolp| z(jPSa6fP*PtZyL64b3(4iSvog=E1umykCC^Pxtwz{)_E!d#zZ3EwbApDMpoxzmj{< z5p->nhg?a}r@AgFYSlDRyUhGe(m!uKb zFbmzG(ZzhF=y&Z2&mS&n_`<`dAu}b*=Ye+3Dn~oh22Ls9i0aDBQrB`aW8<$IdDy%r#dY1{;K^BSZ0?+>vc?nKdx7kl2fPa&XH6IRkCxxj)&mol zo>8m^GZxv3qoA(hm%UBk>a2O4GB7aNsnrf#8|B`Bf}Fto>JIRT$2sFhX!-JA8WYrR zJ5J7qS1lq%3iwp<_8tqKmo=n=k9|*`*)kmyL@lVNpnn46s_Idb$7RDtG}?K#dLug3 zHc++^dAjJ5@GWwvSlN9_l9BnLy-o}zPi%G(l}1gelLvDk)!fW?ZSNH)lmBgE$>&B*s68bO4fM2(7NZ_#^{a&!oC8&H zq=&6p3gg|2*9eDVfyL=v`}B>OU)plDHQ)|EGzcHL)(U zdHVD$Z&n9$T1*|Mo$cncmiK}yaU20af>i^S(*4ZJreEc4tU_&XC6m1tl~6-DVU%BV 
z2X0tqB-4|Z9{rGA#)rLcaqkO`I@}iw<_>NeU)&G5Hs7SlxH+nUnT+l7p2=B?o_A>F2OuZ=CL0DEZ%JU2oUSS#&7xP@oeUdozWLRc2E{*={8ZnK|P)bSp0 z?osq5ncUYjL+WP!4938kb%Jcx8_#s$F}J{u4)qFL+dS!)=xw&Y)UYT{010?_@aXtayClI)^l4*%d9Nu$ms7dOkW3U%NZm;0HR>O= zykH`|Lpd%vlu63a(D!V)__4=m?p@&myI4UUZqxX0+2w}$+T9g#wITS9%I#I-m@ZUz zV@*Dd_C{y3b`RsRsyS549wr~P>JB$wywWaGFqBx;&|2oxQKJni-`Vm8cdDG*aG23d zG1R2weWoRup03Scr09x6{a8}v=~d0#5t3ha;rz|y%UKxqyWBwA@yIcOz2&`NDNQ7A5eVq_p*@0g%%zkc zfE%kQ`v%a>-jZMj1GzJTc7PvvD)(6EqF}l0eE13Q(|n${itjJu@OlSTPdR_FAeu6Z zUoSYAJx$;btWFpy_yrsYN(ZihiS9PwBk;4WBfJo5Gxc-K*c~arwZV1)TJ9#SvT_n{ z4K^k#l0O7(i{HS1ibe-M6+A&ctauB~M6|Z6Ar@k1+|Q1cQ6w_Cl zT)N>6cY*A8Tq@5|awaf{zh0zR(JZ(?I@m^om!6 zw~8t0W-_rXH9C|yUs>XpL8Q>yT%O>+Sw7aS_-S5{@-%s}coH^1E-wvW?I7C9GmB>s zj+KJ6zxXa{SJZ!aD*cDwzxZ%gxyuFYKkg#ytJq<|V#N#m05t*a!Pn6aFt1>*7>kR1 zFa;A$i^05EAEKhr-yELrF7!Naj>|Um2>*%AQ1m8nTmA-p$E6}oXdQ1k;~*N(yIxp| zj^^_Qoi;ZGHj&4WgMz!hCCCn-&7}sp4B|Gn=tPJv`+>wGD7+h)jp*r5;D-oVV1PFx znW^(&J>nf%0XHCveOJSKkg+a0_%Hm~CJf1f@5z?Iv&E`GU)o2}XSx@3SY(y|0;(1| zq?AJ%a(TpisE_dUc?vzp16&wzG`7kn2R@B1lETm&btRMz{#Dt~^x$QMEI$_9Dm%FD z4!B$LHUbAlF?et>JuGr%igkZ~SuA~;L8Bkuz6O*3U( zF*sV88!;PY6MDH(fP0H?O=%(OnRhby}WYY?pXJ z`nzm#=m|+xWyor8u?J1>?Py`*~5=8)a`%Drb)(u7oH%E+1Ofn-wbEQE@M^ zKvG7v4TZ(lbp7g?!h5WxPLbqB&hI5xiE6=2%vHXRGM)WICZnp$rb^e+$7EJZ7BeIX z_eJkm3qqF&Pq5E>J}29_wNCLwHNVPg4&DLmN3*40*nRBLl3N^csk3+=cio0hq6VIA z!djtP=4Qs=n{%@k!ir6F(0p;qsZW~>>4g+r^SF$%~f=Ep_##7yIq3>H3I`!p^No2NP$ zoPyHit)9b?`x2E?5+W8BT8~5$@MP`^OJSB>#eS1p?&#cuhQ;|WQoM8{i>^jd)JID1 z`u8i#D`&cc^3gQV?uDd_<-4d$RLqO#C`~&G>np+xt;I)kV+X;nU)MdiyO74|GR&NZQW4XFpS1!nwJ4iZDztf&JC+pyENfMfZ+UkaI}$iu!(Cw(2Tf6M0Fo znHlMyDpRtTxzi;DTq}DYQ6xWUaRs>sXko|d7csw=J=6xWUgQi@uVr6PzNlQsxfGcr zPvGA7pCXOo|Kpw}4iN;~Ckvy2=Eb{-3TP^;P1_C(DGN~F0CKVml;45P$=-^8fZq{+ zWz)cvK`)Rqc*NaD6bAX)7m}6GiN$~LO>jDEss_S-mF`sS`#*bc72H;~ZVRWAbdnCI z!(?V=cFfGowj|49W>`zoT4KpEGc(1BDUL%9Gc#j{nHhX__O5&WQ}_H&yI)&Xqe^-k zW2`x~YR)<4_rYfhp6c$y!qh|BPw?WXdd)&;U_*z>5lVBjRiL4HHf;T^%)KV=(q$4Z 
ztFcU>K28WJxu~93z%Bl%nxC4bU8Z;(<*)XTAJ~9ZVr2tvw(?eKqm4?YzmI5A2vJ5& zGvAd))m+B!)3sF3$^WR`UeS(?L*rpQ{Wa|K<3x^cWp)ai%zi9c(yT z^^TuP>Tml|G+DT=#SQyc)}qElf@%ELx=@ld)T4Snb*}f-@?G?OPU}i?SS^-@n(y4* zhAQQL;dGL7s}D(ss&A50)@KgZ4^sEX=hyVp)giws!kaMrL(|#3wGWVpOs$#G~|}_=h*H_=5O{lb`yzWRB%+ z@rqX4aiw>zm*@8wFG}Hc__W$9bP;`y4op1Z46T^ zf9Y4sdyLJcPod=m%X*vQJ&4hoLG6L`%aw05_}Ihc3siT4kCfOe^SzgA^W@K+JXJDT ziIsu;mekfbSqeZG@vrMTDn=2#H74az=_e|)N_}HT%MiskgAeK0ntE>=%_bGoDN*@B zfwl6GQ)Su49H}`>Lk(^`p1&pw>|-LXBzkt;FPscN*Y*~3)*s*0O<3*bRkxnpY&%}b zrN^0mD>=drTVJh7<)@*R50s(zWjXemVD2T(?8M@o!Y5mEi9`PM#<>)fTR}|;J;wG& z`FB>f8BT}eb{g2LR|vi!C;M&_%`z``vq-xV4z=&4@WQpt+h_#;ym~G}=|-+z%DQCd zT-L-5G&@vm$G>JUteOrmk+j}5^z=-ya~Fe|fNxvQ><>?AGG+ha?_2wgQ{hIg9OwDl zWt0{OeluU9T`qiNXrx>Y9zvRSS8#cmUL8R^;{@xL4&H_Er48HpRsQBR-2#ytsR9%# z>^LP6K(l$0#uPkmC|1OXUL$UG<$)EMbK4h!#}j5X)4`WvAL@0Y#r_MbWuj;|b~#?u zWmm0B5-&7YsguR9;Td_S_zj}76C)L5JZoDn4T^u$#D>p>-L0#F6a2nc)j;>$*kxKs zZMUUZ1cjMzS9L(Mj26pRNsc2p9erw>jH|5)DoOmq#y%x4>~ifvMYP|m%9CtmO z^;Aghj%yQTFU>zHTcvm-iYy*pGLeOv$bQ=MxhOwBv&I(t3AtT%TE7*#fbom$gTo^A z-3f^|62h4asC@sMoE#?3o-UZny+6(JYBGz{#zv-b%~J+rx!hE9->{quVwoc z*pdXCqjBeHb9p4c$IO?4Kzn1JEs!?yAnPgraL4Mrbb)R8DMY)VTJp8 z<$Dl50j6;Vl*7OwKR)9r2-{EN7K(C)gEBTs54P3jgh@lo5c#{{ed1P>BOFMnDmn=r z%S*?1LFib0L|pRAZ#83%q}lEPXN$yh^TV_Yy8hOjtkUAhQvW;)&1F#~Vz)Yv+>VBn zPjZ`ZVudi~18Gt|&-V~LN~*Lw$uWfPZ#tctP>X5F%e1UEEJ?|^STOR+IV%28-N!!l}jl9j=%$x6R$ayOG-KJc?0RnnFk$v2kwIk>(Ub{+%dBu<7oyTkV zFL5QLBtdJY4>eMVj0|JY0T-{M?5CiijhsIyvhLbb073Yg6jU}8p$sTm4K3nUV{b`L zU|R@hBqbRv@-GP@VkbRc;_d~q=SddX2zWomPdhbvyENyjoe^W|s|uIGGSyB_6NanQ z>Q8cXik$Q_q3?F@YVL4IabX}Dy#Fu8VY1x$lzKW+X|gr zl~_c?c9wgmZ6W%WtP9st=V_jK-e%e;i*1Uya@oxG!tAW>$;#z0QDhQ`C+&=xr}4X zAYyT0GV2v(P0BXT0osvJA#X3U-hE8pXLXB}F<2>ZZdyb9NM2sDoxF>3MKVEU&@}Xu zbXSHOO2Yh;`66Wn+n7BWx`>;@?ROXR@A8jY*#ZNAWy3JOlw+YwC(5{M#Ia;7_baW3 zn!-Dc6w}H4!Q@!x7C}=8%&`(SyL<5-06VQ*g}1=Z^+>ElysmgTzFr(HT0`s*XHeDT zad9GI6ZNAwE~$+{7iWenVy_b8-GAfF5<^x#LOXGDU2M@A)pYGb+zcfKv>;e08Yw-b 
zZ26Xg`;=X><4H^CS<*+rn_2eKnQrH}k??Y>KtVOMyq1b4l}&2AFFO*=)s8vV3|D0INuHGC!= z!>em!^Sw#-m2C+-s0O8Pf(|gki|yR<*psR-t2lmvEVE)WADOLE#PM(DFXo#F#7HjD zMqi`tWbP<159b)a2KY$a5%2{3N#(fMi36Dx=KYfWJje1{PE}#3JdTSjde1ZEdE>N% zD84b#Am_8dkh~{OE!;qZ0u+FlNpzV7{@}ElpAnANU)^gf$ zLUtOrFPqO9=Z(cOcz*nI8*cKS3THT<5MBn-Earo6!NVnMsFT2B=mvcin8JF)z=6$J z7V{u@HfxL>DVh^=nma=jw}HyLD{63V6?_%Fw=f6&#i=?Iim^OW@|0RB`^H*8`yu0F zx)^Nf)yyf@N~v2+3i}mYx50?}5t?!q3w)q_3vZxPGFF^TI;{gF)|Av@1~Z*{N^4y7 zot~}k&BQZTtE{5$u@@?O{U37eQ$PGIB=c5Mz>Zy{raNcwm&k<{nZjN0BW)_-TeF3DJ*mE-hY>_EtnEX8 zpzf@~>X%Tp40+}Kpyom@$Xt{N z3Pq2nIYRk3qlnXxin!J19(07NulEbPFndf%@EKl^{Hy3JYMY>6^rQ$+QHmGh<{^)X zg9w{bTg5QBFajgKPV@Kakwh~C9RSHQPKoItG%j$E{Q?k#^ZZ(1jC7RT22Q7JLA(cZ zY0W9`!J~}&@Ta0A)`(BO=mY1815JF0pJsYrGAaBdeJ^leoZzVi`n_>dsnCEOhj0+q zaYQLGfE#y5I16~epX*}??h#NNu%dm!k7i5755QXZ9WPWc$t~0Ghi4Ic_-x_Tf;7P* zAT0T);0UlIe5r6V=W)r1VrCskGcBJMUN zA*qSisqhO6=I@sO=?w_JN}oAI0&IB9OemTMu_TvR{PMl*7Is2e4*ncxK}mI973WHE zU6LL5qNXPF4u7ty$~#bytEg}Y626oHW))yNjFym?6LqiIZmimx2Y3`at130mmSbKH zBwpcKm$Zd;@hI9u-i!IyRgWDkgbMi&vo0`JYAxie_hsX04^)rxP8Drcxgo8yUMfq_ zs<`8dySU$ix)tukevjYfRaDq^w){`#c4M?`f|np@ROX@#Xby^f=+i}W6^>Xz7E0bq zaEwFA7n6Pl70QaK@7-U?RxtmxHI?q?0LGKja)CAfpnM*Ff*LOCChkJV$~?$>GPg*N zP>;mkmgdnP1_%bW=> zVaLbDLgzW#0|%gPo}YUIq~>3;^@mD@)h2P!W{|*}4Vmzo$t99+d^D<2azwBqBTP~$ zn2fn5AqmNWX%Z6P=+2PvfM2%Zl3wthi9+&9bdnn`ZWMnY-4d5d43OW&G>LKgZgHGs zZVXwxLHr@mRh%e(?T#1A#2;zEl;W686u`6`X>1zf3gL~H|&m(NTq1Gbdt zq7lG};GR-zaqw*}TDNisD8P+T4yliTJnHv&;Syplvy&&g-PK`O76kBYD zuLx&pHj_Z#5w$n9)>)xE%D85w?-IxceH77Lv@=z1&-b=M$!vx0M!ThvU?5qpvE$GRpQ!!0`B{%t9=xE0R;3}| zC@e~`K=7x}Px&I@YUfY`%5bZ_Ph`4wqa{G{wU zd^xm2`U1}M8IvxC9ywFsa7b!33Qa=mjOkF4WSIC$QKw;}8ssn32Qzoee^U*_S4lr8 z(?Z$OS&F4T8aPV!*f|Djk)F3Y0NKDBjq4?MA(&VvzgM;g6(#pB)nt-oDY_l;aneX_ zQ)m#JqptHILp{n(&f(A^`4y{kl5NsY#@i%H*uLm?qjOe!?&JD7xert7YaSzlqFz?D z7VZgLU15U7d0Xki^BI~9<`IM0${##bkx6|70?Z}W1{7MSI8?7GdK=YU zaT0eu;A2@P@u>$vca3UmFVki)YRz+0uQ<^L#fqJR_2{ZvORQba#p(cjZ1O~IsENrd%Cl&n zBTtq3GsXkZy1A?p4;RfPj?#WW)x~Qtzo{q}Ts16|je=_nnUxtV*X-f)4t7*hL+M@) 
zA~I9Al`9A^)3))3Jr=3U1=H;Zl}aJnVz#^q_{(rY`aqO}dQ<*P(4FmH79;dY@+@Hs zw?+P5%mr*W{H37*!|qcm2AFC;tk8;nx3H2O740$nDfJRFP;l8k$Le8Vo$@gW#UfI+OoBJ^g!?50D4WtViZ@v$B`FG7VqdgfU??9bz1$BgR>_appOlegbPKt3mUM}c7P<}{%Bk$i&HS2@*j|yxSf_75#=~V_?-(qgC0}g)fHH^% zn?9kx1<$PCh@0gtuQ4Jnpl!c8saL`U{V{e!oJ0S2}xcOAYZ-()*^F zWNp+?y)!i@h*qO1&>FbO-vM8;x|>eWpCnZ` zq%bX_@@q?2j3Co0G5eJFit<=)sWJ{j67B;Qlpe?u)uj_T38a7I)!07c&K7cd z-X;4pnRPMgg)CTYk&R|o@g?~r&PIF+s)u_%YZ-Pee?gQF@w6b%`yuVN@S5#yjwh(< z4o=|ePkTF_i8l7jylyM`Sy(U_qYrdqy8RNg5XEh1bCq-lFu& zqd&&Fj9H~l_+D~FekQe1XTuR>1!~!tQ~3t!gBfcILsUy5ZsRs7(!EV6ESb{w01FSd zw;znEtsk!3A74^yCO1xrtAg2oWiBqijtR|MTI!kJiYhABMaZz<)n~lEk{>Ca+fK5k zNuRdcM$$T8Bqnts_j zN0`Z-+iHt&%70kiS-1%KT^d$&w`e-c54Q_99leD(Nt~NDm-3O~9Cmo!PaqSrxXy$JX~lkRm8vNk;!~0 zR9I7aYA~k(m$6n~nRQY2S>cd)Wj+>IP1B)%2l&WsMK^#u$piQ=U}orgN<4VeV?E=O zh-1y-UJ#qtTczHS5wtHe56TvamgZ`u4b)CVzH~X_2^s^JC$GkBg;#~_AV)zx?#~&v zkefB1>m}J;XOZ%o{*uo0j77zMU_kbF&0my<`JdG+#D+pA)tsbi>}ExC2!%wFJGmdB zOQd7g0A~U=twkltYn(N4X~R_}Ky}tc`E5#E-q})V!5GR;XP2}Fb6ImCWG!)0MRV_^ zy_dhT7IWgHX*IVJ2`#1ShCj^vg-4QR6IT)+P=a$WP;@j+Jey8qyb4sYRBV>3 zDfb=E&vL1-PMA_Y8_A*}Wv%Ey`h9K{<^wYW{}sQ5wLj-3sgL6xH$lC?-4pl+(~XaE zdCs{nbhfk+>;oN2H|0O&J4kbo&-l+d81yAU8SXyzq|iKP72&gRZ`^7M79a(5G5!E8 zTn?~5f?q5*2-#)j5CDI&cYW%Cb9KoTglUaWcn=0 zc9%A`wM1)~z$Z%bbp6@CGw6t_XOHl{57krVo-81_gnd&ti_1fQb)|e!f~BtmMa#aHICUxOjEDhaEuBn zm$|T*uVo)C@jMVdTrAE68mc8(IbZ8MnPml8)f`MT>U@PF^DL&Kv@r%v=qMiDprO1_ z?{$e`!iuey44$QQL^GSQHBBOlW!}!B(;u^~^AgZ!IVDKbjBwt(qB&7L{6YL`KY@@< zMmk;r<%}g}koY1uLbaWmkpDaAPP>M5q&;Mi(9wk-SqrhK^cwalf+ot8J3+qUr>`4K zPj=i7Sg=jZCPd@>bR|e~!w7|MC<}4-skf=Oi2;SP>AT6bY1f&%sXrqd*{>L|?^Rwj z+s1K7@PfD0>=C$47^v{TqbTxmBd}>+rW9t zyXDx$za+SBz6iJpPM7_}eB?UwyYRt0bMhvlm^X!VBp>D&-q1 zP#k5vC!oOG1z0btmwrP(6JvRsvH9XI(nG+Pk10NNO2_($&}%vvP6Qx*#Vore%{*>lm@&sv{dTDq#^|2D;qhJQg zCmnM*H)X$>3k4cD0A7MPQq#z}Rj8@jMVyCmsletR#l0@QlAJ?)sf!MuMfK3^_lai= ztGpb&IaGO@xmMsRT_rj%0Mc4nGljopfw)HDa9&bwss0?uJkbaYDOw!TC$huac#eqO 
z$U?ge37g?&k_kQL7Jxn6=)7j;Ql2g12zC#DMd6d2TY?4H1qrpn6@-Y87$A_`?yULtPbuF4?712IqhqDhbM&skSkt`sXzWI?9NLt&58RQtm%#+z$GNiNM9x8*IN2h|V}w^G zx@CtHd=m$o0w6)$#~UF|$XJwYa-}S+5TIsDuVvcM?W8fWLriOUZ%_g|7Y_8$aYvz} zb|^t46l>Z7+>@N(ts$VbFY!thki2znm|2cRM-pO?Z{FYp< zuj!h=NHr>yUgR+ti%#}VQALy}pA)Rb$I5KMREs6>YUX2ynm3`gx=!vQhbtC zW}PT$O-hMTJC%?+27fclX zrcWN00BYkd@nz9!>N3te0ka6?E)*K)2)Uny!$}sr+kjp8B>x65?1vXl17lnofJpGY zwG7-Q>NdV6wi9PiFf1MBXex`TJeI9yYZPx2<2kkR7h&JHCuMJa^=UWR43}KNR;h_~ zJwSrJOe{tAP#l@Ta4I>Ep39W$_GVkM28*2%zOWB#3d44A|4_C2_VJX8mo6a!b9tC` zrSQA7(If;s2hYJ9X&$8IBIavMv(}~S)L`z@*m~7j#HrvS)gttDuP9|4-pJvYqK4dP z@rQhhK{e=;mvf~!2bEDSxgb?JP*9)tRB3}c7rS23h%pFWq4*77;$GoGl2vBiYTJ=E>C0r51;sl1GAg zi$jnRu-eE5B7ygcmWn=d-{r=M-FcT&T*V3e&ZukROaUfPC?*I!J(I;%fT6>L_$IjA z;+`Z-w8984*(F|p=74Asn6nc+Av&KtD*8?IJ&G(c7B2}j6D5kBJgr1^;sS@Wq7UL$ z%b8-e_@+^_#8$Gf@H=27yOTozw#bH(>A-iHP1FW(om3b&2TX(SdCmg&z&Q?EME=k- z%ax+n5Ygy}cwF+Zut8{}{XJ)q@VF)+c|BmNUK9BhNKy_4+yJT-X&!IDh4K{+BVdp8 znWdSC2TvG15obVE`S&YTsn^n5Dwbzf#?LJq$i@6umn8(!Z>CO!E^%F^J&OBgb3(I% zTxS}m5;NrMEtKoHW%+Z;zUN5OK9?TOpNQj@a8NTsM|87_Xue0aeEc7-bJfpCS8YD2 z(6sZW1Bzel`Rj+|C-`Z3nI%}%^0Y7=8hs%4d9goM8tS8YO+fjAY8@Hv@y~! zUM}EVHgl2*`JDzjeOjz2cY*pkbw=uRwFRvsWj9?03HCjE^_#nXpU_J z94G2E8-}im{S2{?T)aoWVHOV2lDlO;At<_3hJ#XrbEO^7pFVEzEy+WdMA%bu+%^EJ zlpHj>0l7$q4QrwK`U;k=vPflQG9IaWD@dl4F6ewLWzceAvQeBGU)OQdNX;x*rx z1(r8dW1MJ{#flz&+IqC?DG;8luCyj~$HbS9QCK05OJC7a{G__uj4=0j?HCK~5T#G! 
zc36H^a(T~966FoT@b!meb3rJ1L3t{pGy1QxdCV^%H6?}YXur5(Jg3LqQIpCubjVOe z@h7cTDUd?6iCne?Twt(5nkcGBYA-#`V@In>2Kh-Lsk*g-#eVCwZh|xJ^VA-~0f$6o zIM8n8D`$c|Cgaj~qBes}ctXriiY`%#R!0ZvbRt2BVey}$cHf^GchL^Fuc|Q72L~?& zTO4NPCEFn$Fu5g76i*qHz&;Z1q$N7Bv@v=?ahntj{-E`jPV;@JCc;FwpGq!%bRkjwwoBV{&K}H7Kp(~P09q#Fk2?Lc6vaM51<-vJ-GV3_&k-WTl7-bO-sZ2kN zZ@`#FU?_75pS-w?>oj|tBi!$tnQc}{$Fo{1;?l0?IZ5wjy+G_>y~tmM4k-dr0QO?q zGps359=@BjfQt26L(gRP+MMA$9ivdW~d7%P~Z&cz*I*5=G3qR{yTmnaL< zW}%% z1U7kq!9Rr%?oBU^sDC12E&Rmjd!S^>@e^%x~AkVby4C*-3&xW z`Zuj@vSD7mIyTe}4JuI{SBabC`8KZ^x1}*PR(|Y8V|7c&v^rk^ii)TvP(~9J`Xm@K z?N`~1qz}1AbRnTL3$JL19y^I{N`lQhhDwI4MduYI{L-vOAXD)GxsaK8j^c~y&+{y3 zA}mI3OuR+jiU|u=)9Z<=+_tc9($cLQ1z3(#wIK6I7D06+H!62Zm{V{HQA*A!B%%%S z|HArW#}i|T@kC0n1yz3w#7)b}WcFFb@)LMq<&M+_)Jf&4%!(o@|5Yx4^B^rifP`mx zC(&c%fdmHLnWhOUCBI=-xal)UoEKI_ybM84#hk?Rq{VV^nhW^_kD0}#9wj`@J4f%z zJ&z(XHSy!v7`8TO8Hvo@=(d>tNHE7r$OVC(GEN+u?I~NA?8FJ+x~8AzhT!+*2zep7 za}YRwT>R3aE&(ZUkZ2%mb$vv80(`Y9;p`LfOHHFoM1Jtcgjx}uPS|6!j_6N*r>Fa zviPj$ImWshu?l2_785Xp9Z)@W1t{@~B&*%5YtmiCyU}@xg%TR(Sn33G2R=1xAx1>{ zEf1G@n{pL-DaM@s1Vi?}%XTIPIGgjX&?M%2fk{rL=0(AmEEn-g)Wh7PjPjxzi0mRI z?n&XR%*7-#Y4v6|jmeD-?&FIa{45#Oud7R@G>jX`LdgcDiB*JOHF?X51+R4&_y_7SB zf0$5=U9z{KjZj+yJNjtmpiF7D4=7fJl*_XRn()5Eb*kwBjK|wj`uUQST>#Pm$g7z ziFlXSC0&+^MzP@Lh+9|{?Be^9bQU`Aq-Nwm42yE^IcTwzA3v)&m6w`)N*hjU%Mfd( zBZ#?Ss@|06$S7q-1PfE6SnRurv{-i5NlZT|m0EOiQLq#IAr`Luz`c^hsBj`#riYiU zDEN`%sJoR?jmXfBM)(w+P?z}%hzuposeqm&54ITL&XpR%Yl$U^2RK{F>O}v7?nb=7=Nje>hp#P-a_v&FIn5Om({69D&rayoK0xhUQEWoQ7`wQ)feb@$! 
zFZoFBLVP;)S;BWx7h^@pJDM#E;l*S=;GDE?h-9>=gb^+NQjp0$R~H zvQD@#JG>}V*cRV~HwOGd^v@7@>G_PV28Zm$oH-)787!cR3k8vx3!xI~!<;uzS<#Gw z{SYpzzpxLok6(w~EBPMWL0TcX>Uo4#Ejef}VaH0gn6>jCNZR;~=??1E)Tr!06$MSt z4^WD-tWik{T3kP-QBDcoK(v(cJxi$a%MZ~2QZB(m+XA|<2`#cd;PsMcmLRO$`gV|~RDp;zA@v44O{q*Q)4n$bW|W@#^MZc-gqpbg1|0V zN%~3}PX(xG%J(P|{Rcf|gOIhKdBV+@yO~pDBNiOvdzr+6*8oR`VUacUJ}w#ikj~6E zB9t+wrF4*kS=z`?Gy{(H1{^G5v^T7$aF{+8n!NNtgJY*gX zC+2%5ewSq*2|v~e}i5tARnJjjLWoo7}&P;>+_pxu*m z8NEVNlvILMscu9#5Y3eczxUK>@~^H~#ur(uO$m3UG~9H#a2hm4Ixjetw7f7{s7!On zA_J%_M1nOKmNyjIC9+4E`#6d1F_)auB+#BmKX(@>#2znq7e z)%^SdX}nAT7rKW!>T_y;c)tQ?5*Il|ix|`hOPu6S=4Hbd`fdRogg|C*K^xMZZHTJP z*vZir)x{m=e!%sGbn|J%%iaybKdFvRPQY2lsHH}s3d0q=LwZp+7l+u{V{aIivrrRV|HAKsqljei9X zraF=UI5+w>WFE2?pkbE~LfomlDiWvI;eY+@m|&k_-*Rv zq-9d0ESrc6(wIEyh8!scwZMIbv=MXJE>3!bc-0Ig3!%HOza%@zIf4uk6EY^UW{7X) zBqT;iqVvB;ct{3N2mL=lb1+-oE&)FZ?B{=%aC3=_dh z?6vrA;S54S*c;&k(l`Ivz-j8JTPk>-QEnG6dde;}Um#h>+iAECiV{A~r*RTUhUxL# zI?AlL`@DVB_hI4uoAhUX-NIR{X>Kclc#el%Fxbh1%$>wb1kVhK5(*fR*UfyuIFVM% zCb5j;A~>hnl+gV=W6nOmAijua<@!zVfxp=<41k2;<|!g8;Hlvz@q|c_`+L) z8Eua|Hg+Cki>xK&42vf{=UdD1lbX7g^Q_@gJAZ)*yvV!}2!@J{TtvapqD+m}ClQ(W zMN^duMXk|H%iJA!OP!x*>qS@fA}=_(C|_arSv4zzi4TmgDz?(o)*%#0oNF0$<p>0uS=fkZB~!4$UIeh0d+Zk7yc`W~H;Cw#CLkQLiq>e<9i*VF>j_5M*$~06wATl->mI%eJDM7r) zKvwJufg^A^^pem5wD(sC?}D&LAW#gxbMyg|MLg>nqDA6`rc}|CxW?d~_@`t>@?CxM z&>-d{?-RT-v`t_L?eV7zUP753X~IrujUx^KBwws8!9vLsQ;kR@Ic+#cTqhZfzgqb- zp))eNLY8tqh+e)VbG?s$sXVvK<(}>ul4HBG*c4M~9;6Wv9~f4tKGCUbS1Y%09><+5 z`7iketBUYt$#J<;ATVl70~m z#9UY9V^4>rs1_59H*8i4Nh^#nQ#V ziJ5SNdOF4PpzISf-b zW<>9jo@G{s#>$LXH~k;TrgKd6*I2Y%p>r(!k@wCv8zu{2i$9^yz)IsBXfN2bzDr+! 
z@O5-7JdN)d@))k=zw@t#hXk#j>2SU<)7c(+2DsRUL0ZtrVh!XXT45}MR*Sx_zYf`n zw?uuH0HW@YS}0j`!#@pb7CrKGhDf5X&Suaev6F48&lxvJS|n=?ewQ4S zq)wYQHTD02jA1!X(|=~nG@dr?xBvR~KR*9|zki!PW9IL(X3zO!?!5U67A{)6((0>8X23InweWzT3OrJ+SxleIyt+zy19FJdU^Z!`uT4N2n-4i2@MO6 zh>VJkiH(a-NK8sjNli=7$jr*l$<50zKp;_t=pqajhbIt8WD1o=XE0f84wuIl2mw$e zmO!vnCRZp`YJH_~T}f$Kc|~PabxmzueM4hYb4zPmdq-zicTaC$|G>t|V^&0Dwc+`aeL z{Ra;pJ$~}^+4C1KU%h_w_TBprA3uHm^7Y&IA3uLhP5bS)>CZsdr@c8oeiaQ}nLrZPELpH|D>b`Ij~S_U8XF=U?9F zt%;3GNK8&i%gD^m%}1cn7#x92rL#DEKmbJXLoPkz~H8#;n6X@ z$bV`4e(_7eqTjuL|3R<& z)924$zI@ed|Ni60&!4~k(LdGW?K0I79{sZ|4)L}tmhq@JR(8EFvF1!qR?m@c^vJ

    ehX|yq%N1(B2*0%EMbbl_$p9lxK&URhKt5Xm54bRzLc?_=m&4J^aUqe|-3#bLpS) z@XvVoXFUAx_sc)?;-7i(zvo3*^w0KK#Jlbo`lJ46>GgrQ>NEZ6-ADV7!-oclo=KD2{0zGIBHdvr5&Xp^2pZ?F7JTbJxY zUAz2BX{+k`-^ITMjq>(x z+6*1;A5a|c=#!mp?3SIY>Xcn9ZdYFUyO`?r)ISS7hwg~{_XDAnrz1h)JKHvtT%HVW zJaZtrYya-tp{*07?VCq<`}*}9x;M&?we-tQ*7Zuyly}R{YdRGd|1PHb^c=eM9D2g@ zKWqvnKijeayf^8qy?)rQ=J?^ruI;;WM~AkOclM9)_jPZE548`$>2gghe1m%7j^(DNT^x!@@;VLik^$F;m0dnO`TJQIAjC1+kt(jnNtJ(riR*p$`mygQ#l#D9&YDX0NRKv>siec4(zl*6&&Ms5Ku3=yHTF2hG zY#Mjui4kG!wuP+yOi)ej;jFI8y@bKCUF^})?cmnZ3F)@tt+E}OarrLQ7R97uOu1V& zrrPs&F*W4qGPT`0;K^x&fRm5bCQrOFCU@Vqlh&ROt0+5>(_DOr*sa~q9?)D6~^LCf*_2BHE!C|GStPwQ+fSc%AFHzn1yzdcQWk@0A6)?w-52TF)M;v}(Ac8t{q9pQC~4hg$~gJ6&Fps1IBK+?zC5BGETO9%cgzU^LTb@|pp)4gw& z1q}W)$Y^?FPbhu3fv>n7FNLlaXvG(aWrB0GD()FpE&DXLj(LjTz&Isrq@M&EX(z-@ z)Dx2Czl&?<=dC*Ua-sF`&lN$fQ)X%9AKZ(S&%&xTTHmj*WoVEFdvr?<^iYk1&Pb7I=Jw#1^>ok{EmT`7v|9XT~; z+lso5Hq(X=H3`S}H%YheX;AOpRju8>qpbMQw&K!bTU8Y&$7HoDC3%<6L;sLo@p zYA&)^5*O9^0v$WDgK*7Yh3F0&RF!D-dNV7{&?xlfs~5#{W)zXdN3OgchklW zbPKla>4GPBb|?>QYgQf`t5csDt}3~-v7+{Ro2Kbzqq_C#e~PJ2FPo{hu)wLd*wpV` zF@udYeD97feA^$zc)TeZx;-3Mc4;`R>GW`Z@3CRr(BV<$ z*nv@C+vKoh-_}9+*k=9uzq?&?si~>_dUbu%&GM?2YyTu(e{ zwp{y9@h=YT`e)G*mGYxE0{L!JIPK|ZxcKfwMDewq@wFHBWVD_=RM>TBFKyF~ZTyL0 zJ%|2H@*{11vQxF)^7AF_>MP2&ifhXDrmO!crn)?BrrJXTr#d5(fAohT-)|13J{u1K z|JoI*zPUfT^6K%_`s0UDUArbJ!=qby+XqG@`?~ZT8ufb+l|8bvnoi|KX-COrxTEGj z##HyeI0Q{~g(v?U2t|At3I0Fqy;oRUXV$RoNir#unIw~Bk{PGRiQ~kMdpBO_xXLTy|*r{ z^{lm?v@1;qu?yYSjJYxUyqQVwg6_U3Lv3qnt+_s@RS!96Drf^TGi@Xvs>iUP`U9ne zJ3}gg?o9uK{O+K)^Ll2nj<{F1Kjm86zMKn#2gvhB?U}>lUW%rk$TCYyYQ4Tbr%ekv zD4*y5&^y6{Y7k7Q{y;HsCW)o|sXq$PZ~qAt*&GB08&6QNKkPxHRpQN_J&5yD`_VJA z``I0%Uh2xOD2u*1twCFlZd28eyJWxR{{vhLeTZ4c9HE%lV?+~g{Eq@kpF3C|1f~NX zpk`0VqxPL~S4VfIo;hheq1>PIJohhW z4pJ=aA)+~N_>Te%+kOJYkb}j_4K!{KxZS@c;>^+QG2`bp;aX<)3rs`4I!#wxxx6K_ zUfO_d5!X^Wgf;Z8JSg|itzz~vDp`G$3U)uKg8PR7#;rer(tZ0u^$zDp9UHwbOm6Ty zbaoxA>&!-C`Lwl2I~1Ukbtjm`ZHO9S6Rt78fzraOqqlKt86B(|W*5CWw;P%??cEyViVRzf|e?T-S?8-D`T8+P9BS#5Rl_*Z)duB-~Ez4}vn 
z@f@`H>#%b^e>_aV8A{gW_GOncdaz~GE`o*JNva@rP^$3l)M{)Sy#~|DtVOr7{xG0? z{ZG$Yzu$Ondgaf<7e71L@L*N2{_6U4=@}~`_m~HVIUOOQji<=TBUy!np&T7{5L=8M zfbIzGC+d-XNbOn($m_ucnbM?P6KzWC9)=KG)78-86KBD=mVg}Vst zZ9nZpCCx^&aYs`4Inx=U?5S)CVlt;7V*(?CkK^R&;{-+87)hBrMp31V(*7{_y8rc` zLvw%sciWwhHkE-d9TZR21@UfMCD1OrWa1YCG3faiQr77t8vH~$GxazkH+dGxPCAyu zN&E%Fg&oD^B^)L2;*XGcaYxAfKg#s<-`?rG_>V8j!N0bs!796au*H}Dz$Sul%L|6O z8k!Ej6rGiPApr$jOhv~pz_D@X5crt0NPP5}976ONG%@N7mKZsYCq>K?$$u1}-SrJn z*>495FK3|k4tk*RO}wTJ$T_D8Wgb^0NT%g*-Ix?>9uU)N`$U|E9zMUNiz967U`jgM zsj}`iva+|8pzUkHl@2swECcm_fIl66i`$=Z_%jaw^ZWVpefVGYKB%AnPI@|nLa(3) zT5s4joqx`PE|_tmFjhRROw*3Za>@oIE!b|+StOL7KWt1iJ};4Bx*BZM_2}makV{KT62er)zvKJ4%7>Tqm_BG$uhQX zT1zXQQPN5$6g0#5a`}G_E>57>Ip9}=OWaMfE8=3g2Wh_2H*dBgR5o26uNybRO+#hq zs$K)Ru|r4es#oR?R7m(^MuB9qh*vnR$Ss`E}&>TMeEvMqA&f z6*f`#>TOf5x7njE^|=twP5RPi567{FMw1mSJ#bS+3#zWT4%e=zB=-uTyqg2%+%$u5 zl2odiA{FZ=NxIUB9dMDf5;|wcFqPdbkdpLI29l%8;mQ` zcEK(37Id8$auC$gJD#8Y)1h;Jas{U!U!FgJDKCIfEE`a=^;@7jxC5B>+JM?!ZjU;4 z_+1{`9dzQ~>sjUI(UD(21!PEgMj}`CCxBdn>5fviCvP zdh3Ph4F@L|*86qd*pgg%W)DGo)HzQ&5hBkYO4M?KNm zapUGqg-dfslhusDtRiY3rj*!=H{rTTW=t2wg6^c1qdFLs*&Vr6h<0u@qCIcf0Mq(! 
zKKkL7;a!cK>tDK7; ztq+%6w@PI%I-#j^{uILT*j&uf6drOWLx7mh7H3SMCFzscg479uEMcI1Igb~JFn6ZgbDFzaDJ zMC#qJ*o52B2{AWeNm18RQX;OwQ^T%hriWh5P7k??f(KtgX9QotWCUHtWd>d*J_oT} zfNJMTps@cL6nGs33LlSWD!=f1s^FBX$_UJYJb`&qnjt(Q!m1|oX(gjPu6c-^U)9eP z*Y(n5jXh*#b2qWDr5j(=+J!A`>q48`J9BE9F=%2>upS(@;OBu6zRB$tfxbIpT!{ObN(QGGAH zps9zVXn}qoTD$N??VVUdM<=?xqcx|xqXAvBEWrPDka;}sS91p%@6h{Nzocv0;G9KG zB>j{sNpMt#P)G=l`NgR~(X|i$^6C^Pn)d zx{t?i=*bnebkbz)?Id+q8@{-w6?5PP+8l6EAK18SM)B2-yM{$&(HqdK#5!M!%~lgn?|3ki^c%bydj)>yaXnl(Pd~R z)EL9CoLbp0Vb^!_`K|4_lI|9|y03v!GFU|_8#3U_hKdN5ehsmrcRBp-pmNy{bk43o z?-KOL=pKKo%nNbJ>_qw>%71=VNB$xLs2T-_uB~U@nLgl<4I(K&krOrW*Os;XaEFKw`D}3?iDuY<3 zDxw8P%aav{%Cd__^>}l)np)Q^&26g@N+(numl;OaF2Jn&)wV910yF2M4I{-Z9{Dxf7KCyQW{Q z^F*I-@~56@4d)zhO%jgRAqubNGcWcFh0fj&|EamG)1zh78pItV@ zV_OE6!~b(|*aw~cJA*QN|0flW(RXWIQm;38pf0xi5zlmoGmdtp@P-;Q)a@0hvKj-v z&ZMKX>Qt<5g^WKSmP&{7#hOvB&^X5CmkqOdmVxE)yMxAlAJE%51GAmqlPdeD`wdRX zH`}1vSdSleZYYX$s5dFEuLZ7ds>wE$o3XXUdQz)W$LJQT^ZI#8@eo_C9%0ChqYQD` zFhgh_SPnq5Z6#1!uZQ|__5y>g6DYUweO7B5@u0;4cB9)3zBufkeR3ieGd7Tz*U^!t zs%gkF8miDWg=RvFq?FdhE9UgEbizTJMlnp%7)Hp7vLTAhJg^*q_P-7~+r7YO?FcFl z`hW)Oum>IXaaRZ3lFv;CrXHP%!VHciaGQHlm6oi^UEn6tg_r*iZOox zZ;%gR4EkYgk$DJTXc<@zzdICJTY<6z4xnbA*VC2*L3es>BQH+6$IP7!N|-zriSC+= zXV(lRD@uFdrOGyBrJxDhm|IVVy55;x_$pp+PKC4|S*{z%wpa#GrRDw00cf|b1ce9J zgOUS#fMu^8sNduMpmU$!)lu7^GsiuG56^j}3>*)}H_b$I%oB++%|J$pqzhHfYbDe% z8>!9YI%YerhTDm*7Ih=5ls#Ehr9GKd<;w!-wmf&(01SI~gR0#&Pg{06-yGQEaqftn z`;jyD5kqHPGFnb~lPyO=_?oeJrE~yZ!tcs4v)hQ(v}S5Osgc=)Yv8n^>+;)Bbp`Fn zI$e8Kt-fP9fFj62yKg-x+qDDKZQFmpYn$zb$z688%vpI3UpN@sx@eEIoOGvaW`c#1 zu>=(liUYd`T}JODRFI%20p-6(2CG%6sp%K{W{UI~hKuLBiZw?1px zwENn~#y!VRZQeg}VW(f))xC-3i?(RZ33sM=CRCU=22-+zGIg{*bSbF^Uxw>~Hl%gX zD^cyNs;o9nO=fF;Exc7ym)@dS7NBI~$`}18bsIL`?fq%f>0@hu9y`C@uKo75;PUHx zQZx&;SkVbDCg*T?K4UUTMj1t@pZ6eO25=>)KGO5vgBN`V8Qr<&^e*Uz$Sz@dN~dI5 z0R4uQp#10WpS1n3W^wA9?c%(M$WNh4&q1#4?dGENSj8BQV!w7 zNmE2=;v}U2Hc6MoPq5^1<6K4TIB!{iitj$T*Zt|ohv(k=pzFrJS64k_QSUelZ;#yMVw%pF`rJ&Y|&^pJMR&S_xRQhYfZ*l7XE!=81Q3=7XTf~$x44}&D`m?L* 
z`?H$r+fhyRP0IijK)(;q9i;9%fWpHT>J{^PR_GUgzc4uIx+)TRL6Jb2mu7O03$c>J zJeqon%_$yd2uvdsY55RASv7#w*7l=I>-$iahF)ZCV+X3Su`#EisRq4tfDCddfE)_k zpdJV}8=&#@1X`c42im~I8=A1}i|Sa?8ATfBgcKz{A|R=zc)7)6xqQSGydKS+el#bhg&fy@`f8YvC&H;}t?$P%uy;H7N1tKq2hZEf6gPAR9EoG`Zis$on;3N!RNF5Q~lBxKj;D)XB=s{QlA$H56n+jf`GphI%^nJW;ELqv%lP7In*5 zmL3tkvPa0M?i4U;+m_024%VxoUec}5K8ypv2<89f4t~$7og?nmd%~_X`@_$*hofd& zlSrdA@cb?lQr%F5GnA_+mHL9*hC)$(tDGo8P_@|udjm(_sNF)u^r}10Mvs6{37=uAe zs?;bM^|At9t4Jd0;0e{;Y@wltm0!`r;#GCA@@m?b%I^+^wp*b62m62-%Kxivy?=@DUa01@a228srcBvID=6uq zNGp4&qN*;Mpr&J~{N`|QHPBdZfp!n<1?5owUuW&{pw%JZTCZE!!dPJB?096#aDP0Z ztvxxfsy;)ZuSDuqWw>%tDW#5El-tbE=C_jz<(>G#qHb(qWj9`4)kT!nbSxF1dj2dR z2kpVlz_{NERPD0^jR)QCcG>w{9(MOXdn7nu`cQas-*7Cxp*x9N)|xKY)Mu4Qt1y;4 z3#o=-Vm1;DyjD!9v;(Erc4n29ccOIFofvgZ$5H`m$UzA?6z$&x%B*&SS}W_vt^1sA z4%oV#pZ0J&c{0F%>_}Kr$9N33Y7oXL=}s+>w;_u7O*v(ZIzkn(hF*`Z;x=VfN?J22 zG;L{>=Jt%T+O{nH()rEdz-mxpwGrw+*a;eT?Z4M;Wpio5*7o!%H;3tYpTPbTp|D1% z9%LGi&s7a2O9Xux8dew5Ky1gC=d{wQ5iOj$^d@m*N~5|dsnOV+*wE0FT3O$`R6yb8 zkAZsM_rS1gJ*eKk?Qz@o-PeY9Tg@N0-93HQ!FBMgTX^$n-!#i?2u^c2nkAlsiP&T5 zO3EOr2-iz6BD-jo%no*CdYhm+6`E2>Zq?N$wU*Z;v^3N$6`+9}w7b89_F=38^_w@{ z>D|2L+~J)YXU#=VshRU7ie|&uxzhNs41A4X|%p!tKWKB@uU z!!o6H^UNt-VoOq|vOKXZ6STaDn@#7bLIh7mNJZ}4W?ZxqRYmUrs+&Fk?r)~4q zeF2tBb_s>DYHiKh43MYE^UxmoZQdRC-&u+!uq6! 
z_})TeT(5q~0NsXt^R|}T+ng-7tbz+~+9V1ux@NP^ z_z}q`qL|oUlJcJSAY>jwLH!4~f|PNRB591SgpKB^<45ueYVdZ)8bB| z=`kmuOIMDQS&_$Sxe>EWcGxivC-j$mZpcw_-ckWoU%q>7;JvrUPrUnf>z#kCw198c z7lL*Bg>L3X zdX|h2IYT1^pJ5S$=DDQ6c|Lim+-d*E8)K92{H1;I?|-*|Prob#-)#_r4SShjuM-hC zdFMRw4Te7qj!3*45gU6uE+Oh>QeyaxwB*q1nW-VykZDl;AwBRa4jyoY0QbK_&hWcT z%k;g>Lik+bWG$7kfwxz*AAbAw@_X;SuK_E+5`iCnW`V7{2*BC~1>Bs|fRASa2=a@5 z5*`@-I5s5Y5iBg|VM=(w19+tW1H^yp4>{33_pmYEck$0_53!zisBs>581YM`ee}&0 z6(`<&LjykeKm@+{f(3qBLjXHBBZ2MiRN!(j4tUu|fB>fu5at>PqTT&Kyr&OH^700$ z-kt#N;{h^#-9fgm8$kKF0kog%GpxVMGeUs#Q}U7lc|X4gxcj~UY}fTb=(7ii0~~=Q z)c2_L~ zV$RW-yb~mz_!v$sJAziIW{?`qAw)^hR7P3JWO}83BCVlxBDLKxlGbDBh4&cRGnNM6 zzi{{xn@;nw*?w6PoVJ+c~lr0e_Iuu zaYdGhT@)i|^L%{X2{uD~l$NKQCQ7uE7?pk;S!^6dn9U;@_2mN@9pxQa?G+8#?G;tX z_6iGXY5eXWaasqI&O4zQ2pgdDfa)N=!N2N);_hf8(yyxH(dT99)Oit_dxA$19c8kW z(`2D;5~nB~$tfxu%r=+zBI+tT5p7kC*=x@}1S0Tc%T6bGr}I-qjg z0kqF+5KuoziFe>*eL&3JlCae4x){{O!er7}S$6J8Awf99Wh%xPeBA)4z|f5=w6vp3 zE1Qv()s4uOno3klO&PkSMvrN!DZ;i^FNNP7WDe_q+TnTMJ(T~uxB`Qx{}ZEM)IB3~ zsj?v|Yq2Dea9V?49+u(rM+G!RADdg$K@%C9NQ&}0Tv1gO##~#DuCFsep3nVnh0gw=+}qCnNwu@b?RpRYOD+CkXWAoU4>u)b z4ppX-+YMQ~IxR+SR*;HHButZ9z^f?WNo#~0O+BA&Y~-?Qn>e|RjT}}}J%`m?w-kPN zP#uKk{ts*crh`_X($?=Rj)o*sZ6!RK38N9}Nd~r#X`yNO)kJkc9ag1l$WdAvQOd>!OhHpU zPSRYr6o3SZg9M6$cJB|sxMw4%vfA;qX}=XThjQTJsJr#)89(QV@sO~tfvB{)&IE$7 zIhC!h%an*Ka|$_Tf}Uoim*Gpfl{qDnnyg|?T}E+PJ-oQ79#Pm_hg3G#E(IWk915(y z1Vy`7L;VNqLGA9XkJ_zvLUSm)&K>dG@yl^<$D!$<(3a7N)bjo~oUSW5x1a?s%4*msQ+Ar8L8wx9&iAF)UIZ{}osa(P^LW~?lE8zWPO;TTOQGBmZAKj}mMD~^% zBYNwM5#2q;sFrTiQUG$u;dyUR*`}{S!}?V>`+xp+?%0;kr_SwO-*?gaK+~d=r}>O~ zMB!<_6yb?b6!TaNnQ$a27kM~?pMD4}PM#pi62@4{*pYm7^sr15Ijqx044WaEHK9Yj zMN0uFA%~KU{|1%oK6}{l!)FVJR)2cv?9X5KUD>gv@yb5CvP*Wpg%@0-g^S*4jB|lG z`1wd8@?;_dek_BNd?e?2?k|ix#1KbM=1C(b#Io=Sr95oBSP?p2t_T@#ToOPHIT+V| z3>v@t*Oiga|2Z@N{?t}?B(Rls7o1H5f_liu=CiQ zkVPUoXn}?aJjcQXoa5sC&k6{>X9|cuXH-i9RR8Ps!-MbqW#H)FUT=8t;rmAL)z?a} zc4I!+zLyRTIO2heSN2oiz|=>;ViJKkE+Kl=8H#uM-S6`BM2Pzk>MCLgR{PY1hp zoeSnPgzK@Rbx{r_cyhn=h 
zxJ!$5zsrnuyTgrhy^|lm6#9DJT+wjotrf;=@4l%7pMI1NzW<62HvNPHR$CFkeoqo` zJs1Og?88BjQ!og3^#?KTz5wRw4N{;#-|5~^{lUi_Abnk-+Jg(g`8hu$`a3j7WtM|IKswq1w4*J+IoSY`lQp11e~@1dpsaZZ zP;U%g=#U9P;VfL z4t>N=ioe6ogkPg$b1so+#6>KZeikKS&mk1NQ*f=|L~5z{cyhUPHnFbY7_41(EMZ7~ zC~-_aoIED)Nf}eLr;ff7K!+UYkOSNHJ0Nn|@S+YP^R)*BL7qSw8S+S&5O;@{o_;+y z2X%=`CN2=z^fPDy=XAD=e=fuA0nv7Qi7(cuNo;&bt zR)GR1Xcoj}7f^ZFKyx|nKouPLSP>b0PYO%9AxO`@%s~?t=~VhWk;^@WmI!9E)Y8Ln zy<#e@LOq_+s2NS})%Kb@dqoy2{J}odxk~026XxLJs-XtANbmXP|Q02{f+O zKq+p#6iM&d-YdeD4>9 zhTYIcCtOq|!p}&v(8u%fltZ};&Ilz>)Q^|QyU^;wHl$wHgeWhm&ur4yWcHR?vU*Dm z$llUoRG(g#^J)MKioT?h5uZv(~9EQa3Q@v*_z>rQEK$Q6BL?71RX>Pa;M zc}Rkx4ChfeJxsQ!jUrGq;^mqejIOv6WhylzYYir3m$3xZW7MI03~EfTL5b-teHAYp zav=xFfp4JR|FxjVb}Q6}0rlRy+CMDw_PAjV47gy9h@3YjCLY$ovq$AQ9p z>lr*nB}JkwBdAMCv89G$OqEHCZYxt`x=jjPw@HTYF-q}0hF1Y{p*TEuknaBm>NkJB zb8ic@gJI7zvy1J$3NP2IRRO*W)#0Hh%3<-7`n2qRbvC(8isRJhQ^gf*j?%~wYKtif zeIcpHs3cg*WP~QOl+a}s6T8Ypq;8{-)bmPSIItlH+5WGAX8(^s51qZ2+3kK(;dJnJ zwTIKCdVkL|jS&Gys^j8Dj44@NIs~avf#Fn&$zmg)r7U9ewJK(TzJR7RiYcZt0j1u| zr*xWm)UL8TYPSi(_$mM!iUS97knj5%==MRm_koSTVzcv6wd4Mq_3m~TTKrs2w}$&p z)y2m3Tapp2B^jjZ!W_0qMi3Q==n5r2Pb=k0^nzTqF^_35vzav(7PAwg%gkbQnOKZ& zmekFhd#eoMosI0yM z#e1OKd+*Pn`oNaEO?Epkce+}g9rQae(-Z7H*b)`fTAKikHKY;@21IV54kMJRNpik| zrO7SGFQJKLMxv;w3@0o%V}vahtf0$`=XaY3yl&&G09?p{zx@-S-u)%e?}l>kJ!_xT z?b~>(&1UO`e%I}%N4@t?3GzS+O+>vJ>vKRrePN8;=A9tEM7g`D1Yi z#&9woKafGs?n80XdI|Z7-3)PT7r!8~OQ8ts(knx{s+Gar-O7-*5mi{@i279k0py_B z@J~<%^&m8^`Pa3f)gK-|`Qv+I3){cwK7U|+!#T%&W%KT?g{S<3gvZ0;SVv>h35Sz% zkkfEt+7yZon5U-;pp?kihY)nDGb#dN{WTD|DvDLCgH$~YSskDHH3%bJVNPC1>5 zi9el9j5>vv2gHP~TfwMBU|1llM_m~Buk?V78nB#Nw=qmvQ-@OA&tN-?_>GOA> z9*DnAocr|cuIsBmt-HB-jq&=PovLd#_JXS}-i)i>p|~qSv4|_-$w`-E;c*vJvZKyt z<%BQbupwuu_@H?X(SJ@z_B*Yh`kXGJdY>wzd7Wy2=zJwW^Ua%}>a)LI>i@^v|g4zY7Z-4rTy%$0XqE9s`2C!=FX^2S14q40xOp?DuO%sLvy0 zxYt8WgvSG7r2BnZl+mEPV z{TFDk^ZPWg@8@`6w>bj1?g#?jd;CD)J}(dkEjEs}b_MY^&LGJasy{#rmon|4+Jl1~ zz&P3ff}=H{I2{BG=uasJ8dT=LlFEVCRumn5ZH4r~-(KT@Pu`+{@7_m&pFd3jTfU3| 
zyT1zs)@%HM(>gEUvB4eqZE^v@TO2|7HhU1g-4?{}umOoX4}z4P2LOE6et_7$51@AM z1=u}SfB-cBQXv@7AmGaZ+26ehklQ{0D4Wj#$@vFB_uc{+LHhtb(h-nh-T;>#`~-!L zet@9D?xu6oZl#DaZzL(QuO(=6uEyyxS7OU>m!m83m!j$k7b9DV7bAK}7a~T<=fjVX z=Oa&$e~CU#J{&XmQvP)KEj|DVJqr}{EYQ}UL$A^gfa$dvumbl2W`rZ4#(M!mTJU2u zD*8b-CGlSCNQ2f&Q=0+f{C$GEKM2RX#V zJ6WvsoACUsYbgaeSCR^`ml8?{7vsxF7hNE zRc`|H)(-$_|7U=0zZ&F2wV3C9Qi5PRz>o9*?1Z2vw6v%Pgq+0NXbSvB76*9+F2Y_) zRT3{G6;T!wOtgi#D#p3kX4cv0-rTd%Q|x20$Ji6`$Jm1jzi_%>N4Xt|FXx2=26CY8 z`wZmSeGf#g8(#FJhy!eaFw7nB;sT$rlA|6_vJ!9Oi16zu7V-*0fV-G3C!J5$(H3AP z=Gpk_+%vJQocWkR?#YD2i(?w0+!WEAhh`o$Q*wL3Rh@0 z$NM0V2Rc6~i158HiVwS)mll78iAq0DA|uaWIrx*=BI++0D&}-r31>3d!W&Dh7YxI? zgoCh2QD^cYQB%s4s3vtvT%JB9GN&JUDF6pK;2{V0p8o*x!Bx->C8!s}WgE1gWIs^* zJ3dr~dEJu5gj^9N$1db$r=DgIvX7CNxI-8|bqraMJD91->q|EXx>Bn|9VxAn*5n~+ zL+YfoDt$s~hEGaMGbSba%$M`K19$g-K*51;f$Dh=g40%LF30Mz&e!g)HrV~DIx28c zo)~pngh)EV#b!;=srVrxht`7;u{)4TehZ>l)R1A8)}}Yes?&R9_p2f*6-+ zvnC2OuK?hoI1sn|6YzHZ8_1vM|298Dvmnr2l&-rU>b-4lmIS(7EROI!qk)C}B7?&w z1UXp)Y!beU#-g_n`J8%eL4Gw#BQDQ26qF&V~QRA|g z0SMo|2}qki0Qo!r4U|y+ueDwS?Sa_%tkh-aU8CoLtEK>l1yh*kNqv0qlsXMID9J{2 z^6>cPTpGQW%H@_5MFJB}DJemh$aSc4Wg)6rr9us<5H!{!@RZoAIc`R+ee8{#x&i4GblNs4b%XJpnDpl}uefnLgGaJ8&F zfr2iPN-2d3A<3ZT6Ke|dNZo~8(r_V%G@@pcMwPGPg#&%#`#=Ka-WurKTfg@^U_SWM zy(+s6SLlb*wSSakMfju-gcWZ`7q{Rw}b`1__R)6;L^fJdQxj7D@R`rGi7( zt68)v4TIL9p+nb|(rLqLI&DNnr;RFK1|UIkU~YIH6hOJRb~kkHZT0Q5^8G*DskL2q zsl{dEnNII*GcAD*gS8O>ZI<}B8ht8MjYME|3JgslC3D0=mVn34m*#Ti3L3jeO=g#C zi0oDkfjv}6%pF#fSR*PDYgG9%02y+~UH?9iZ~GT0+W8qU?f&|4)!y%JHd(JZ-|76* zsa}t*6Yc)?Jq_Xhja6~6<)&nKNl6AqrOBa+l>|1gfRWFV@FWzWR8Gj(LY+(I!W@2+ z29-Zth~|x`G2Brl_7%Kv;6S}W%B>$m{RjU6mYrWdtlRy~we|z676zPF{W9pfWw_hV zuDvDHucj_0##oVI`XpAzV73&J>TR zvV^0`Y{97FWdI5khrD(F0P3wDfzqwe+55K7?l$fC{8I0}&*#VOzB)AOvbnF{*QTj6 z#HYMDDypasmMX7IL*<*ZNGt<}NiHGhVT*Ew*;;`VUZ|9(s!Nqg>RMG|VXrD#HL6UL zjVaO##^f&pP@!BeA3F2aLcdd{&Hs8@yXn)L9a}$L9NqJ;an-vkiV;ZJT!o{+UQk;lKd`-n7tnE-7t}Dr53ZZZ ze;I%UIjDYk8O 
zF=nD7aZ?G2SrcjS)bVU&!YB?KJxU{lkK|E;M+#^GBgG8AkxGW|Xa~b*>$4}``^)h8uit9FxZ&fP3%kEHEZS^Ro^#%pf5y|9 zF&E&AI~^XDbt*143&g@<$AF!+vQ?C z()mI!()r>kq|3s5_R9e3PyPa`Km5x~?>j375B+UL>+OHOQF-T^_lxhW|3ZFe*H64V z*4t@!oDX8}cspg@3Ght36Y3vxCnh-TPGWe_9e9-A9aN0>9b&x4ZDxYoO@5-w4Oz1D z4Q-0kbyKS2_1aX2Yh7vf*9Kn-FucEFrUmMO7<_9*%h7jMl>hqCn?=8V`(eSa8@|eW zyn8M6iOn|5Q`dbNPrdA5PyJn@o`raXK8yAVcn0(LeVQKV^*B4&{a0M5>mzEI^Fwxo z;{!pY!+m*_{e5k;-91B$?cM5Fo4fTdqNL;L-fo&`3knUwPw!OZ>H2T^K)rr?v2*C%h<$^EXCV`(m$OfDKoeXw< z6%F=%7Xoa5^aalAJb~8+R}iq-350I32a(%sLELt0khtR@NZSe39(L`8Y7bTbw`UKa zSnUGLy*mMS?+ze>77oYH~|4Ub3|BWMXTV)5lzqbYfKO6udYxaSNAFV*lPrE_F+Fc-d-42kxZac{Oc^k-C zzZKxuZw8bNn*eL$Ccu6vAnCg|K;q`Vfs}n81El?z0PFT6)Yh^EV8ZqSWV}7dNOJ?J zXkU;-3w{=#2dyTNL_dmB#XXEDf;|W~CfyG$PresYn{qd}IqgnRciNr6;q=>qhv7E@ z=i!%v7U1WC&%;lKU3@A3I~-O)aoF@XkOsv8W%nh(xvc>N-z@+aY6Z}-b^wv;4$^b{ zKr$uxDU1{GI8Ge%C|Vi+AX1ljKirUfFRVP}ZfI@Vo#2-A+dKT4Z?FT#*|C%ioUc4#g9c1TOct>E6wn?d7<>p`aw7lY4c zoe5n;%!Z#wOhsJCdKn-Ea!7_85PLobcDk_D)_CK62y%tOZN4MI@GnW4)oQ~QOw2AlbF*HXE4)I=ddHuXK{ToXYn0z zXZ{ag=NZ*h_P*^y$A-N(Y@jGbdXrv44J09?07)Prp#?(kz4zXG?^OjA#oi0nv5lx$ z5IZ&$@p<>mf35e!Z_qXC8fVRy{p6fPxUX~Xov5RK;NKbeN(&%DV>u-2ZGcSVL2|4W zAcv?4Su{gPWm!Rj2k}Rwfc`Bw()E)dh4;ZH*XONAnPAerE@;B7E%XJeKl~ZAWn4|Wa_Mg0^_YvVznEc0mxMh?PA8Y<0Y(Y_qT>1u-JhboG} zt8#|L(;_b6evY5h^^{Qei*a$jXCl*rkB8(&9tkXs9Te0i?)Pm=>GK&#>-HW=@ASTw z(aQfTqtW+ndad8x^eVxkg_+?g(f~YY}v3H2EFQYVf;| zRV%oiRT*$6vpn!lMoHk^^gjUsXohe()IUJEw-|NSs?g`N&Uo98Mx5NoX4}0JEhNn+ zElvivYg}zFm3q-n<@vi0XN3D6Oo|WbjZ2H_h{}s^4l7Hp52;J93F^qI3_O@!9(X3F zH0V}#QSj}of{@!;`5|{Q{=~mCMDCggS!jk*&6QBIZ#^^`Zu!<~Ci}L-TK;(#LFHjP zRe!XBWplRDgLbUQk8?05#J49sCb%srIjS)(JH95mIHe+@I-?}4HM=l$ASXZcL~d@_ z&D@;uTRGX`x3e-M?qvQ62;3|R(Wonyt$=c`<_f6OT?;Mxn?HA$Y@h70-1T(8Ug1u+ zqu!+!hRumuF6~empVLCd41fi;usI zxS1Ote=8?G?snFnfFN`o;$@Wq`B&Qr_Q=}1Ns}EA2Qo~|A^J@EBi^h zC%c`jhFaVxJ$0UL&6NV5nv#&9(t_y7{M_XD%&hE`l=R|^gw)#Xn3Rs(sFb0+h?EO? 
zktsKGBa?6CL?zwM`V;@okhnt<3gs7~?*9^KRbTP7M|1V-!F_92p=r0X{saVX5`pCH5@;U=%HGP0p+jZq)PUOZiDB*KkIv|?9zB8Eb^3_C z)}aGbTz4E)5#>Muxc3Jr~|kBJViNl6H-%1QODD9i9JZ_M&2@6F~`97o*F z_9(rb-^nl!g$&uaj9$!`#zkXF~<(UhnI}V+;Rqq}l zn>HMACX^j?bI$Ge;HLEW`Nnnz2ZyyqMFzAa#_^l7k~|tqQaKIv>23|(nXHCmnasu; z8E&wh5A+Y{j|lbYO^D+4WW>337bP;fYLlJ2x>D#}!zr}x z(G;il`zeg3KLgPycNe11Mw5)#Y@f8~n-l9q#%`__8hyA!^z=QoB}2Cjw{(tL?XA5` z&@Z|`waz%hbc{dc&I~{9%?&sf;Nv|U9>6^kAL4pAJ;LQsVKi;1CYCbP8Am>JB+lXR zwK(#@NAZ;2zvC%AzXuXm3q#okVQAkh^5xJ<;YU{%VxwcrurrTk#Ey)qOZEP3u(9!; zg?#C4d!4N76kNhJ7ed5k4lUr47t8yCKiB4ao^cG;2xXg{g)Tb{f<9_bvumhbSs`qxs~DRaI46NaI2PYcdN_S z_V%!!&7DzSo7)q9wwGV|{|U%nDg-S{vAc)nVVBT33@2~T#|9@?iF8iNF06m0x~6nO zZ+q5DbESkAw%Xy(NydU_G)s?XEIal%&w(-SN2QI2I+4d?8N~54SG(~7w#~CDj@7d^ zuI2NCT#M&txwsc&T+7FAxRztT2Pzj~5BkKhi$^7}k&6=8f&25Z_V;VWYCr5;T>L?8 zedY&!nfMRpd&551YWck<8SviI%vkT4R?hFac*=V|f%HDe!Tv*(qwR+zs?~=aI__hc zli9}x7n7+ThVj%9hS8_XOv6tv5fi`VQHwBk=^*MroDjqMuSsAnlZ%9^zHN{woZh`G zeOi4(+;;<+&>uKO-yb&W+#f_;<`0SieTHE|p5d4iX1p!%KmDzëLe#YTVXVdMC zXA201zbc9PznV$=0R5o(HB8q1bp>(tw_NQN#*Q8q#rn>PVoi4>u!`9w!g(M)FBRmM z#iGm|4Eo!Az*LR{R!S~luSNk!O(Hn!;K5A~?ZGfW{Rcxc2r@Q?Nc1J(iDvsC(;OAm zaXRR5a&2g`)PgQ6O*r&NjvWxjdXEcZjaNmml1E}#4y+JPhHa8jps*wW)Yf={?j~0- zLJk7A(*bOegCHRXLEDQ1hKeb$)r`PXT_5~3P)OI(fkZO5Y})W!;0@Ic z?Iz$LV+b_afBb_Eu#tc8*sTftf8B#Bge#~(+`ryI338Mapm?u5I+W;7G@eB3U&FA} zF${}^g+f8FOxP3Fh`7QQ2^#ELK!m-EZ9sDw4)j(UgUM=ruw1JP_;p%9T(1F?4Qk-D zQ3Y6=_5x?K5_n20fSL_&0>1uAw7qCE1ljF`K+0b5V`+nzry+2I zao`qb1I!EpI2Sqqt&#$i1{yfFJN+W}y3CRW8MDL@=1<}U)(^rR*J=A_uAc~#u5U^2 zTwjpiyFR9TaD71e==M9n70uwRC=OipMIg{y2_eWyh1%|f5b|CKWM~7QX9%7FIB<`) z1=lnpF!IUZR6zq;z0)sBo6D?YFY~9vAZvzn+;xU{(d|3&KKl#d1^YeeHG6{mj{Su4 zo_&w{fpe4g!TooDD{^8kXa-NUMG&aF62eW853-g;l`AC(anb^R4yqpc;(!-{wq+#~ zftgDI=Q8>)dY#KGwUzmk((5`y9&-EPaDx4vbcOSc_{e>V^wNEj{M!8~^_}~D+B@!b z`g`7Grw_jc+|UdR1#$3ES%~!F|GAi0OBskEC_yA$6GGe!!Ot59-XVD4#*@G;i#p3J zcKYdD!}vjOah;}iv%gUex_@yz&izcj#QW&*H}5U^mB%>s4evhvt;eX-JI@Qw@4U{q z{2pK<7sQem2S4S75P|YVoWTZ2wAg`skOIU}H6W5@0KpzO@C(4tc*i(Q^U`Qv+4(M? 
zSQV^~j0W}x=MMLG^a0)*+OX#&^_V(&Q`fKlN&ToCryS(9_WW3=YW4`hI z9l${|uy>0?fYL(bAeKWC>ZGLMwnG|T9+Dl^A>KvrXQaE?bg-Z8XF-I+2cIO`TaRqU zB&Wn}!nMZz1+&Ft+@;6siSr@v$4;kx9y#6Q-*bM!zv}YZ_bg*lFv6M?9Cm%>f6(ps zfIFIjy9@P`6c#|d8uB1|>mbW~E9&O#hD-;QpQ%o|UlZ7-A0xeO-h_mZCj8@Q&-m%g zC!PiDzuhZ%_t_0zcU(L9H<<_gMj0c5tBfoDml#h2PO~Ngj=D|+4zgbc_H$ka_PGBZ z;36O8DJury|Kxwv#mL&X7V=D`A>U@_Y#wp%*KC^hhcuSaWP*p)^XNe0nmQ?P3s%vUjG+Vkp#Eh0f*NFk4}V)aTtC+U?a7(dl_4vfbliRIBHcs0Qz+ku^R~BP;n&Bg*;X zzXd$Dh=5?5*uVSsa+J}&d*n(hjn;jyHk1BPV=X&TLr{2JMN_|1#4@>(?TJ5|>Q5bs z4|hEj70=rrmd@`D&JXAeC=Y4%s}FDT?TD)9AB?W`ITKUOe-KmQ`#8GP?@4sA;7L@W z;A!OV_;-dt^qxrCDG7Od7eTr9GPD@jL+x4K%QSDke!}weQ4?mLW zM;%HDVfDwy@VcUt`EB9Z0gWNWp|wHP5mf=L(dGUFu_c1zamD^O;|c=CV)FtY$K(b+ zj?M{u67@U46J3X3p=tK#} z_HdpLr9UHx)tMa0Yl%gNXhe=tsT?grba-CL;mG$Wzv6EdJ4`z5IQf{^}Q9rt2PdTW-JBN>n*p=VW-a zf^Dx2B_u~YicgAo z6qgh+7Mm3DIOcbN7n&h_lPF}!h(oF3e5g}f^sPm6$(t_S<>UQEtN!XU-*%~kpnRf< zW-wIaYTaAr;n-T}$E?c<;Z|fs^NUlH1M-rxLo?%xBU58*(5}Mv__&zCgxHw#i7~N% zC&a`&ii?REi;ay&U%>Ypz#GjFxj_W7(6ioh1=M?2S@5}Cb}`D8KKFkg^_W|Rk6{Dt??0w2NJ>( z&m@K>{f+*76c?T_78@S_IOcbN&t?&bUXOAw`g~UHk@(fDH19+A-UZJOsx5dhthMy& zpsDn!e!S99r=wm^3&XOxj^j{W?b~3>~{eD6~+M~F@l(E>LgM+avL|e~-k|;k^>K zkE<`aaKw1?v4b`W`+Lc{?H$e*be*SQ;aUDNYU!E652C$}5c) z^wb4L=q@@~ibC24zGa zostt7J*6gfdc<(!p~Kel-3LfIO}%vUicVKTL8}KnqsiATsXo{ZM* zwa}yUOR;NLHVdD=th8Y0yx!WbGq{}%C+yWrk5Y_s4l!(!4|2#c176Odeg16!o-hyZ zu2?>|GtJ+%JwJ%iRu$^h)*eo4I}}cBzZ_0!dlpV_d=~Cp_blvpK-5ZM$VK_TZjI2_ zz7^QL6AQ2#*A`&sZ)^}6zOFE@@2bw~=1XQfD$nCp@=lZWQ%^Ws#E!TTLWe!6g2R3c zuR|eh_rVws*1;4X=L30u)B~0NTQ&jLxkFN4UPzXuYQ2|@8v3|g0B z_lA&vIE^|8H|Ap__tpySzq?za?T*%pnwutD3rB4f)31Q%>aZ98Oev5l*yv+npHn!JoW{c;<~i`i5_J=#B61fV3s(K1gBr zdnB-V(}OW%_nCOV^s684^*>95%aj<3CpiLd?5>|Tdk*t|)wvV4*r3haAK) zWk$)&uQi9683ebeyLyyWH82l|A zLl{40ORK9F*Z!M@ZC@U=LwZLtKKZY!`pU=3EoHeh+$7A&sf z!Qv4faIfqDH)Z$J;;Y>^tM7Int$*NO+s@cc;^*f7&p=U?09PF;;F_!g51Y-vCCh>v zQxTY6NcV*3043f49I{M+SZoe&d_wp^dQAA?Fc(0P5do5t7&z~h0IsGK_#hwTYq1IV1X=KOQUs2>IjEp>2%MA5 zftF(lj-@u>P>WP$E8!QRhd66L=mH_Cm7 
zY3gIZFyaECn*&S1Zt(4c4UfQJNA^HUQl+z3Hb*FLiGp8}~2j|<4Ul^CEUzulV zUzx}0Us=bT<_73!2IfvN@Y;zS#6MjO?d1?>ybj{6wn7Y14kG9(--FrOU;OxnAHBnH zZ+Qtelk80Um#iX(=Zq@KxN{Tj3BAkd5$&MML+WwHed<-_ZR%6zb?ST8McQZAlTM%A zhMhmN2VFj~4>INkoVJUAn+(d{DE~*GE=Ii8Qb;yj11T2LkYvAeI-aUD6~)wi8{%m& z;U8rFj2~n3#3PmPw|k!BJ-0I2ZDyVG4Mr>TnscA)6{jO^mz>VAFE~BmoN<258F8L+ zA7Xsu4lqA*dsrX2-L7*3&S(aAGzG_Gw?g6Qpxgf5}egv zM052XhYHLd1Vq@}<|hzGJu@knxrNRbIF-z^ZVhgySe={`%mdsJ#tGh0#;C_(<_nJl ztoNQhuJ1iN+}?Y(u-|($b3V)oxNH*v50txuQND=ZgE|Q6Qjnv!Vm8lo{pUREt#7l5 zaxcD9~LtU2MGOeX-@H zmqm6mkMk%Bf2FguuO@n$o{tW&IT;q=a4aZ+en^nPJiyOq_j{LfdpsLFJ9(YnZQO(W z7Vc@^ChmRTdfsc_YL7R*6`pVSWnOQ6N`J$@Gw@N?iai4ViIxeX6y5V2${Yp^cGwE08^V%<;|Hc==p9^52eC3Cpvqqz9k%9bN(cXD86`G4a zRO>B$S!=Rttk!bl&2qx-iv>=aC$rebM^b!j_Qwa3dZHug?cs^6=8z0dePDq{wSR?A zxnHASiEod8k?(Lof$x=oe7_d~xqh$wvjvlaEWvBP%-;ZRXl;L#H)B!u%|?E%L{W0O zT7ALWdL5}}O$N*EH=1p@T5T_Pri7+(G|$!OK&Gd4Pl`XOEk2Ci7!%8^iA?2Ggynjc zgp~Re1l9TFA~Tc~Fcg>(a3Lr?@M&OL;6y-b&@2Cxph*G3Z!XZI4-p7Nc`soT$~x#X zS}rgCxlVQ7t7eS_kK6Z2-D)vef1wV)>v#o4eW-|O)SJt*YRm8?Hlzg8suQA_WwA+| z!l*3I+=ya+W>}3NEwn8lIpjc4V#t}`gwU~|gwTmVbm~(;eCVX$cK{dNhY*x`lhCu? z0`xhp+Aa36QAzwoyNcw)ZjFW4+KtzpX|mlpT;r%VP{uIqD0H`M%=IBwX9UtpQzMuK zNeP_n_zchV*aCiXbfq9ZswFTcav&%w@>FnS)C0ssU{vHpKvd)_|ES1G!CZg`T00D7 z&Xn~idv6n&sof>|u62*-c#opkU;U~JE_E5MJ<)0{JJ{f$+Ee9h&{D>>s44U!l;!$U z3$nr(+3B(Dw3Jkj#H2i**o1OHWPD>_SX^IlNZj$@;J7=8mw_R169J*Iulz$}CIxc= zUTEzI^vpYboe-365uR?45t;0i6&c&VOXS7@<@x7&4b}{ITFDHw5LMdi>3a25Zn*L? 
z5BtI*KT39fh)ZftG&>#ZK@wbLD<{W zOxssg&oVEn=Has|_>QTiLC*0-k#121iM-IG)MJ1p${GFg`OT5QRw^;MaiQ>x+@3vn{R9D zwNt9?py-sgGE8$D-EGtAydC1J1L#qe;jECdICuY&bWeV90pGK*THs#T9>6Xb3UDjD zgm~f4$)6N(b0+<`*>eK{DElWZ7lOhS*w2QQ*ktcA?B213*p(COg-#xqml!&#y}bL7 z*_Orwc=_@^vQ~b#i*b4f+d84ulNi|~pawUDG6Z!oZr(L1JYIF47rUyG&#Y|qbE!Ni zaIU<7c;@F){@RaO{MwgQ^kz;V1ZDp;^z6TEDfX#V3j6E8eC*nZ`PkVrYp~(da$@}_ zHJ7%Gm~5&(Vzaw&$U!~hfRjODzpF)LkB41wmoM3`Bbe^h9>sKTO=i2cOc5wxE;G(Q( z$9eU|b!QCMmz=cPnKeRGNjgf?jXKOS4IbiJ`3?HmdmacRbM{Blnf-|jr@m}gN?#er zp|{DM*gL=_^qoaK;oA3nKzZ`_LYUBNFHw1k^zoU4nIv zZWC#^rn0c?ivF6Mi@5E{=j|1u&QdgkPc!s=Pr93Voba|{9}lo&j6{&=#}X-yN3-ap zqovOF!;K95@Bq{1=o!RghV}3Vmd)T7SDS-#18EDeCzT@DXuBA8=71P>1f7G>kItj) zxVs2zxF;=Kephi`?j79~DYwivN8iBj3LYgZ`(AU=;$30uyIuA&ak(5|LAw-gO}?0b zCtS=T;4hYvY%VsDtu78YT3$NiXmRN=+2XEUKRj)1_VAIl+5NZHCU?K%jqlFP$%AqX zyVxd-9T`9!grg!@6LJt0Xg^Bf#0sIzS35)#CsYce=#w5 zIc+=_R~j(vXtxm7J0yfPo)*T+Mo|ai$$TvB<2s?Zk2}Rer<51^P3bK6m@;1LI%T=p z>66_y@+U`G!Y7yA_)i>p>(Aaw7M}x@%|1t{8h=hyH~f;Vq4%XsYv0#KZJn>ZI@;ex zbhW0h>1uwPK)jigk#-F08^Ex}5ezH6D1>F+6~z*NNMVsP>xBbnWW>CGD$VEo)Lh~+ zYp{~?%X}^Im(4~zkfhOB6sQZq+F=S@86)u7X#hcT`ygsJI&VT=6Y><)p+Zp=nw3ng;F2;lL za!asSX%6_+#^A6P?E_h_2doV`;J!%<$#=B>LmKV<*s>Rrw<$v2c6q4Su?LmEc0-Sh z92}9|1;^&3`5;;!^)NE7VpueC5JBH1FfZgmT#*N%!zSb)WW;PiLEHk=Buzngfgu<# z(gU274%jZy1maRPpe$1Xm*snby#fWpm3zQ{l^lexmW6~hJ0N>4+O)oI8`Q4b3hnE+ zK-Zj4bYZQ0euN2 zFp*RMi+Ov%cHS-^&X)zs{2k!3U>mR(Yyt0un;~$~Mu?JH4=GY>A!jivO(QCn%mrA> zi2zPT4DhPrK+=!|nxPapS*}9I(QX7s=5`=?$$?#{BG@FVfMt$4n3rjRS)C4;w&{XN zuO1i=>Vwg7#03K|yln`E&k*m7!0;<#)(FtQzng&JXEQK*Z$39*vr7almH%-u;^3$* z2~PS_;9{`~oQWHO=CU0eJ>-BGqzHC#s$i3)0hT4&fUD63^Ok*J)~yew2MoaEs3Dk~ zH3H-7h^NM2{08v_@zeOH3HmQ{Fn*5%lh;US&BebnSSyJF33(8zs^q^uh-JPCn1qes z?6e(d+}%J9Py%9%D&W&K!KP3Ltg7~bWurdeIt>6fUL70n;(Wr0J~b zC({}9&o?tL`Cx(mk0qErw*vFI09$k&@Cu^;ItXXvV%(4q;+U-hj@?FJQMZFLdpFSh zkot;L17eC6*yZVhZMi8lmS=;*=k z`m9ro&I~n8|2sL~=o_ir@%SaH)Y>v@zL&(<$L@ot9SToR|7;)-38*@fIV_z6yyh42+uv@AW&ZjfdAPlmY^CoU zKlQJy2%S$ZNd_P2*~agv#b$3Dt8tSK&6X3SF6)=X12)fzBevtj%eGI5W43=2-{S8Q 
zzS&pb>|GOD6X7zgi*$i{kbI7M((x>H z)bS+s1!aW#iF$-KLp?yBq4he?(7K#wsB-~il&xG*=JY{?psW|8B#F+eT{N3)xZ+Ea z#rk*g_FG>@(d3?lvXvhO@U?I8LkvefW6dwSr&*t8=h>ZcEhU~});b(_X{8)?^LwXWm+#IUjPK5^%j^??jtp9+~z>+zT9r*cFrmZVj{n zR;N=hbI_%md79D5yvt}~y=JtqzA@_EzApBgnq9aa*sy| zt&>~h)W&ILG_(6zjqDNEdiJPmE&HWwHRp?Kx%(GZDLM?jm^U}zyjd7LQSJ^wbHr~! z&pLOBe$U%0@xDlN!HZ)3C4U#1ufCCOzx`q=UGZcBTWdJl+i)-}5Z50RY1{3eNNo4b zaBTJ|pf`9`xYT+!GOKvqZWX-4Xh+{AP6=+55nMs zGH*D_ok?h(Y*~>nMG9iC%hV*FRO&3eQ);~Gashtp=`4!E(G-^E!FUhD-e>`?BRtHu zB_x*E5SU7-5#-V>O_gVKG?=kmmpO2hO{wH<@|Fc^<|Fi2{ zpkG0R!FQD~M54@_x=9HA>LEN;wp;XN^MTf3<72pp}#1x1XElcZ!$d_kf!$c;}w%KgCJ% z|HMx8|Lisw;D+vl0OidXlzG!P2>mSFBK)pWR(QOAkI3CdWy#Am1}jdLTS*@-a@aGF z>#Wh4!O?F@@iwnb2(YP$jUX0B#XIJQr#oeb<}=fSE8J3on%olu`?&Fe$9b`Vw|TKa zZ@4kRQ|>WApEwBiTmT2)LHXW6d(cH(Hb=&Nt{SAE~z3 ze6WnTyStFC-jd^{Uz_1+R*@pGDM}0_?txKLoS?{0?709Bw00=U-6<G9`l%ioSv( zFtLUsNNDH!CJcG_BwRwg;PDft+cmj1n))T& z&E5sr#r>gqOL*Vt~oqoUbCsi470E3?+kFuBSD7hCRU8&MiU3@(nQ z2nv&(yz_IJyu5NZPHr>DEqA{=EB7qo31W)lmi>v%&iw2)7a%~{KYlR=dFa`D%{=U8 z`#kLOz&z~q&~ohXkc>#*LA8Y~1BPp=dn~sVcM=t{+h`go%`E-cMy^?S9p5^rCdl5e zD$>ELGLhz9p6%>bR?1|QHo7{O_Ot1wrx1_a=%pXroQl4Ha}Tuq z`#nu~eF6)1Z!^gz9=cuUam0NZzT+Jo-~NMc-#j-EI}dx3 zFN9sM6~)eWh+)I~MX>>tzdOz?z#1=X5UM=CN4(&i*5dRtCTrtP*=!3xLD~~ALRaxQ z=BmXV_Rw=Z;%Dq~IK-TGDAtO6D9x5QRA^^6R7LytajgYJk-6dHKz9m^6{O({m z@QXYbKT?FTJEa(Qp-BWg+9QGup#0r-QWUGZB8iocuEO$1cZ#N8Q(qW&)o^9lWy?+e zmk2W67iseD7g)-y^E?fwbH2Kiv%&ffXJd@*&!m~!o+-pxovF37IMZ!~J2PTse)g`F z`I)y?W~aW}n4SdNxfsjEu*)?VHqtJH4Gf_5j|yWo=ftq$8}qR2`^&J@2it{X?<-4& z-qTwmxQkok`Ip^h_8rO&#%+ci{T5fz@fKf&cr#et?q-aZ_03cr%bSII<~M8gO>cD@ znA{pMG`@Y;z~tr|1EU*XjSR2G$SiiQ{X8 zqMpf!1U*xhvw0dIZ#f>Rgd0y*HXYAZ zH6E{2Gkn&jZt!eKL;v|D#4`>3@lWdep3Z2_#o0R4!RSC8i2Z1Nlz$5@pbo?>aV+MY z6c+Y&y^!Fotcd43Wl8osokcG1jF(g1S*<3%BdoJ~Pu*nwo+)kdp1aNbz3&c_4%;(z%A@^w+5yrHlIPJUUeA0IVDZ3x$ORay{uE5PWtTLUUuQ8f&U1#u< zw_)E;zm3|np_?^l*2}`-4Ki?UPU_G-C_o-4899hhv>(R% zDTcZIl)#)|G3EfPkb~HS9E6OB2`GpffU1}-=!k29p@ce^NveRgPL$og42z_%>G>q0ihhG0p4D&)=6qmOc@*g4? 
z9u{I2$b%TeS|L4<7S;k;5jBt(Q3e%J1<(}T4g17)f{~aEn2T)(Yw@jMFD?y^;v2z5 zVm)vq)&gI0HH1p8gapaukTq{P6wg}*)pG(y@*-e_`~wbQtu79>x{_dLE(Lakm0-(Q z2UcF20T;3zOcP|mC|eE;OZI?%tpez`D1u(M66`y$7xoP+gYFr`RTa<~LrkiI&L_l- zD(HgBFT~G%p!!uG)V|CC81E4Q6XYK(5w^(1*lSCoujG*eg8fRccU}j!9@1bHxE*k@ zJHa$#Hy9VmgHe?t7&a+^LFZo3A5aGU!z!S6QWf+rBOa>3z6r!fHPHK^Hme6}Gy0%9 ztqV6GqnHpoHz(~BW#NrIz^6p-;Nfkaym_U_VP>%Rl6qILl;bq|>3 zD}ZSQ(oyxwVBDqxM!l+FG^hrKBkEvyK^+YK(g1_!8nXr;G=3ONYkV^V^{;4Mw7w>? zGg^}-{{>8Ui=ZQLMZgX@2nR*vVvv(^GLizi%}Ss+t_QN4G!T4u06ts}tdr!ClTrd) zsWOmyq8y$NW)HU*t$ zW}rJh2VjO~_{T|+c8LNNc@P)W4`LZCLPr9v02k6apfS)}%}WMI!E#_9rvSDYdw*FM zs{XX9RGYDE(D-4|t~rhC)B1)xr2W_r=V><85-{`!?ebsqo4*Oo3gWeMy z=-;;lgS&G8=E%$0{o^1|{&qnw#7#v4-1jX4ck|`IwqFNKx->X(WoD`VyMK_Q6sL(P zDqrn$)jr#mYE0SIXnnM4)_HH;rTfnMz`i$D$M(Ipx~TWc>Y?6q>&bmjt-k1wS^YA6 zXbDEQtikw-4H#dt{V!mNJPZkCT_=<^-FAus5ABok)>#1FCd-fyTKmhLviUp9b;lPM zpWRdRP^I^jc-1%L42?-rq1FVkLg$5j!@lSC?Rw*O{rXSvM+_e0&lx=NO**?Gbg?hIkp;v{M;4WmPk1|7ii;2eP6HesNn=ge$`54s)!yG7Bty^_B|^`yRr zn6LN{Xt#dSm$vnp7iZUFuCL+)w=lIk%y{h^&gpvB==p}1spZBODRrjj9ox*$lKU-A zlZP!&I$W?CaTv22Ccm>jME+qjNCumJ3Tl;6!KQN#fQYic3(B1TX$fCI_F%I3{Eq%j8C`}nTO4OJ)nW!)IG!D1&K@@T0^$=&-Oa5G?vwS~|lb)fv$GEYEhdHUHgKoLF z{j5@}enyQ=k4r1Q%emja!}%DY&G{;^+4(uK(d9F--UWy?3?Ni8flx6AX!|3&4=8K; ztrdc>jY1H+O?Wz4PULNxviP%fo%s(_O_yDdx7%L%5(4ms4Y&XcQHPskOnA05hEXB|r1P|J-Cq(U|j z{}-U4*Uv?{I{?iQwI1b8X`xT)GQtzt3Zi4V>Jqmy4VPX@u~~mQ-cjag6jO09)I+^L zP_VCC5Ng!Uk2P=hPPJ_C%(1QImD*QvYl-FV?GB}!1CB+U)09Ha14_R8d&fNYpN=`) zU*v4=FNZANe*q_y`8`nH3`V&#cCFBC>PDgW*;|F473>neU$|HNYOem`vl&+FjwL(n z7>sjK=!KQ_oTD>~9LEi%zI zDLj)9A6i6?4ymC;1h><}f`*(zf-X9R1V48Q3Hd?`37(;b1plNU9RC+!qx%qma(65u zV+r=YND3Q6&%Q_NS7T@Ec8MIV)>yE=+;~-Iv5jTQsf6kwW~ z5N??m8)q9ElTL_?DsTvmtfB-(w9*5@4>}3L&mo>V2_inx1Q9b-|L~ubxd1M@20UPryb@`f7AO%)Xc*}_tqQg#tb zBQ>A5FFx1TI66BR7nT`i9h9DACrHgE`J|LNdL}ngxk-I=PSQ!lL&OJ~d(sS*oA8r@ zaQt7u8?7I)5SvPyhutd@$F5aNVi%evu@fCju%V8v!aeQElFco8%c>i38;a}fcjQ!4 z6jCb~YVl?6x>3bGhM|RlW&!yTR=&9jc(0sH0yn$Zft^)PVP*DE8JWjvjLduJ&v%F! 
z3M+lqk(Ksez;_`AG4rsoED`KRDLVh49_>G96UPqsNMZe`7t_|QC|=j8yQH+uY+YUp zep^~2c~3&Uvr1GATPvi>OW(gjV8SmC!+Dg(S#wI#?N~*HM3k|>JZ%@e^cSBjzRiTVd~1#l4;4b#u_j?`e-~Q+0EX3} zE=I+P6ggJ!I{#3gm3AhRa)CO;EyJ%~m$OTCQq*wN=gN>X5qOwJU0dS0~gAu6)tZzw}E> z?-Klni^Ui=T#sQrs0Y!2dJv^3e`cK(!Q#=m9HI9XU;&TTVEjii!n{W+Vy=($<~u*a zEv7!QTjB7CvfBO;bG_{&?k20heWh`aLbsVcirZoQC{xz(QRy!IN6m72V+Z8+Jw7kD zZ){vn_wSFpbso;hYd@S-_+O4A57dVoL?dz#rKksybqd4ct_Wkne@S5ciN%=5#CjpO z30Yy6SIT0PS2~iUS0)Sblh%u^CyC1}Ch059CS6yXy!Kpc_}YKH{_BX1y04QrX}`&n z)_haFMdNMfR`s_>wyC|lwoUcji*2f}zaXan!vMMm&B#HNAqSCl0-Zx~33VWD3t=8l z#4+}#rI^d74MNnbtA-`tjJPkS!c zm=0K>HXXG}<$LPtz26JhDE_EhtMH>|o&3zPb$e#6t=~QK67l*!w4!@ZjvPc5auBf> zFf0%`0PZ8?0FVcv{#uF=kq5DbZOB3F7BNM+-w?FL^uSPD2h1fj!B$cYNb^*HHg7Mm z<|~570@T4+CP*=7ubo(0$ChA zphq5pBOwiZiH#5@xgL@v(H9uZTZ_EI8fcxj8U_%9{~;H>2jbCwoWN1E{zD9-p!ID& zV3;`wV@Ak>=prYi32TH@VY84D>=4=myM=dwlJE{t6W#{e!qT8GvI$H?Hh_i5TEL5} z2D0c%a1mVwJkh1#FSZz>#iSq|Q7ER52(fSCpe|(IlIN*#W0f%1%=JaJ?!d(SM0qa0N zW+UuN-we71TR^7*U4r`Upw+emw0dPg^C04wENGrX+}!#9*gDI&xRUK_R~zo`*0{Sv z1C2KB1b26LcXxt?Ab|u)0z^nih9$;ue9%Mv;pC)3#E-swd zIe-s4Mhj#6ba6~CmcrI`Q2KT%wp=fdEjB7(i%CUnKBt7ucPL}CWfg386gaQ)-Rzd? 
z7qcg-AI;vWyf^=@^41)yzA?jUug$Rf3p1>-7HqCTgkyM#m;o7t8{{CK`V{PC$%4HY zoY=#c54%PPV@8TNcFdQCOiB*ZTja4#k0Q1nR>D@}%GhdJhEZe)L+rws=c6nQ-4Oo8c!{;))Pyt{ebpQu$3wiJ1P>f8+dLncprX{2l<;b zV}Az@?CZ&gy+VYsM}h=)&6dH8GU_+S2E{K99m=2W`&B;LZB+eWJEiu{c24~*eW%77 z`hJbq^plz|>A!0}r9aSoWcxYqipAmllNA9~iS-FNTM zxa+oF>rdB>+P7V%bZ)wA)xGY#Tkop#Vf`zN^9H{<-!-`4{L=6z#uww$3~YK5IPQ$i z4!Qgj%z*sE8`ivo!23o*E#hE25@0+M%~?Oi+jGB;@eq6#5hV67G*;$rP=>0hPjcZN&b{>%=|jdhT}=HGvB=gf6<#Uk2o02kvWW z-olrXLZ#2e#wnhT%1}EVUZ8zAv_k(taD&nQzz);B>(-ku1#Gn3<-gf#hu?zDg5Q4n zcHgt~Ilnu$Gk$MuH~W9Lo$|-FaLSSG*uQ|iA$JG?zZVa_I~{6~D?|8Ds7QQPtVz0C zY|MPE(3bP(9Cx8J>48$mlcN+5#V4!pkIm896IEicE27$DF|65qdq_8JYw)1;Owa^< zOW>T{RN#{RrgbMA#shCSj0V1N7zz67FdT#(27|Hvz`uZfp$6gL_g2??vxN~Zlpwq= zlOsH+P$Ax^)Fb~=X2k(5yMjk^{iIeh!WH(U#;fm0%FvyUFEE^oEjOKxuD6(qY_r-F z-bWt`+h{iuy2W8QbcfSm$YDl*$Zw3k(5H;vu+L6CVc4lF96NS~V~4fi0Px*W;P+Bt ztvOE+eJK?|&#ENRpLJBiZ*`iavsD)C$I6@p_7{0e?#>NSSjdV|pG!;8-I9`HG?`dz zHWpuPITG7!JrvVz+aERL&=)!B)E&9qxiezLr9I+;OIzeam)59{&Mi@x(HxDPn*IgY z{z<^G;CC~?cNg-Z50wJwVVy9#-Xwu8G^vwL)SIy#sCM9AD)W?>FA9{O&5Kl<%1+c7 z&&V_!Nh>fLOev?WPpY@+Nocq0jPG}BiydP$$IiJl#_V;ik2&jB7kl5WHtxMkZ5(#4 ziO0^>@tCm|91Ps)XHc6$vNZ%Mb5^BIeP)w=O?sD8W!kVyS?XrDlGL5yn~cyTXDtr`-I0 zm#n-Aw~V}bkF?xF9;ta(fEVtmd0*X9^RR1L9(Gv^j)od!vZ0ryOz4k#3cAqFj86B# z*@uHXXwRT5VSYfHX-l64*C_N!4Rm-&ceey6wKj!mG}OoHRoA8%mse+76jc^k=a*O8 zWtX)$W|Z_frxcI5B^7UTPbgaPh%f#Xc;=o^{KYMy7`rAGVVAYwST=-nSkUbXBD&N> zL1(&P?SFs??cczKc8*9BW;bY3CI-zohS%E(_V&0*wRQO^G_;4PSGPv%mNg|A6*gv? 
z=hhckW!6^Mrqwh$Bvth=;wv}0##GF@MOEx`kF2-|Jb~ArT%#(mOLRGQUJFiOL0|I8 z=vp-aoo|D+|2_&jFbwApjB=oD;}U3UOr1Qk(S&{d2D(7Upo>J~dLL?aZ?I}vccgY< zXM#aad%9^xYo29FOPNhVQ@wpmW2aMO!v^QD`f1nD`n_(!_2=9|>L0m=)O~absl(2p zwf_XCP|)LIgnq4u-v3S#Iy68+dp45L{3I*dG9`+}rc_7+n+#by$F2FAMj2w&BVMwl z!-2{LgW;Ol{c(C}eW}Jty*cJ_-6d90UA6S^j&}Rtjv=SO_ASl(M}vm){ohcWfC!5WF9(LZ524sO84&{u=DAka)3iz9XS zj?!)R0{L6r#4~68WK(8>mEyNVsYh>4)DEA@&<~z0Fb>#MY34i8Z0R|^-pYM^la1^6 z4!X25NZ0j>+Y@IN7 z+`4Gtu=NCR$J~DIjfMTpcT2k&Ok2wzRS2Dc8Y~Zi|A#Ty1bqkt;Q!ho2dO>8iYg9^ zprXUd#GJzhOlgO#I1&yz@kJl-6b@SnkO*1{lkwjlEAO)}RmpQ-uBz*DnFeFIQQL93 zSJ!TNT%W$Ypl`E$%z(c9r@qaeHwM;A-;Atw<9~1tYH$GhFcu((nSvZ<2*#iTdNFF@ zT+E`A%qaT|A4)$fM@TxOO^Q8Z&Kz;ZjwATA8*jjAKS7_%9e9H zSt@ULvR;XPvPaqa>q3o;-WgDo%y!;r&t zLIzL^9=zxT38h00689S?ivC>;g3TVkhw*zPzr*hd zLUzArirD;KEM|4NPQv1Hmz4SCQEAgF+ht6y906|1nEd`i#`xDyaz>XhbuD{f3}#>q zMrNT00p>s*^nw&YFGf0?4H^UIlZM>kLV4F9g4bPLqT5|_GUKi-lfyk%7Q1`C zY&Q3TIjrtQb6MU`<}tgU%V%=GT)^l-v!KC)ej)t_n}u~BE{W(qJS(E}@Scdy{kI}I z_rCsvt?(R-LI%+dIZ!>E$+4=Rb`E+Vej}g&IG@z#IS=xDA&y*MC=#4rY7y;T8k4MF z(a4ss94O|m-Iz>Y`!XB74rVcU6V0mkCWTGsZ61f#+e%K2w{2W%?}oWm-_P)BcdtnaNLoZ4ZWB{xBk-tL^1mqxYFPRbJGY_)=B8qIj$Ps8? zRS9O_bcx2_Oo#^GX(Zk6_K-ukKn~$efsFx>Lqswu;Uvf*a+#^Pl0_D`v&i6KR%txL zDuws5O5#(j68QQ*SPyfs5i*G4qoDs2=wC+27xbO(B4qo8f@qL~m|;Q4AS94JmM7?7 zHG&4#A*fT1umP2w_i6D*D2@>#L z#IPYj1e+5Cu{D7o-U}~wC-Pu_A}rbyIdC$O4HpnuaV-fBOeZnpVGleX#8%441Brq$6brylW;#lV}a2B`@ zJQDw=^G5u$?icZox>)>!E|z$&gC*bTV5zlW1JE}I|G%06toFvxSMwkm1=}!~v9%u? 
zrbTjLi&S20Rw#f?s)ev|s|Ysg6~%_bFboso*kD=$8*B%5OJaiqz$wYE2A3s28$6Kw zVDL)vt-)u>*9KS$I#Z-y8eo|h`dD@?*ch~F;Q#61GwmS@af0!1GN)ii2WD*V&5muu zxUo$lAGXRB#FphS^o@{Bb&6xN0f}#>8zsM*PDy?-nUnf#vP1fl$v)|iCdZ}UoBSgE z+T^bE3zL`9Pfb5dKQ_U#k4!N2p$V3M@E;W5`aiJwsy^foj^Mdn;C;A5F6CiD!ESVB z?BdRj8A06GF_s_OWe9zx7mI$jsTKcd-74|ks#oeAZCLsZZCvKH<+RKz%X!(CmV0EM zTON^pW_e!rvE`q#_h~O=?^u4K-nPIBH!ZNz4GXM%?O%XnnF-iIhJan5$J`UL2w%vh z{0yKshQ@@w8EoG@{J1~6Me@IQP7!|NlqdGmp+e%heWTP07DyY<9`twK*vNC;hDaP5LeQtMq3IzuA0L{>28XUbMk#=dH2YIcu!G7EA|y26%4| zSnKweCBRQH5pgh#N01rQ*LC)+@BKWvUipOZKl6+ie&n7hcHgyF@~(4@%%6-_*;`IM z)EkaN^4A>375{LUQMzousQjz_KILERPpVwBzoz<=!&B8$_8-(w*kjG3_E_r(a2PnW z25b+0-wk}f9}o<+2#3CqNDb20NF&PIaBG(5AugN`0|WT(1Vjnn^i37N>Xj#b*`r+U zl6!;vMb~zv^DceLKe=pBJ>xv7c8W2leuA-E<2d86=26B)twYWawDvRJ>MS#`?h>%u z8SCx(7cc|-z8Co3K=A$%P>Wbu!uNO;;@fy#($hF|rhCzj>^CEPc&>zo3jZ9GAaO1r zQ|63ck^D)YDy3syO{zycJJk<)^lKh)AJy9LzFBA4ZC-be+djSBZm0Eky4}`aaC>F2 z&HbCS=LVvs0oZ+0;ZlhVRW5&~7 ze;7}BJu}(l{n=#P8=H&)qdwSpE!Z7$hjrk0qrm$o!uv=QMXxiZ(fw>i!u4z&;x8FA zmNO~N+{Y69gb&7sOD;zx$n6f#P}&h%pf(>|uDLa+UT1b)o8ELlpTU&>h|wnh%_if1 zi>9M~2hB$Oel{QWdt^T7|IvKFADj0FV6*lA!4$6l1A9RYg2C^t`u`MO^f^laJ;@V6 zw+f`ur2-A&nOt+`qnVCeD{0wLj)yiGjD~a> zZ3rGT848*(>kr&!(HFRE*&BG4)*X0{)*1B9vLgsvwg+Piz`Sh@*cWn#Fz|Z`@IErQ z(7QYybiWAByDJev=SozG$BImtR`Tq)c4vDC&Zn=Fm`jP2+nku7v?)GAeKa;-YeRIo z-e6>%;rfU+ZHbic=s*BudQxkd2raJNly(;RJO=Z+K z>&j?s1<=-l*TMZqvm>0!h92j!psOV;=u#zk|7t#Tq(+{wT%}K0EVtsAD|Qx`EbtK@ z%?*|v%8FK8pOK{2otCN9ky4=7np9!fl-OWWAKz(S6E{e!jGeSDk6oab#vZaQiM?W5 z9QT4=9QW0_I38R57aCmu2M&T7#Id3e8O-QLAq8D3XGZ60!28#M_pg^D?5fitZ>zRs z-(2CuKUU%;HdGiW)0Y>a*p(Bn)|Qo~)s&H^SD#jDSd&_73I|?VlqL1kij&4|3KF-| z^Aiu)<|baU%T0Pl&rSMblbeLCbCcmO&l;c&7UlvodYVl_e-x9^g(@a=s-77gY2-$G zn`8(JjauZHdULjk8VA1N3J=lsr2*1i#bFAq1+l7)c`2H;IoZ0E*~Nxsnbju687&qC z>AhCDX`?pTsatI`Q}^4Yr(U#6PkUmUp7xoZk%p}StBmx&gTq+hGl7h5=OT2ejD*hA z!oL3|CbZJRiFUS0p{=bNq{(Je){zD~-u_xQk*+E~sn+rk`G(SHm73xtjf%od-IDwQ zgTlN@lf2v}^Q@d6T1NJWO={MxZF1JKT~gLL;E`=o)<=3$7Pd*w!q$M*kKhOj`kYEY 
z*9&0Xy%P5QH;~c6Rx;Yt!GY#G#nF~dHPTqSG0R{Jox8iyMYy%zN3x+Nh+170sa#%> zpaDN^u9IJyXOL4|W}Hz}Z=PD%NlPjiwvNxArpM+l*~a9bwTsDr0M`%jZ)^it16DtR zqeys^SD856ZOGi)Y0cH%#t>|5@sg-+3Y05r z2v;hqi&M|5N!8A(%F$1&EHO&1s5MI{YqyLo9kh-r-As=t*<~AEa>_QW{)9b zHfOIHwC5}9cM~b>^OMTy4WXuYM=2+FCaTAGWN62<7wAW{RvLx2G@AxC_gMrqPtg3D z7OZ@ljsdr=d>Y@-d>g;hd>gRkT3)0f^c#%B$vQay0Q~n3(4PkXw-NkT{{#=}+9X40 zp3o%KjGM5QjnTOaHo6F9k9dox4F}334n-)$4#cZO^`~istMPd|=xsFe?ddV` z>KQZh=-zJO-hBkPY3|Dx}|Ea+eWDs+pKMo#zJ@~KoX-?EM zBaUijREee2hD`ZetT{3_JMpDVd5R=V21rCtgvo}F$0~%3rKqeM%~AIoE!FbgSg-3o z(q-T}veC$S!!~2ahC{|q8?GBW4Zk#Y8v1JNG>A>sa<%Y3HQ3h)>;D6={=O0XKX~vC z@W6HREU02Z2o*0X5^@*x$QcV3tV#0@T(R5T`6IXaiG*$qkqDfNmhqoWqWa8cDSFHl ztGG_rsxziLwH&vMXxnd@)3w`jK-YHq4_({MFLdcsU-amcSdYF5>;K3FsKKFD*aHG% zumv)J4V%D!gMK}D(BeJNgRqwuWiHF2)MZU#{IV%i^jngNBv#PzNUE^ik$f?mBb5?XM_Q#Uj|@qhADNLcJF-vK^vJKWW{024 znjHKjXM6z5uVr-}Ziil!37CWHAp>ZhhaQAI&;tQENaArghXKxMU3Z=j`CpJlz85qJ zo)=7rZWrie=L;@OP8Ymc>@Nhd(=SAET3<-ww!DzTYksku-}GX$fbqovA;XJXgbgk( z3+w; z#&r?octa7{-_Rx4-ZUrL+_WRnZn}~!ZuwHoZiO(J+=^u}x|POiaJ!IA?{+PR&h2ha z?LWu4H2+-W(!6tmOXJQRE{$8Sxzuld;a0!-jps);ug<|K_&rw00IHz=xraa>dND%b z_es2=AH?Yz6S8~6jcgu?BCE&p$l|dkGJRr5Fn(%DFnnrH)O+Si(s|}f(s~|3)_5L6 zQG1?FQF&3sr2L|RS@C5bv%#^dW08CLhDGkhyT7qv^%;OOL8?JN z@4xyX7ZCD<9K`-QLbS&eWd5EDnYrjUaed>|oR=*7^)d`Jz8A!RHJ8H5s|VhtpN^$;{fKn_78h+;c} zFm{C;!WVLgFaj@5gd8HD$c5{GUdSURAdlD%tN@4qM%^yh2LtmjX|?{pfc{m4=(i9u zh8#reB?4W9lrRgDhdf3W^Mj@+630?V1XGb9R!02rUU;!C;=#s<3)2t>wkNP*cLFnB zMbEAUC03Rw24^9kc73ZNm$F3j5Pw8uv#oL zR>@+)%B5^rsh%Axb#P#%eom}70&L>KiZj4GH&$E%4g#lu-+}wUOYZNApSZs$VeU_> zC}Ljd3|WQmN3bU7>!}d10b~G%kb@ZN6R`n}g!P=sSl6EkYezF<&2(0*Uc`pgYdEl4 zD<@X#<-)4N+*oxS*usNVxA9<=UBG@`ta1YQh4;J49o{dhFL*zye&l(thI!tqVm^TX zjVcy+qw;sK4(J;y60iwi3i*SXE?_~zCXN(rzUjB| ze%0^g`>Z$2|4DD0|D)a(fe(7y1>WoJ7I>?7P~eT;S%H^&Hw2#PJrQ`M|6cHc9u~f@ zheht{V$r*LSnQ7O-@(SK`r!Yqr3u(>HH(0G;Q({O-U>1y7ba{Sz=CPf>|ZR?xIUQ| z@Vqyx;(cq{#Q(;mOW>7Bzrai5je^gOCxxCF&j~#>-XZkZc)!pi<5NNpjIRpaF?lR> z%lMthbz>}c)d)*mF~X9tV@UEhBP{hJ*lblF@&`Nc-_G#bRT;7X7!Ox-*e~V4^x4sy 
z<-L6v`)k`ot`|1hyicu5`5#-=2|T2=3f{Nu6}o3RBz(tWT;xxSX^~qN3nDiymPM~y z92dQ6aars)i$`KVTfCDzXYpP7tT~oBV~%A{{R^13st?}V8SsEC!dsqzePC{S8<9SE z+E8A(xw1TU31olBh~d8Hn8y33LxI3eyGo(!wvEDn&^tsg+w_b5YBM7Ki}j?$MeD7S z=dE{3owGh9b=LZV^eLNr(#LGx$R4)-PCaOiA@*aor>C+Ktc0MG3witsw9 zMW8a_L!d74MSunQp|2D39dBQ@8=m1@SKJc$F1ch2U2rZIIpErgJvPbQ=$Q`zuryjD~M?GM7N`AlnO@%%7FBEs#e^J_DkChjJ1z>&+*j^A}SMa^Q z;QfPy5DpbbpTenx7vbuJ`=N%Uo58k}%j-PYF8T*?pY@3mIO&xta?B%7{D^y*#nfrPD#a<^Cgn{&U8>{WgX*K++X1W?uD2D!L5-IV$y6D&=!(u)ZjUNc zYl*1SYz%MLt`Fc6 z2>p=`=k4WC(CK{e{)OCVxkwr<7HSb^^UawjbL=@svpo2R(gQ^LQo|*?lHz3B6H^sh z;&YT6b9bUPWme-`}aAgsS1E25y45>B+MR1$3~RVQvPHenhqu;m!abK~pF_7myM43TI} zkCtgnO_Hxm&Qz{WDpadTtkf(`Xw)f+@7B+c8!^m{oi@&n-D8p!d&V>~_P$AG+y~># zIBb*|j|~BXAHnO0=zR==c>sPZmw-+bkgwPZ4-d8eU&wT;xc`t=^_ev7lQO%0xYDCavjWS`R+JMqu zX~ou6=ET!d;w98jv`)ObAY8gUFOFKAld4pZouitYRic@dS*w$t(XOA8K4_SfHf5ZU zw$n5|?SyGu+MmE%-EUpwU(@{)ehVZ6&`}sW&UDirJ+(q#W8aEg~^K91zD;Y`9&J3dDYrUxvhHfIsJyQ z*_(``vldMvvyPfZX59kb7)NDcBfxMiI0B(tDKHLs(0fn@>)tg8EjEDnZX%)aW_~o( zLM8MzYm?fW%vl;6?6|7y-1y6Cd_{_?f+h1Rqhzzn6BW|RGE|aF3e*#dE45;anslQI z`wYSh#*IP?=8Z!N4x0oQTsH|Rcx4=t|J^7w9~%M&Kk_abp=;^Te*xb2AY>4`K!0ls z_^visd+y*teVsC>qeGL_*lxmH-Ad;uYjNQ#Z1NV)X$Tb0sE?3It&OK9)}$%LRp+Ti zRh4OmS2pN`RCMbFm5&;(E8Aw|Uv|*gzw8gVzBCFb{bm?YiVfECC?1{%7>DDfPy@&y z7C?V9_`eO^;5~b}P)DBxYU)!X*7O=t%DS!D3c8$lvO7Hm)7t|?liR{1<6C28W13SG zBAT+5LmNxfgBt3z0_r<;eCtN^yzAx+yy{jAJnMcl@T`4q;8pWQ->U|%qW2?r;F&xJ zbMeq>4WI{L7WBu!1NZfV_ZnnJjl&|SdRU22I;2l77_?-`8gSrD>v!i%T<<3w+ZQ4p z)e|KZ)}1I9+?A=YuCqwlx1&bQtD{}hqkUN0t$jwvrG1~SOZzXnE^W_rU0Ofuy0l;& zm*&574W5USt2L;H`~mbQAqN?P9HM;$@(0)-R531qiYKUqya{bm=C~PC%9t&C{HP0e z^hO{4@R2~_kPQ*y>xSc{{D#wIy@v|qJqD|kTnAcJ7z2aqjssgX9QyZa*biLLuwVZ~ z!@lp6rhPB|2Nz*39;${KKn5`l_rDQ5cn|0|PQd!_7Vv+tM=EPZ3Z>1e6B1{P$T2h4 z%n{R0>>*n`xdXQZ@cVBL6Y`#l74?`(m2jQRm1bQ?teXa;1AqN@({Vve21O1{! 
zScBgKJrFCf58)^od7R`xt|x_&%P9rKIIV*mPMZ!yIH#JD>+-8KG#%0oEOd9*8p}WOsoDS^mm}EPfS3=D#T-v)^@)$z?NSe8rAn zc*T`qaK(?P_eUsE=Z|=j_SG!1=G8Ka`qdVS>a}4em1}cM%GVD8H<*;JzF=0o@`*+9 z@^{uBnSdN>J?J-qektf@>_aH#2d`Vf>Jh7c4U#Sy8GvWRkzD~K|WJ4n(`MoE%S7Dy6LPXM<` z;*VdE#2Ll=e9wYL8 zUL^8-ISyP09{r8B8OWgE{|&Ib7d50Zvn z3<>DP5QSb0VaOi@u_j~?28a)vLk2;I48jF*Vm}0p@`x3uAr@Q)SwuSlejpt3h^@dr z0tuh^8&#m61Nw2G9|#$gGu%53?p^OTLMrzVl6{Jh)Hj60FaST=x!WunPgDxHiai+~ymmTU!jfgvU=ImUz~HUnFkvBVBw88`x*W5$v=;eN*1%fTmx2z{6QOht`7Kq9eAw^*^rJY5o_6#u!a{2tA&!WN&*Ed=TNXx8586| z%viA<=w-o*!z|ww$63EAZejhZxQ+FT;x5+DiYu%i6;HChSG>adTJa(43#B)#&y>Eg zJyFE$j}jsjsb_@mbAg1pc zMa*9{s#!j(H?w|J?_&L+-p}@4Z6n)TwMn)&YP0OG)fU-bsV%d=P&>~4T>TRJWA*#& z_tjss-&Oz0`KLPOzO9aVZUHydG4GFHy;W^k?>B|~VKob(LHja^5oP=e%h&!gbwflIyC` z9QPH&T|B=V9^(1c@Ep%YqdUAm8NK2^ZS+O(q!AW6ZiI!88DWv5Yrw1eR**l~0Z!0+ z;35O45I#HW5nel55}(>L$PaA&nC{RcS#DYvt_{(W+#P?nO_q+Wd2Ne#r%`#vN;ypYmUYD084AYmXLke zg7;=X2H_5`J;l)%FDiQFsX=(;ZcO;o)sA${*^}~{Q!vX#hgkM=_UT+_?DBa|+LrSj zr`HJ_wP_VRV$&mZ$a+Zlfc1pPeyds0eO9}~_F5elTeA9De5dt8@dfMmlH0AZ)K*{) zm|X+50c{8H{;U4m8(#YfqBs5$=%Jqiy5*}w_}!aEJn!W~Iql)kdfY97^N33#&p}2e z-+re;!DYuvp*;=_BD)>hMR(fwi7(oZNX*+!Np82Bm)dH#PkPqwtn{?q9hoWnH?o`T zvFx}#mK%fXTCg2xJA?1_2LJ8Pi9QALpeI2B=w_e)jR= z``ivI^|<|})aCY6sl)w?QoB1=YWKj3ZEL^`$R4~Y=wl!m{TT-9?vYIBQWTu`54|Zz zW2k6fj4p9kloe$@!kKk8)Q4+xa0uU~pctXib;%+l0a@ZheuYv4zLhfTeH!I@d^+X3 zyayFKyf!Jdd2Lr|@mf)B_PU_j=>15w!TXa+y$@EY^TEn%!7kvt{Rrq0{CT++MMM|l zDClfFGddp6gZ3xNpj`=CgzfR>{dy~6*4T6q9g{VytT{U6Zoh`y~u z=+AKIcL(o#Hj#vmCsWWtIE!F+iX_^es!o_rHX(0Hv}M^C@5VV8>(AR49V*xz86(;e zo-ENCmL=5`S|r;LQYl{>+@x3?)U8|*v_Z8ja9X`2aJNQr;3>_*z&o0SLGLsQgRy!+ zFjj-B>W{4ciG2;;`v!Qv^Wc3?q!Q7=bU5=rodYdoh@qJb6~d-;BhpB!4f8;Wt z4{v9DuwZL!lt@!df_QyYx>QYMzHDVgnS6P8y<$mNhe}b{kXnA|ltymoPR*Rq<67CF zx3#jv-e_irVfE~AthN^HjnKnT@cvMPGb!Nxq5pmvvXGr{7QtMOFq+CyB8+D1lLj+s z%suH&>>a6IJk81L1R4^a*||WQkG0!Vj(p{acAxR+W9ao;o&;;?#R99H|0?*Rzij)HMW2JfE*o-ZH# z26*t9LNb~t;zJvX<`B2s}`R$su`QKO)EO_fOb^UW#ENYbkbLi=p?KWlZ4fO#;Bn 
zY2f{Hp$71^Yg;M!&QbyzDdR!?<ymMO+(G$_ZUcdJFFZ`24+o6`zQ+piUxb_sX}ufJ%7rD65(H9Ud2 za2dwoL?-O}FMt}9K@BRwa{&EtPDyvQBxrQ!>+0p0Z+sI@^9H8dy_sv7i3rFE9f1+@D7MxDU~6@2^CRdvE_-9QDvDj;iZMtkdkV}z~VNQfTAHazoISbK1F*pyo=6h_!K?{ zKB@Z@U^U+YtokFjW8gC(1Lh*kfgNz~GmwLfHiGYhufZ)Xtf;P42vxSp6H3~2i21E% zl<8*0g39&ZI^k-nfQ9!RY!3k%+o@@zC0Isi2yC*?{UwdEe?5MX#y>6_3g(Rrku> zYHpQhfrn~t6(3dI%ke5IKXP^TGa(1OH;ln{(4TCE8h{6F>wx_ST@+N_&4-G5WKmv^ z1|hS@n4HpW!<^8?V2|naEMWug?7;(m+yVW;e7@_W1ijZM348QqiMjTcNHTgF zq#b&CWbJw;sJ7jURC@PG;2xFU`A(kRf#vD#f9EvRV7UTn0JWchdmre58h{n48iq3f zpdTT1oE;@j2&33d3Mgum4k2v9j1)9sO9_~8X7L^OX7?Hos)QWRg1}2igMnzX5Wf zu0inskOLHf4^4xfn5bDY3Y+6a!E@3maH|^f-)czk*=j}doO7hO&ABr>&-t@D&4set z&&G1v&Zcr(&*t;eW~%wkXF3E;XEq9%%xo7no;f0HJbhc(Xv-TBqp9zrhLc$ANA|)P zY=!Y31N}bGZyE*vI|=@C2K?VP@PFV1y?1dS&)veveYZSvUD8I3B~yaq5}jzj<% z@*-O=1v1f=BAG3gl2}cba@dV`S8y2aZspS7JG!10{p?E zdh{iW%Hc1p%7-u;!2W-12K`~s?*RR3$iVX9-jjAB6b9Np2VfrtWD&HJWMudgE7Cj1 zi*(ORAf59{Nc(~g(z;-ZG%wPT#zhyT_OmZi{W*-F{7V8s>6cuB!Y|cC`AgkIxl5Bo z*GHUmli>5U}rgd*`fiAd~jJ`%ZGPY}L0 zKoGh=OAx$&kRb5jI)VS*3j+V0&wryA^cz6GXda${J&=JN1pVU(Ii5zy;yh#kkVU9o zK}hZf5lKB^LlRGUkoYrUB>GGm2|rgvg3mROzzYM!|Iz~SzO+X?uRIVpd?Dd_oq#ys z_a&W`(RGN_@6__>}P~@E<^vt zbp-c`kmy5%gg!DMfzOCC6aZfea18(-d>9^j45B}P<)Ht8 z3fTh{ysnHI0ZZr*u$Tn_i#QRmkRK5XL=rK7DiQPL1C>A%&0m^`Spbh8+hJaCE3YY^HN#BK+N#BHzkv`%msk;HEbDWuQxd8Cija?*Qh zJ?R~_jr4}vLw-#iB)_7Ll3!3alb=(!lAlp`k)KcxkRMaek{?iSk?+VqBj1w$#B@U* zGhdU(ELY_*>mO^tnxL%%{$CgJ2LpI7h6(_Thdzvljy>UvmKX7ZMhNkZdOYc+S|;ha zY7zOVYBl+>Y7_aPYA5A^%6iIul?{};Dw`;GRAwo+Rd!HrsqUxTP(4k#s(OR+o9Z*> zpH)Ayo>#+cKdEB&v#OZm%o?yR=o`R#zcFA2`GW=2!dw-7H!~o7FtH}QGIS+A)ej&( z)QckB(@i1Y(aj~_(kY|d(5a(b({5$@L%WCRvi2bJZ(3u_m$bI9{H!(4azSe^>v`?t ztUqaAW<90znC-aE2aY2;nCp-Z<~|4<_zygA{R?ae*@p@A+*?8hVGa2MT?}CxDtc$F ziJn=S5*}FC6KpO2PwW-Z9a~*=&DNT5iS9}~Zyi8BV-?ACl9t4B%rcwxh(!_mA&V-G1LlpK z`_0?Amd*RP_nM9HESXL5>@wTVyVGo$Z_(@&-@Mr^{;lS(1ZK>?3Bs>D32pu_Q*ix1 zumxz_fcJNV48oZMeRknSPhAAiZH6TJjiHXtIhzqqF&v3U9lgm19YUD**~hZ(wM}K; 
zP0!`rNiX4Aw5jHqw{GIyZr#bZ)oOr$)@n>(+G<8{i`6cnDXXKxlU7%R$E}}1JfgWK+>&|5T(kH_TnYt-T`GhIog0Mv86Bd1jDE2m zr*ZKvr>&BmPRmm5PG_ZB8TX`G81JQ;obf7>jcdSm@X_rGo)bRWudD;l7eqp*gJ92p zAoLytNuiy=nuKjZX2j`0d&*>h2g{h>I`$FYaIPWmc;0@mH2yx%T)`fX65%e7YSDK0 z7O_^hUWsP65vfMEY3X{m-74%Yf84-2b73a1k_5D`L{`x`1Q*a`Ax_b_{~%E{0_{%X&>v?(?v8O_ZjW|n zYmW5itd9ugsR@hWuL?~TDi6sLDG4bQD+;cXEC_0r&JF65%?=!+W(IDP&j>uAkRJG} zLR!!>;43vP2+RE!S-AeiXLp#3fiMpu!TW>$auWDn$N^>(!TTqH_fM8Z{mEK{?j%!E zTY@c9W4tSCZLBY6RZK8Xd2}RyNo0axVPv{Uenh@lPI$RwR#>BSdRVt?YUoC4a_F2w zQs_R##L$b1387CF62rd8Cx&4vAon9Ly};mnvEvGnLt$&{#i>7=Mm zxrE3K@^O(f3Nev;6{9211CJGBB0tN=L}4l*_ajey!TUoFa}w&m5A+u^!1KY^o-w!% zW^$pPED6+}tx9OjG9=byT2U&}9a&1!JlG3T{JCrBq2pB)7_jB@IwRlBN`bl6ETwCY=Tz!0Qk4L5Y|eoQUOq7=6 zgHVHA*)R_ZVBNb2);!_bUI@JhMM9{iSRPdr>kx{I%*c6#woF+CE^O)f-kiyKfjkMh z5&UsE@j}trX(AC>`Qo9O6_UXj%`$-*>*WH{C*}RpcPjX%p9Jm!@8$i{FcpyV|ACwE zJe*E|ama-Ji+u2%C9v*X24@eH!rFfsGpa7bffY%D{^eOBK4m3ho@MnC?xo$*uBBr#&L#7*jFO{r zjFQ{3&c$zKor|!{S}wrzco1r^1L{8w`WtH@f2arF1r#?z|9vwT%4`ussjW&Vu~nB4 z+hR_NYO$q+H@mO|H+i!KHU)C}H%9RIG{p0H)@KO1*B1)A)YXVFYC9zyYd1>T*KU)t zt2qqZl(MUSEoE2rUCOQ!|Bcg7gS{{Y+bW<2bx?yQ$RAo@?Y9;3hYs*wU2G_!TM)(e z$f2knO+t8&F)_Hun!K*tiOH|qgT<%IpUtx?l*6qvmfNK>mDi~wU%*8EMJ)`b7YVR#OA!q`uP{s`nS-H=1ncY*iof&PnrSpSCK ztO+0DK_SDEC}@KU3fQ1e@ZDfR^cuD!c?`Q!T!(#_ori*09fzXW?T3;$>4Q1k)`R7| zmIE#P<^zKQrUNsACIkC{D}pBLUkI7>eHAk9#lprt|DWYTr~z1@3D92;_uT^86$9Y^ zhY^b3h)@vNBEL->$a_*4c}`K0`;-=PnKC9YrmP8$Q%*$tDGxGzDu7}=6~;uHjAyZ! 
z%w#j2EMYgEY~(cD)X$~AX$!aBre)v~x8B5aZr!mjJi4Qp_eWNOvrxyZHQ&pkxiHD1v9QFdzHos>efv{ZwQZl+)V5-F)w%!A9E|@) z(C=9f{u}PSWE}kGX6V6~gC2l+=z-XQkj)Z!;Js|fc$ps=?UO4N>E>kC#bGu6IE6!iAo1LNQwu>Nb(1Ekf;aFkf=$Rk~*1QowJhvFRcFAB(0BCNb?h2()`qfG(Me1>YvR9E~NIkAF1Aq zB-JlcN#%=TQu(rpWMB1@?A8{N-a1N3x85VguYMq8T320!H3+9bttHN zGU^=$ZLjml0l7?Mc?}-G4dg(42>;VeDp6}_^-M>@Su*s13QIvIMDWl zwmr0GKzkCr2>s8A)W0HgU8_GH-cRNo&FNuU@GwjlkpLV zbu~?iSG6Qw#gTZKFXN?=j8{x$yh0(U0?nWc^n#UO4ddlEfDz*5$G|>t6r7=dlXaMbCF<1r$!CJ7H{Uh7X{*vux&t!+_PuVH@U3QtC$UdiEWcTSu*%NvwCXcsq>Cy4Qyag5iQ%DGy7SpD zy6x;|-No#YZa@1`cNKf6yOurB-ORq%-NEkZ?qlES9%pxS&$DlJKV)C&-D5ZPpKzb( z^W299yx;@yz5y?M?=^UH=vyHEH;4Z)9sa{~^vU#T_}ZTSHua>ZQ$pFVlM>h?lPvbo zxP*OgT*JOIZf19kJK1gH9`=pV3ih?p5c|q#1N+iw8@p+|hka&zl>5Z^Ecc=D``ita z@3?Cwj|Fc};)Rzc@uG{9c=3h*z)y#^CHx0##Qn42Kg>eA%$!1hT3gdEmd^BWx<7qq z5zW4}NM*Oo^Vt{X73?#!diJqd8~34EH}}3-A9urSKyck`Sn!V7X2DgnF~Qqr2LzYR zP6;oXT@{`;|3-Mm;%D(Gi+?01EO@!&FF9tx%fEuRhW4+V!AbVc6 z=|AxH68KOYa$*GZ3w(g@J>=<*tBgLGt3~g)nbJjfJ9^9Ai=A{2<&L?<3l6)c3lGlC z7w>Z^m+Wz=mD}yyEWgvaQ(=cwkK(A)N~LX1Yo#MjTcw*F_sTXoolxH3bWM4k(_NJ{ z&QDZ_unF{PF!&#MM=alSBYNbGn9mRKzaRVuANUX6$baxPp_6_#bkxt49qW%8=Ve)j_wNYOCCis;zXp zq`utkYxREjUp4yNd5xu@*MnDoopEdV4=?1qBmVvbxA=FEi*PwuOlJe(KLk&pLm@M0 zPlzKM3-;l*1%(Q?1jdOs2BgWY^UswZ_A5~w@~x5%`plON_$*Lf;k{UOnfD5{KJT^a zz24h3mUti3TLYmxU&t%W{6X?6Pi4S1~%UtaTdu-uUzqW)imBJPbuu6H!%*^yZD zkM$`BB6Mk2q&aPmv}2niJh=7Yfx_XiDDmphB)Nf*OobJ}1xo!v<+7!LwaPt#&8mw7 zy41P@`ZT)yhcr9gUTw$?y_%2*dNrYc>Q>{A!~-3m z{fggxkOLBieHUU8b0#77PeH7KNAB(f>^YFAP8*U;Xibt8t%4u4EWul_G(K3oI4)Xl zVQi8@XH2Hjg6INSYgD;PQ&g?m{KyuK`iO3=+K3f8)e-A;=S7U^RYV-uFORrpP!@4d zzbx_(y)tal4s?OeD<1iyAJ7I@63_;zi2XA#&rZiYH-)2(sVcN4Z6Xb%O{e~}Ijkqe zL(rWZDC$UxkhCSnE3_n}DK*CD${ONIRBGd@)T(0}HRi>3X;sAZ>y*X}>y^Ze>KDZv zH7Ja^GNB;mu0cWUQ~iQiUJvLNzQzOe!zVF_{ZaoD&_9rk*dJ`h`7rhg9mq7G{w#A^ zl4;8pX1EI4)BQv(Xd)1s zC3&XQl{=Fy$Z_GCvwegOS;6Ak%qaP)j6}tX^mJ)idcJaTTA5m5YMn-YYMWMWN{?=K z%Aj6m%7{UF%E1X~DHp-F;E6$63a=0J(qH3t1g-~+gVR}v{h_@RQ=ZLWHO`k6DAJMw 
zE$S+qOl^hMtf|0}tIzioR_6tZD{~{{%5visigVJWh1ogEd08c@Ia$>jS(zMmNX+2%lQMX{*SUpj@e0Pl$y~(#&>k&A%vXw-59dpY z<*2hno!Ux`sj+kht1F$uRh764DvJF@B}HM9!opaE{K6EaoPumwR(_Fcdj351)VwCG zy2_|+gbhsXDLz1R61{csU|f4l%=5&Byz5%+?n zIPWSKQEP=N&99tDwUrh$uhN#4Rk{j_D||%-|NrhTM z@qEp=;!f?DqUE|#MH}=Z3ilX<7oGt(!O!^mAH9eIejMFbe1Lv9i*axm)8C!&VK&u_ zdq9Y}st|W#tyF!DGF8{=Q$?LAmDbH~-wfyUbwf$iX1vsOwYnNXNJOFTm^g zR`J@exQy|C9R0r=_21HnxUU)S{y`I{Y(@OPK%Vm2)hWBfh%!2-Q);IjOX_sx;yZi< zu^l0zsE%k!M0=8a=z>he;01-!z_xkHer?UFKCMgCy;_GfJz7S!+*^)oxwX8f<=*^* zmV47PE%!$L|L_*t;1Jq<2lUss;aXULxT_s;PY2?zZp6Ne1eCS}{zH#GCBln|>zzr_ zy-qB$*OLqD4G@I%M2LcV;>G?;(&T)X?m-REAwCF%lWPf5qJ+o3q1#tMeYOH;<+nJ<(ya6%R8>@Qk=7LrIOu> z5vlEp!?M{cu1aSw|6V$K+3&L1eZ2DP9KskF_o3E7dwCE1hhDS+w2Psgz6$=s8pQwW z#N@wTg?!iRlh1~!&UKHY^a^ zY*;3--mpo|a>D^|SyuYz_DbemUV4a6YUAPgh^ z-+=gk3z7FW=8VlG`!Of79rL8wI|FIv&PZmnGl^O4%;9G2 zEa#@}Y!+DT>=l^pTqiW$xmP%K=Xv4OvD<W4 zu@3R?CgfmjMGnXgyn{z*=(3k+_5lG|9g@+ELs~TbkP%HgY)%%3XOj65M>0F&Nv21F zXzI~untU{sCLJv#lcTlF=;%Ubcyu+JaBPej96QYnj()`q4*$aR5Bg2ap4D7&#cnunyw{)?mCTCd0R+G~q1`GI(nu>7SiSdS|Uj z_uL%PIpVi=z~ELelSLY4^I>O@Fw^H{Pr)sYvBQ* z&&NHWoPF@1pdEA)^*@dFKZ_iYi|_#6Mh?b1L~5`vq#rSo|6D}kn^F?qR3*U|I)nfV ziKu3HA!UO_kj|JK`(k1gg$B8w36}8v4#|fC}CD5R

    {$&C5t9HT%M7t-T z-of}0w{yrrxis$oAP3}KBJB@|R6ZqA{DMgIHR1EX9SJ@|O7xv75>d7ALTUoMmMQR9 zEQx+_crj7`DU#^tEP$K;FN^Tv3=RG(2H0Z=J%+$bYVfUb4=4$B4~4edW$0f+4$2Lp z2_GQ`@{iFa589rMLpQ!`C#R&e21<{|jM1On2AHn3{ zc`gXb@jNGG2uBP$9s}+rycYZiO~kpXKnnjs9`UXi{(}Ji1BHWRkOwM216TkSfqpOm zhQUVqo3_$3+DU)V0eVVr&=WdOztRo*1&#JI{ym~UQFQ(}Kj8Q}ysi=PIulS1paL&K z2LC|`{)2)$@seQTMe)Q7vp@;TRRdZ;7wDnCx#jeXTTOp(>*y)Bg`RLb=vTIver8AM z5qpb%VAtsZ`?M-O_5KFSQoZO|7N$xzE%p zwD!@v+HcT1+L!67&X;sa_eXYK?-@I*$8%@k$DIBT{8X%cm;(R73?6_v{0B2F`e&*U zJ)S&`9vaV~yGCC0wP6^2VVFptPRyncCl=FthE?>gVI#d`xPY!2E@D><``Bf}RqT@C zI(ETugq=6q#m*WXVP}lavD3yMu@ffuxnm~33l5v`f*0hG5F>PXRnzpl3 zri-}~rpvfDOjmQqOgC^xOh*OS2}5wu>`lP|v+IJr=68j=%zqP(S@5D!3tqhaKkzf4 zZ;eZJ7S`O)7Sp4d3Utp>g>J%|x?yQXSFG&mg0&Z&u@0qE*758O>kM|(x_~=uRn8r< zs^tz?H4FAxbqe-c^$2!bt`P3B92SmQZV`=I?G|mfIxgC3^|pA6^*7>;*1t;DTk~@3 zY*IFvXbx_#js)*TNFUkaZ!XTp{ttF0^yVBJIzGph4$lc-2j)bvz4po6 zF8eHQ%&tfyIj55 zsB0+OIya8n?2;pV|9?9?D0a%z(dIxUhLa9S?E(s5W}x#L#Fe#iZa zeNLy9dYwL0TH^dry2$yNwA+~%kq@0DBK|4?pm=apMLc-iax>4e9R7h+D>+ZRJH&kx1B&LGV7 z{pD%QUz0`xC((vL8yXIDVS@p_+)DpY!7~3?VV_@$sMj}Jyx6x;((O|rztE>nq0_rf zvE6%-w9R{kti@}+a+BAX%6#u*stw*(RO`L(s@C~DRju{mRbHZu<8yvO8+?VjUxW4; zJm!w#k-HC%nXwQNZ3$7Ob)iPII&22540B-1LOoe;NRVK0aHOz1I6>4IlrC-$%#&*i zERk;ss8(zWXi}Q*-zBT}?^mw%A6BXM->x>#|FBx6|0VT`fN#~y1D>dr2k@%pfzMIF z@i`CiX!{iPz6AY~vG70Q5O3i)62WL)q%sXg8PM`5b6Of@ON%4j*uqFZt|KB$*cKiu zY7R>gH-=@&HG~$**M(Ln)`T=jt3ujkmBGC#<-w~}OM^$$i-Qkn6a}BxC=B@;JXS9Z z;niNEisN(c`Jf-d5c|g<_D?|UpM;nb$MyJnFh)wtVs)uEZYp)h&7{sa7uFW*%{9ja z3+6{hiRz*f#Whjsa#fLe@|BTgiscct($a`l<>H9NDuv;z)bhhOtLKLA)5r-wtC=0~ zCHO@nJAzjSYOgcy$Blbor=fcYKFH2A#GE(|#UtKHP^2F0-?K1j61Bq*YDsitjftLI zT|%IsIzB=;FD_189+xU9jm?oSjxAO!jH#05$22MD#&oM@N3T@NjNYh`5xqwPkC8u<# zB&PJM#V4=Rh)dq78Jm1kD<=6v@FULu(uhsw)q&b8#y!DtKkf+XzcUZ-Tfxd)V7$!_9h!rOlBuf(V zv*qLRixs2us-=;6t;*rKJ*uI(Luw(pqZ+}v$25X+uWJP5JkSWvd8Qtm!>fg4|BtH} ziziU$z0e&g#kEi}?g3%GUj%&|%S#lgxKxAk%Zw?fd^%;7+p+ZWxh$pJhf6987Q~lD ziDJtV#nGi1a*?G43SlLcN+BgpvY_HcDgniVs{TdW)clH$s{0mQ1NYVa3jb8|E9BMu 
z3V795T*NhS6m{MS?Tr%uHX;j#CP68nRn%YvS;Opibv&PRrks(svZ^h!0)Oa z<-Ce#+5b3$HW<&z+Fpscvl?p;s>VGa#2g^M7P;T`9Hq>cQBtEW#lwqQmop3K@?w5nft*iQBTHqzU^x$Y5U(>Fu;m+fnDLA;jOP zbK+*?KtR`fCvs2_n%e9~419>l=qU04j!Q`6gpAZrXp$Q8QdCb)1(u|8(w@nOo(P9G1e6m)IgAc z7c>Q+5ul^L*aEKv75fq&_T`rVWdY5bUV1=i^Oz$TgC|h`(^!WB-N_f>|3Fv$I{cUS zkc06NVe${~>-3cXo{Ky@7a4*AHTW&sn3zt4FJcN$#0C>(7rd-PIUb@MKNP{UXaQ*S zAJN!99t3ZJ3(uLi10E3iJMI`fsMGM^&O`q)^smAHxq-Hat|D{=4;uZSJ|A9h40>}pN9|-;fHPb)TNq^B2cpl5(d90$};e$M-P4pXWgZHru-p3($ zA1C2_T!!~?6TZg-cpgvbA%6P-xc?t`b;Q}Ki20>JjB^3}2eJWkK>&yW>7Wo^OC{Wt z27KBAXm!(LS_-Wd&{|DD(RyfYfz}wb_Cx15n(G36M<1c-?$K@h#P|s3m)}U@p(CR>)7DKBa3Ios@rn~Tj zZqs&X?Salw`ijod7x05V$Nzka*7+E<_>kl2gK-4^gV%xop^2DZ4gP}~_M%ippUBW3 zFa4lDKgfLOo-~5)NR#MmX*PW+EvC<>yo|ou&)2>vUG-E}d3=LMPRDIsx7Q$KlDmf;Ysv3j-l?U*JC&q8t;{=yyGR z`cZ2t-PM>yx71zfGxb3FKs|=uRZpdNH1g<*Mk!s^sG*CRjdVe?oz7`4qO+R)bVh5C zPHU~FQ(D{Tg!W!~L;D0B)4oDSbiSbjy1%e}dOX{s&$Hbx*@fdcyb1J8;6F^^=V zdTOjh4^fV<^$h3}JqvnQe-2%l;7u1NgwffF33Pg52A!H%Kqm~#>9}Dn9W!jABZeJx z*l-CQG+a&x42Rf0qs?ru(N4A-Ud&G83vA5z3%1SV5jSG;H@A5b&ux0iMjXfCr$FBf zm$d~h{b{)Mn2G47u_C=^tU;G2Po}d|X40vtu5=t;%n{QlI%t|q`%SZGuW1qOHmzX0 zOzYT~X*1hl+R3(?_OflJ1Kfz|I&QPsc5ai|0d9lY8E(DVhupCFL&1>6pMpUPUbyNd z12~Q|u0NjUc4roDvG79PoyF+t3^84>P@z*ZCel&M>2$!#k@i@5(@uCXqgHXW%_^0R zSmm(IR>f?iRVBB>3_6t^94haS=w+IKU_6S#6ofIy&x-RUuz9(90 z{ky2ghJVfy9AClD!0is&HyzHBm&dKLE#+3)RtZ+v&KE4RZ5Q_0_6V2S4v2beH;5M7j)@o99uqIL zeOuCHcSq7;_nV~Mo|i0mNgIyiU>hp>7Pr**+@bGbPC5#qp4QlhXzHMyIdk8=QXyynMYgFZVh>+T(Es{VP83U;Oc|73{|OsJoChyQ|V# zkBKzsIgM6&+Ec%$JN0@5uq7T5tlK@F>vB&ObhzgT7Pu7)Tixb~THG4MO|I>d`L4Zk z^{%VsYh6baYTWiKRJom1oagq1Vx{}fiWTm>!b{|Fe9rf1`wzY00pSsMJOr^0*oN~> zD90Kf`CMSd>SrhG>I$-=_8>=U4f147fq|?cFoLTKh!fNVqzbG2b3~Q? 
z#o}`RDoLq-lU%XiLis|!<%$J<>y`5Sc1d&nPfBzAKagen{~*l{_)94}fLDB-TNndZ zf-w%Dy*~#2M>JwioUcTC^oGb&H}>!82s5JAu<6tkW={=aZmc%Uk5z?+a+M*`g7T0g zVQEOFs5rPlTo_y-$q#Oj%MI>O$PVsT%nDkolo328O%HxUmKJ;iJOt0AX~Db_P<+Kr zU&Q{1|Bpd`H>O{k;>P_K%rV39-aSH03$TArbL0eSfFD#FHJhrUTv$beW5zgRIUeoz`2w^bGqe^?eC z|F&{?{CBeOxZkA_ar`(+uegRbKLM@1sd(R;j+iqIZGiJuOuy<-p2`&D9;9eeVX6`3 zrB0)qG+W9{b7ASJ-YhjWh)Yh56eOl52;)=IMX@RQ;^>qLxyY18`S9d!#jxZ7rI6$; z(%|HSvY_Nk;I1q<`KdHGiB}pY>ZQ? zKPWTHjMB4aQc9K+C1rWCgscEAE-Rdi$%+$1Wu*!uGIK>?nPrlY%zC+?j828Xj1`Lh z8Jm>+GWJV-GcJPLQor=yl>E|pCBHOY(f=jq(B_Arw<8DFBj^Eb*@*eFppRo-j)*dI zRVXb_kCOAJP-1}<#TU$>*a9~eUEs?i3qrW?f@nc#ezGt)KT8ytUo7^|tCjfXwafeD zEmQEy-Kgl9yHCj@_dNJU$s^~9l4lOD=$ZXL-b5ShgWlEx^g}+b1u*Ubm4jS-FQbT2 zGX9=(Job`~MQ%=X*))nQv!(Dd7YZx$W+7!kET}A!3n)tv_?2Y{eM$>OUZquHkJ2`Y zTgg&+*OGM#E+u=wTMEu4w-j88A1k;N@d_@5yu#d<97h}Mg5HJ_ymv3gwE${C5x!0@ zL)=$^xMQ9IMO3L#7`&K}8dD0Yv7x|P2lB6VC%;;M=2IKSylP@OkD640TTPx|ZcU}o zxw={GSluIWs2-NHui6Dp%h^?ZDQ7?L7diXNf8^{d_&BnN)(w9HUTj_eg1rcVwp8K13_3v_bW3Uw zdqX?65&lCfV$XK0!RS;X*DiH(SvY~57n+h&w>3F-&!IWpZe-u>M|RzzG`l;F&FW5P zHr++cs=Jn((cQ&O?_MRaSh!7KzVJBsKw#eaqrkl5FQIum|1Ue{;l9*>`*J1aL zK}PEW$#8u%Og2Q-NIcNp*BHy{UMGjbrdVlRg6iljHHN;;!Dq&;d(TF6V$ z++jl+I~+)ThbO6z1(VuX9H}DtP-Sc$$;LXtYLbraB&D%4q_pEJQX2h*q}%@j{Bs5t z;d(?{*Z1IB=!gHX660_PeXt%m5YV;V2K^n--;Ed?vAMln!f=;$cHl zIBZJthpkBNumeeumm)qAO2Q+FBsfw)-1tUJN6?^0HsYa#c>C}ba0fS~pTX1TEQN0C z3gqFef)9o|$KgkVP+#{kcu;$ggR&p`hp-RlQFs7vAP?ark^E^93C>6{=}-mQ_+v{G z!Bk*{2cQECa7+l!;bD9Z51{k7=+0wsok!))ZwDvv{Ttve`0+WNXz#jp@B&fig#m?`iG%^4EuoKS9ITm2k;iwft)82Um{F6z*PxAIj*4`*R(Jpo`A{FWK7IwU~)PK z-U!O^K?Ejg=>Qk{N9gE})_`5`O7MZ7yaT>?P6PUSJP#)g{}l;sZ`5}Vbf@9RjG?Q2 z0XZO-;lErZ;@*Y!2jFAy3HVF|Pec)uav6LPbp$86@FtN2A`p&~`YD`k#aUh5BkiR|Z|lxbCL_#eVLCe2e34 zfaav{7(N?q@<0imiwb-dO?V+_lV45o;&c|g5R~KhDEJ{c02lwCy#NjW42}M5?{j!X zI4;+ObaTZ?04R{21VcYx)kKiA`|9{|R@E;Tq^E3DlgmZ%bK$gG>_<~62ra&_n zdZo~+!KXFBhiHfI(G9Jo&{_elA!x0K)(A9qLiHe;>P>hNSJ8Cez>D|^HWuzqmUi&fOZJ#7mvDSK&ud174SXkpwkTBqXQnxVrcb2 
zWdK^k(AorzQTmMbVaU8eAJQdy@3|B51HB99=sG_29dPY6cvW~GGQ|8!KoR*Ec{FPP=lTNrjOE6TIL2D2?>!7g}wckx|qdzXu zIXFTeBSYstdgdv;g-<@icsdO)<`uk#BH9A$FI2GpLIr+^41J<75#EbAJrLT_w*n8? zfFbm`FqS?NrqTPtJbG7HO7DoO>8fZxy)9~^%c6yJQPfKpL@Vf=Xbrt3+Dxa#V{{5$ z%t`TE^oHCAbX@*>I->YH9fI$2@IUa{@E=yz z%zouFv`^&&+O7J4#?=0#QFt)h)p^?X8qb%P>Ep5i4{&+k(Gb%ew8y71ZMv>Hl`g5x zrgQ3U^rl82ozRG)W17iyL^F#HX%^B!&2rkWSxfu0nrM$!2kq8cLOZoq(3tixjcRYD zZ902ttIkQ3v7*^`6pN_%6dQS%c#^JTB#@xUJlqjLUxtZb8P3uItOu+uEx1 zmYxxvFqlC{COFc8iQcq#VkqsJ7)xUlQ)$#NhqfCQ(N@Ds8ZoS+&4w+s$*_wy7%ips zMuW80XcMi07c*phj13yU%?3ssk3SOR5!v-ZM4ZWf;N~Y&^ps}8aBAu! zbTM0Qx`Oqatz&&=qpa8L5Z7aVfm>|;71wR?3)f}AbDjU;u^DcyHrRgxx7PEx&7Z*W zur*@OX~=(=u0dO8n9wFmD_UpiNNX&;XwWK{2CSlKg=G>gv&^7A%Y3%fvXu2$RB{6C%4dYDc5B=#C2GW2o_lF7qnTuC1|z&OweroNYH5W58ws!U*iYV{d0S)|8PP6 z#azT&&Unv=vTU0rq)jv7Kg^y$t8Fc4rL8S3vvs4Tw*J&(8%B$5W2xIVg>~6xv5wh= ztbKL`*EYM3Ynk1~HO*cuXq-JDXt3QVth3!MthGHQs`SZxgzG8i`)ZOP3m?vrVh6m)aGVS&2Dbg=;lWaZlSEs zEr!*&CbKHnOs>+kfGc;c5R|#r3rbwug+;D?qC(eUalY%QIM4N{B-ib#B**O@_(PoS z#=k;@bJ0!I`66_W1)>dt5N8BHALZ#sITm|j-GQeXwRugXCNFbp@SaU|-Y!(_?M?H% zf>?!DBrEfZXC+=~T#;ukSLj(P$oH%f=6be@ay)y)S)QxK8J^oD>0XEB(!AaVcjZ#O zf0v|s^O7`gUYzFjf{)zM5B?a3As7Rpi1&g~f0Snl%F*E~q!wQln(wDawSH5n%HNtQ z{T->y--Am0{aKM;7%T9LVR?SZT#jEBm+e<1$n>icru#OF(tH<*Q~Xv*lKn>H68#Q> zi}DG6x8)N2e*?TE!H*Ze;)XZ8Fw}oPraYU&(FS3NIZ>WYv_}i}uBi`{QFV|uRRm3< z(x4es9BfAgL9Ub+0aK0TnD%s|EUjpddX%LqREf&4 zZ%=WU5fy}4P;S^P$_{g;%rH+%4+~(aVc{$}ERIVGOXU*6as}~WWy097dQo&(r#LEX zxg;WNqg;5{KKZb)bKq<67+>>}@X+UoaTH%hyB~w@SS);~7|iogmJXCvvXn*0 zQ(>eU?l*ph_a!yC7Za5&h>9u^Mnu($!lT;7 zp;7&kkf;rEK~a0aTk?TXU&#kW{VEp}$xFrwj^xGToJU;`Lwj2Se5iPg!#K17%2R`K zl%YHYF=EPzRiO;{L1}T5DLKxP665VDKHimL<9#S5KA1(tN3qEGL@qo&lM9V25`@H6 z3xnd?L;-P2#r|>YBz|$b<$U8#gD=1@a=x*=#P0=S9K~l)*Zt7moP?M&5%CsiM0qMu zmO^|#I{|AS5@nQ}q)iD)#uS%4jbf5#Q&fsGMW%RBcuF7(ONn41De+uTN;(&qTp;jI zo+tE8ZV`DW_lUichb5lLJ0%{;r{p}6Ka=xF`dQ+U^pC_dkr%(>B=H{1j&iM@jr~GcAL;j#>PTqEreco}Aeg6A6{wT7~{Yx|_hyRy@sQ0#Pc%q;a zdi6M#f-FqM#}!+Hl+`RHYLx5HpRTqrsxIZIZ+$) 
zaXmn<6;$E85Tq3%<|u)FIbz>R#NSnljc8`88CkbllU3UsvTSps z8Et+vy)BHUwIz^6TNatMmC@9;W;Ug5DVyB3flX@L&rI4bGn3Zum`U?fX41rSCXFu` zf!1=c091pb3bX;VW1t;0ANdb0c>lft@qZ`wLFiT>i$$trzF3z`7n{)3#TGPWi49F& zf_*5KU>}N}05a~0BBP!ZGVCd&2|aaW(1U#_daw^g&u-FLat?e;x{IEWZa2?#7rtO^ z4W13vxE?_T&a>-q9Y8w_+CJ^@AG(QVFM)m^_JLT=ks;OxO&Cxkg8@C#Uu8mit1L)& z)lAYEbR_LTPtqC;Ce6V((iqGl^}$L~MUsok-~bpU+2ETbTlEDg5Bx$ZEB_*u6<7xX zRyN|k2+dkh*o14L71sgk9IyyEC`*a#mP3CPau9~F55-!RpHe(;m&AOz# z*_dQo%z+I_w>Xm07H?AA5=IJJl1P3_0ZFzrka%-13GoM*xy=Vju;~i8OM;ERf@jYe z-;<}g4Spo*oYe*Y0d)@U#~2(y{0&{Jb`^0bk1jFf zrO+O87|t_+Gr&W4FCGAUae?ne2k(W*9#mw{4)7*k5PS+AfXC12LR;6O&IP^jA5iD; zLB#*4uM>2ww;~4wzcz68*wdegBObWxd%g4^huyC};&Df|#s zcp+$$A5E}E#u8o#%JDcDFLG1iiBuw>>H@3bi$M7Iz0ctVSbss|G5H=oTG==Wzi|_Zj{@L^J*YpMZbP$2flS5?%q`hX64@ z!G9q54}?81X)5wvW@C3=*&YG)I+BQIvvni44pps5d-+#VfY@K z(RE`OGKVmH&cKVff!6pA*)dPxM<5U5Kk!P(zYxp8_dp&5!4E;7{6$94osN<_z}xYL zW++-S9$M+p$%CU&3eTeoo<{?QXA5*Xp|b=!%b+tz7tt2yk&|*3Ud$Of0Y~TxdgWW3 z{Q_s^Z-5+(*WhI$tcQaCAdjOMeh5=RTkE3lra%|_aH1_ff){fGUd%g`h)>HzJqpmm zWzeaHP6KpW=`?lGDU6L1=#w|l7RN9yj=+mKgtG(qjeQu%d)Xt}!=BM@crd$=5AzCa z`3vs_U`YR_hv7c^0<`#!Y$^ZIxc7Mxw3#hl2=3?jRtN(CUjcKnodZa>8R9)4oO34zbuya%2H{! 
zESq-83TaGMPCI0^v|ZLj+mt(KM7f8yC=bvkmG!hiWt7&d9HO<(_Rf*+NtSAqniG-RWpLN zXvNbetu)%Gl|$>bifA3yDh+GZ(HiX*TCKg12DSTXKxYlD(Ai4Mb@x-h?iuRS`-FP* zf273*f72oZtaJn2FL^uxx0oro1)1ZPYk}MSRLu2_u>L?-k#_58(sqMMv}J-7ZJg*x z>n3{9nu$TQdSVoNB51R9!R zK?755Y57!F>YwUMOQ(iX&(s)NY??&frWv%*G@m+6OR2-Oh8CDMQ>$q=YcX5Sn&8DW znvbyt^JA>u{2HsZxW{Uy{lTiI@z1HkvC4vHcc$a^Kbz>x9QYq#yDi?kqbzGtuGJRs zAExP0-}K4UGsBt|&2Xfy8J^TJBY+mn2&Y!dIBKy>p(e{LYP2k*2FprTXW77NEjw7X zb;SJA9`o#3 zh&OB$Xo-ymbW;#37lA9I{xULm|s| zn8)%Qnz$T?ZZ6AVr6ALBlOV%!pCH}wtRT(lmN3=nS3#;1{|XN0+zltZ_lN#oZ?u6I z>hB4CH^h8slQt);J8+UwowE*AxlE!;ml;&%Vn-z|u2kgWLxnEElU>y1(?)HWM zg7bwaOB>2DAOBb5rbLx)npEm;L`CivRNy{~^4y&$$HSAdJOU`wJ&e-bV=2ueg{638 zvt*AFF43cwOYmqH#Ch}yVm;OgV?1{WqdnggMtOb)eg^*tqCEM3xr8=2>W}LHQ;op@ zv;oc+pe&7Ok7`ivr9j1AYEU`!SQ&&^ z2W4qSS!&T9u7z^R( z2ha*?P^J=mKNsc6K)I5`q?CZaa~>N$k)pAeb7Z(Rg-6Vxun1QQiSVJ|h+qnejHZCd zWab~4&HN%uIiJV|&O5SO;2F6};1Rh^=pK0#Tn7(@?h(%f9ufQt!g;}QXzYl>SODFi zF%oS6@==xylr0J8aZw5s6|F`Q(fSk`GnqnSW>8S9Ed|CpQ$Va2`Nam3Z(IcV#3eAV zxJ>35SIpew>NvN!PHt}8N`Xt4Sh|oFin$S7+zQ8&5Pk~De|1XE3F%k<826P}C zr~{=SI~w{p#>OH3kC#vg{nI3h0Hm*nsZEU=Nyukb90h63+$5*3G9-u2<(#X3G9>p5ZEX3f;rkOPuZQ2#XOXCUs%M%Xr`;D8H%m2%0I<84*wSyW^ zgs(F|Tqa_k9K3hUL+o1!|Dgo_Ln-`+GEJIQHj!qQPbKSeOR_4rCCdsIno;3R(<_2$ zT16~bRHT!6MKPIHG|<$F#WcBMm`$qK4bHMj<=?PLWsljUQl3pJdBH~1u{R6%1W*C; z@O28bqw*2=6(RO5MeJFD*sBWrK-MbIv^rI?sM8_ydSf!JH>0Wb)-V6^p+J8vDhG*l9?+dgL)PZ7< zSpXjj+M&?)u0-rvgV?Jc`i+QxTLomYKuSjKYGl~1M-$pj$e_c5^gC=wufu_KJ3L6I zGl;Z1V@RtrlQcWaNu#rs)H;`wYUfr`?RbM!IzA%R1&>Iz?HQ@HA)08#&H*JDi^cHb zaLfnEWq9`vZGULH&By!y7R3Jxpx=de@QaXx&?6?zrBYJwQzx}PJzzqreHNtBKNC2S za=#bJaI=&4Cy`Qr0V(#^lR|$F$@Q%#N#6mIEWHfwlBD-B$@O3jNDt<8&}#stAQSB! 
zTMPdIb@pt<`}a=7|Ijt-g?>NuS0V;pg&2HDjueMwBtNW9a>IHg88#vDumuT+XOUpo zg}Al2`L4yyW-SKvFgkL$9pD$&>;P|Kg7P_d2%cb~^gFfH;n@W8(B4T1&%@i`0in*$ zOA!A<*BrXWtC0h-7BTP!=x>GxuoXEN+m!I2LtwB?6O%6kFbPaUqSI`E2ks~yJfrad zgL)JnK8lKrZp6Rx4|RuD>b66OHt=^6g9dVeSjaGy8>enx)$r8zX|$Vp+5=_ zU?=hr_7EO0;D7|690yR2gDA&AOvn$JU_y*?9I*v%cp!%WTtbF=(Fv_=xAcDaHgy5tC76 z~=-^urx`oEPh05GIOF1Z3JlZ;F7xI7(VhlicGIVvIt8y0m zfLwg3cMV4r8$tC8FXU4kF|?2X@WK=MS_nTx0ltb1xglti`vwS}Opy~}3opb6$@FpX zLkd9)JdqV(GuRFGBl`XS^B<<;`&a{ln470CkN_Vd6aGgb9E1vZ0=0N8xx8-o$>E&!thU$Qzh|$t&KZ1y zyYLAfB6fa`xcyzC?$7ZG{tf>>>rjOMp#SzCn45BOO-?ep1$?U-%~te!(VCCTJ3VxlQhk>FoAH!g32J-Qg5>^=MCEARs4rn@E=|#A1~rRTp%CMl&;5I$?E{u4d1eW3%tjQR_!^rS)%h*ybZ0wEbQO?54E;5?65i-T`~l zjbVkM_63HyCtP^mNq)}Hw$;7XZo0!Z7>^}hr|q(Iqg}DCpHr=C=QQb>Ih{H&r%zY2 zZ_HKpi*(d}nXa^7qr>)_b+YD=-Q(F zZgaKIZJzeJ4QsdCn0C2MXovecZFk?HtsV!p+2a~*^1MYGJRi_{uNSn=`vXn-e5W-& zQ(FCBe1z_64DU}y6Zb}P8DnTW6~twiyqxglcOSgnb;u`J`+eiI&o@K6eG9bHuR`0| zGiIw_tG4)cX_H^SHu{Zdz5f!e^Ixq=|IJzxuurQ3uF{IYn>7)5PRoOSqh&#V){@|F zv^aR`BI6tl{)GO8cq4tE8QQ4Tq3v1~+M^YrLz)O%q-9|fS{k-qi^F$mJp70jh2Nls5qFzLBc3viM80bp zj{M3rjQ=v<01w9CtvD`usXW?%8yMP;ahW+tUiQQg|3|X+fxUA!MtW*pREX9@#c5S^ zx>iKzYk72;mPXfTNp!QuqdPSgJx>c`7HBkPi5A36YB+YghGGwy24k-?^~c>|niu!D zXroQ-pntJ1ZfvNxS3Yw2)(f*m-;sDFGndd_6O}%W4H)(C0omR!WY9c;BOB15B zI3ZbM3E5hhP^8g>N-apN*KlH+h7!9qm^h^V#4*iFTBY8kEvBBN{ig2Zlcu@JXHA_c zkHQgXH$F1)I~ZtUO@AF?(`g(V>vwQ%|@TRtWP8MCVvyj)>@L{q_Gqqji!WY zL2A6_r>1K#HBSSnrJ9#oqrTK8^`v#GJFQ<`X`|{)pD=Z#Z#1=~?=!V#95=OO+-laG z@vvD_=Bw}z__wJkbIR0|G0n4Ro-3dq<}?2TcH}YFoV=_iAIr(#c*ZP^W;kd#!&8Hq z!RpV9R$peadNQ-somHf{S(WO{s#kkfo7%E^)tWu5=Io`WrtEd5#+==zhMcQRbvb9u zYID!SEAXjVP3}(|r%bgu(>#UdT}9k-!hYCN$n$3MvYdR3k-r5w78=a4)4W_a_2dSq zD>p)&xe02|%}`rzzFP81)tpzOro0w4=69(+e@J!ti%qo!lct)2ou;aSW2VZ2(`FTg z55UXt3H)eQQ7~m-GI$)_vn9mcum?7R;lC^<9}8fhz@**+TXhvUtE13IZH1v~DU4N9 zVX7Jmb5w^XR994`+M-6)6nCn+ctDlKW2TCd)u!^2?WVGlBc_s)o6Jf|?>8$heG&d< zR$THgm@+Gw#`FlcIA_X4^)(i%uC-GIb92h-d{t7%{*U#s zDy&aYL4A($8;X_JP^H|4Cgn7CDZ6pLG8>mEqj94ty>Y)Ot?@cjYUACel*VV_173e; 
zN^O`jrPfcG(%`N-LLFe~g4V(km=9gG=yR-V;JGvV_LMf+sHoXd1i@+Q zJa6hEC);TUm_%Pd>3tz&!QvpUUWuY3m=i^!nfo#`h~nlewOzF zW|hNHw6?+u7@@qq(292TJYw%bV&7r>hf(y$h`;%5@t|e43RvbS|K*B&~ zh?n<7y1XXx}~MigCHD*dXT>2j#>M?K)08D2Itx<+%JaIWGH2PD_by zU_a$t4~x+pg1P;~-e{Mjol97hHctG%l=yoE@#kuuL9exx+j={>u6L5ldM`O|2$a)? zNI7muki&*dD3<-kTFu$mDZ7meWV?~yBHhSuk#4wFvp3u$tM$)o_WBPsd)>FPUi*uz zC#SSyh`tEeSei$Dw=SR#&^A1vjOFY>v5NS6k~J6`(BI4)fNjKpJ7>vux1DTuJA;?3 zcL!9P^1s@3J1dE0g zV30S=?_+c>W8v9R3UPElpSb}Z;0EcpTp&M&uYt%B%`xa(#<@U!SFGVYpv);`JsRDh zD_8?_h&3=r*azb(V(=5}gK_N)1~w}um)U_M7hN|XA2)3i|O4ibMKMITM)>+oMFg4NB;!MH_S zmUAA^_S9S!YfOeA|%lsWGj`oN8xgSRpV zCz$OSH|)w$tl#UUlbzZ2(>c`S^V{enkxd z6m9Yjo&23W_!boKT?+N?BLo(|QO_~X#gmN3lsTO`jz-s?a=JW<{u8wQGne#U6GZWAh|f3T4Q#{v*iA%zfLQ$~ zzQi@etvBL*oF#U?kGTCQV%RtIKL7G(qWa5gP>iFoCdFXde=xiU!-Fv9KN#~-g3(Pt zHwWEvG#ktwTl|8l{|{4Q{)Je3A?82eKm06bqH{mW8HMMPhGrpp)yywxL8qH?45BfL#u7A) zc^H#uY{HY+j>cZnH|Aj+r<>eBmpM!Kc>u5AH*}?Uu=2jZFZh|R#eNw7#T3nWnIDP! zzoKpa&U};)nS=3X>f&8x<3p68RZm&k(C9&9i0(Lw#u8FrfyN{?z6p&TqIuTm?Y-~{{&n1AtKT-N`EynKNl^H=KP9qQtB>f*Q5#Ra^W=ai2Z zP)S)D(da;#;sU+o1K#;Y zr)cR@}KsVyH|T>Jf+*XbkZGPTkczekOZH9ise!h!g@AjFt zx@CroZnW^%^_I~(IV)AiXXWWCt1=z6s@0WN%{nx@QwL}FYX9sZT`_y1_Re0WJ=SZq z%X*7;TJO_#o2#_d=0_2Jep(A!7Iy5Iv2kbMn&%Qu=?8~*wzD_&sTg1+S+U77%TO5Y9 z$zfa@9ad<)!v?K$+@(p!BU(o(+$E%s~IxL=PJ z`3-5I|CmPnS874PX3Y<{LPLQkG!S^J`hykaFOi$Pzb%$T2xe=$;8S#KRBVSTSmg50YqqI0!ouiMPnh z8uGG&yetc!jj!W~pW~&Gh!71&#%L&#y$>RD)E`--zQ{`TM%Ak)s!d%{J(?RmUmekl z)s7d_7PC{Wu}9S$dy|^t?t>T9821tU08{_r=~Qk>;5h7s%_-=Um*wPbF?m@SI}>k* zxd*ZC>W>XjUtFYm;u6#qm!Y|F`Ra%(RXgjY+Txqk8b4Rf2?J_MSfs|pRcc7wrn;oV zs!h5ZK35QmxgI=BU;*Pc^3nsWB}| z^=XN!OV3bEdcLaD%T$$KtICWPRb=$2Jad7{GMB3~Ym-W{_NzGiIu&K#Wh%^m##ET| zXO7>%)HGSd-8u9HZZWQajj#e1@!ov$(MO%k<=B=vOHEn!YT$RyYqR`SofWRCtTZPF2Z~jiQ>7B-=M?95sVH~83iFn#fScp|yepKKe^R;mcbanYpMpQZ*Dz(u&70<~ ze7sQDT|hg)Qdq#>^Qe!x^HEuTh)VOLRh*xsqWnx178I(W zphEct4azI*RBqvba*7r!yJ(%ViuNkA_!?yt-)_n%e!`Sq{4U3@Oc_PL!qi317IID& zaX!Fm7>6O~CLgWPz`xfNnyG?4luL^oRb1?;!s0*`6h|trBtf|)>B?b#PIhUTvP$ce 
zS=y$IvUy4`TcotINu`$WR!aGCC6}Lt$CXt62aaDVx$GC(v8cR^`^~c1DqwC-UWL1ID*TmQ5vI(_SY=eED!r2R4^_oVt*TZ^Rg03V zdzDl@s>JG5N~qqU`0A?^S97akYaWHS6VRVl zRP(y5l9;24d+zF)%BrzbMy-p|YJHSi7p#=JC?(Y;DX~6N3H61Fudh^GL!)9Fx)jqe ztmuXnifY)V$i}0JXgmWCE4=XyMKpY&i29!uSwE%7I(F$tV>e8qGX?|I#Qx9#Ool$n+Xi)Lm!h51#rlh0V!wX;he6`sVdAe* zOL;7^llvkkxsG|tWh_9>V-XN9r?Ctw`G zk@ceYWwY=b*)IG=wxc}HL2DVzhptXyZ>U7O0PTzc{D=9(|0Be|i_l-p8jNMk0a#(B zIjiQ#Zk025$#zwcY*t0en*DQUugaFysxr;OV6$A+D~nZ&Wxi^YW~@3Q)5=>gfghD= z#oIDX{6l6F-^0&Z+f84De(JgdYWwI1Xd50-(n9uN7)Kvn-xcVuL4O@F;KrG<+H50> z&HN6?W>=YQ_7M|hx!}cck~6q(VPM%pr#1?>WdZQTEm!dGr?@G&A6{lq{u`a(Yxq$U z)bj%9fhOv^0_{AsQ1q!BW@`$NBd=`Rs4t&wTd__+H~=a}eEj>br*eF686sD>(<&68~>R ze;aF1cHzJ5!+$wI41QQ#WZ{@451nja4u(7VI8HuJkdJG~$2IZX6ml^<$slu*&UF%z zlO%ZZApdq69^tdE0d;ZWD~*m(2efko^e zM1!<*+Zz0b4YUDePTs>gKz99(q0h(dug8D6i8&azpnDeXf_pA;FUNB;n1?b8Z^V|H zLVNrX7km*PJQ4EoSQ0m@Tm+5H${weopFrpd5`X+=I1kUm|EP=lt|xoNduapeIhyhY zpzFrRn5r%-L-QOw2oJ;K@Z=?);`lUBBsM(Y2Aw|g0(J5tZS%4>4~@ykYbgW|C3qIJ z$s2UEH>u<|uL2T(;~{ufE!20#m5c=^7ze21kXxC9gsubK*XpvSq4O+UfS2KwOT5bQ zHFyKwg8u{W%w%o|{p5WI=7o6j5IKx_A?f%bm8_GYP5!=&#r?a0O8yM7&y0B>#@Y|o zJY2-^L#Ee&Oz#Pj!{6mZhmAzboy4X6M64sk=KLlJ@#Po9xu4-b`~x54Q)0|d@E<-V zviy)Z^8+H?zYuBvndj|)BIDEhf&M@?r`Mkt>rf2df@%MOJs@OYpDfWf0inH zfE+wa9Q`M%@hf}@_W%4ZjK8O-zW-%5hiHOP z{D>v^11oS@CeheP`rGjdjCmMG=q4xWHmB)6?8nLf4L9Z?YV16<`XGw`&1L;x$jdi) zF`qIw<}b{_c$d0(TQQU;4XpxnD$!`9L>>47y?Tm#JV8Dlqg@`wpL&>loYyAO-btjt zpKg2=>Zdr3&f-U$$A!3nSMXQnV0=dmKRpNI@;nS955K}!@DcnEasMmS#S7HMGpr4I z5-;X4=7K!J8m0625BC%IpCcdll8?J-mpkbncaV>>g99J!RCl=N#z zzZt)y1HYpejX^X<&=^N!1sZEfdn-M2AH(S}yoejfjp58Zic9n=?|w?J`4`8j|L~0& zcB19<7DP{)Pg}6};*;HuFLeuRgKox)ImMne*U=^?m5)Xl8nvX}j7A3mSOCAX2Qhp3^KXswUnJ9>|?>+m$j^0wC)-kyMa8D4JX@0(^Z zB%9fw>4csST4AJ?fKG-BJI3&G z#MWF_&a%~hOBe06^4Bh_NNt~;q^<1Lve~*&o2)Ce!MaZCZJM>#rc;wPeOhfZtW`GS zT4B3V6SkYQ%yyra*dEtnyE7W&_pBH3ThrWm1xAhN(hhbbxy+5} z@L#BQyqMO2EouoksOG@y)D(D^8iSrvL(rep5d1Z;BeX%#lpc#_&Khc7wD(npfGg@#i=VORh>bse+Vv8TQKV%g6q{1+@_|GUNwd+ zP($c))rD?UZP*p62|uao@Hs*GBq 
zil}uekKU`Ym=h|EIjfS`$KdzyPxuv>9T_vFyArsaN~9lfyRik> z5pAW`XnQqByQ?wUU-i-9s*Q%aY852js{F)9m6!M?e8KBqVCo`g(LBa&#tvAMO5DwHKFlLuUDQh(G{?_WL%f}8 z<6Tsp;G@cf5S1rHt1KZ=r3o1-PRLhLVwnp0o$rFAHsvSvDKB}Ua#L0-CuOIyQ;#Vt z^%i(onW?YC=e+(I*sTlBpnEutzQ}FH3RnmO{N2s_ZRD>hnV2VehN_ZnRFUkc(iBgX zqy(xcB|-%$amr6gRbFb2a#M?ylUA+lv=(Kh_b4-cL>UZ}FvT!4s#;rJyXtgYh`+Vr-bqDV?k&jyPQ^{va(`TzV!(Ig$ZpzE>Q*K76 zviXhiEavBAW@afPt5E68&q>Q_QfhXWQV39zb0(COvsFpCS1K|0W+mi42(Q53;U^{L zaHj;8c z(v_T-ucZ8PCFa*FA-_}c1w)D}Sf<#*&59{J1UD+W@Bu{?zRdAc_)#$hQx`do-p*Xk zhdkN==H*id99e3k3f?c~n9qCJ1?I{qv{hQ6qf&}IlvEU;#G-H|6vrvPI8|}QIf^YQ zQA|m#qD$HpRXU)^(#47>-JtN&0}3m$!HWtl{hPu{|E2H}X70jK(%(|RcvQ&q zX6S(ysD%>9D<RxGi&s#6ngZ+d6i{C-|N2Jxv53aE zVN^a1>;cih9uPdh^lZ3W9u3dIhw`ldUS4=G-mnL)HE1n_KJ;3FC08m#Hy7QEYGU77 z^y~2-n#>g3Y^}f+d-=Dx%CFT|zN}O7X^ob5Ym&U$vgFA+C6BgRx#PvSwa=GJ`wBU? z?}QU_YP&;@ZO_18f_u~#eccL(w3 zTuZzZTe){T%B|Z&u08&8=?Rl_Ppq7J(&X5aFNfYrXpwzyzvlEVm0j;v+4dfT+ho)8 z1iT~L?k{EA^|S1{rnCXAamv{b?P%5k)2)=>NZf^Xd>j5lC;HvQf4$7X?>FJ6Sjl1j z9NEuz)|~lXvKtPR?QjGn%4RrA*25*5JzOuV;ci(DFVf86^|BZ~DD(L@$!z|4%@}%B z=0l&#eDFK?S*s}L2zuSn4Ao8eU@i2+cG_Sr{zDJ3*F5wG(I4g+{3vq}#%9WTiH&A0 zaggN_S6M9ak@=Ef%~%pGvn33&OE{U9P`OJez>*t*TFdoeF4+sBlsGA)Dr4= z06L(7uau#ki?-nbv1OS;=cA9V#~Aucn1isKbr37fWwF{?rqy%kOisX!#u`6vHbNl| z7~m%92N&`D zov(XLpuYht6aEXun6;BR7*r`Y z&kXS9V8!Gr@^O%S93mgaL$E9TxfzYXCrN^QE{JrrW7Ne_5J2T}gfCjdLDQ=BPF3Z$y6^@%L{07c^Ny&-Z}p9u*!;C&d2RlPuoLLVfa{4yBlgHt8`0(O9oI2{eXa(m1Wscd52S|B6 z&~-RW+h6rx{>x41o`G9|WG;FlG_UTYPVTbe#*{j_*MWzGZg?X8cotCvEL?QXbHYDF zLqD_>sEdcFi-+#z{b%4cHLv7cTt^#h!2{Y&AEe2=&~@ZvJRa4IThO>2?gFZifRgWD zf@URrn)XGW;P{jUo(OgF8`|c1SG*8^9^S^{S>zH#)x#j(#Uvbpo8f+VN;R}|0d1VZ z+Y!eZgHF-^D60+Sn?Y7DY8qOG2HCzi|KtKkgO}iCcm;k3ufZE8=Ag`CZipRThzmhv z0D)Wr6Z(q?Xj}0_76JwS+cBVjetZwnmNECjnEPO?zcA*4xIzHLKswQB5wkaH^d0`g z*Z2=#Y5?zJ1aDw5e#8XP<0PKMMm&P;_!4`GtPc^p9>@E*fhX)|iE3oV3gR#Qo>UhcOT1Zt`&_5%(Rm$yrX~ zTd9jP{L9Vc@FrU423qt6R8L*Pcx~jt(EkMf0522w|Ax5#aq8k>*26rAFLgh2LC&!+ z$lb*KcaV?UiTiIOAGgphXXqcNwSe@;@jNC-dlH{u3oWsiA>=4M<9ddaJE#?7hRUn7 
z(m#mdjb9j=y2v4phW_7xk(2Tor(8HE(tny$ zinEm*8^e$#dUm)MuB70PP6Qf>Xk=2u1*BX~`Ze^vCN$b{e!9^Zz=;|`V=;cmDr#aA z|LwsIImWI!#w?Y4sG(<=RrM#W#(7%B8MhjJgZF0RKQOGEvq78VwVc|=%^BA5rT7(e zdY1<}LFh!Ik&H$b8ii<-qfyJ*(1b=iCGSCJ5RFmNU4|R8mKxc?4#$VJl=d)I4vljb zjG2C~F|*ILXvR-4b&=mOq@8y_pP~K)$3y&WJQCQ=J}o;fY_!efq|GL8Z7>Vbx*4&W zoRO;4=Gj_hUZfQk<(jal)pCnwEw$*-l9|04pE+M+Gsm^iaN~ez0RI8b`E%(FMS2XR}&aS22GBJwjrUWVPxHQxpQ!PNy1 z$5*|sq3U*xQI}h?I^D9=;Z~q__cFD**Qmw4SxwB1Y4jLWgU7h)JSSD_wM#Wz1gpHy zsM7l(Rr^M`pHWldFk=C)?9wyy8|z# z%{x#n-Vti{j#raUni_og?FAp!KloIt#8-^AVMUKznHStR}sjKL6p_wjxgdFdc8Z2^{eI&;(*;HvrnU)2VL zs3tI4)qzQ>4CJ>L0`pZKSgNw18kGdKs5rPsMZqH~3|XQ4(5=b~J*?cYo8W%sguSGk z@K4}JUh`{Za3h-gqj<~~L);AuU|ux(yw@Jiy*BmI5NfX45L;D;IH@w!Q{|z7DhrKJ zNhrVl7@Df0uxu5C6)8WgN_pW;%8lq!PQ-j=M=nzq6O%Kea5kfEPX5_dq!@z*OU{$3@@T zqi5U(uSz7|gMR3QX5Opg->RsWGV)U#J4*$zcFK$8wyp;0qg&gw|EtJjNoQxz#r6+kPHOXJe%+Euc;*hx zz&5lhgF^m|ZE5h3%#@a9t>iR&C8oJ5A>BuD>A{Lkk5Wtqzk!*Np{R@kMP^n2>mRau z6rMGzu&gx-W#hz<>}wR9eW!x5pMm#z{jEZ>*vTHQApLdeoDUha1GIA++`zFCiXjKs zo<}KJ#2s0)6qjwMm>g$C=XfeICqNN7;R?@7jlv4~4U8gp1sC}$s3=5%MbQc;tk|BAOOvtljyF5y- zg4^U?`h?s{{wVj7ujEnui#&^o{n0VjgUv^0Zr-#9RLQXza?njJ;GUmb;^<( zj29D7K1co)&hoABl1~MDKvYD?t1@1mm}ef91#-uWal?yot?HLc6?;HbZIM&e5jj@f z0*}d|>JM_P{8CPp%pk7d83tO5(VB-=Tj8_^RK}6%M#@Axv4rxM5qnk;`&4o7Uo#W` z1plGlL2mVKa;^83OMQr(`CTa|Iy_(ZFCcDNBvTZm7H_N8sA$VQ3 z^{ig5|4DZEFl*2mrHtLsgkBXCLvAT`P>%mmN!(G5|4>K#-N5tLCg#Am;y<+6$-dnQ z*ax%SUv>;=w(aqdA)EFhS-00}c6+C0wJ(rm`x?z`-zSUq>t)`4pUm4{l11Cc@Qr4+ z{;cI_4N^{HEf`i3Yt5(w%AARIVhyopJ^n)@@plXQ?aYDijwfHuO3HbMoD zP^pb@WaLRV4Zxl0V;v%X8-Yk!sily_n?oi^AP$AhyfR} z4q|+UOiQe|$lEfo*n=|{8&5hn7ux0Fz`(Yg&OSk$4IhxyJM@DI}X zR`V!pE7U;=RcClWiIh3Ak2OGp#J}h|FT(#>ObosZ4`8M6QDAZ=%;rKvU95B9#><5p zATI{IKxq>fm`y1_N8d!jHc^3_mICdu=~_N>j?caXAMw4f)P-gSgD&dc#%R^h*_B?jJz_7>O%J4`@6c9Df!)} zIj{ka^6$6fqdX0?$G(541KnD(SwwxO&!-Pi=3uhyjjr=L^f%+bY)5l9Tmc8+FkA^V zua3=NKFTa^glxDeo`W~y#LX$~a(xgFDCrxgXp>XbK%<_T;Qf7YBb?_3?qzuY;yj!( zvY0)_SVWn_sAnI_>x{1LUh3Y^G_f~bt~{S 
z0zSTXiTgM{01v`L@CZCc`#fQTAL4))LOz}g1ulZmR{)j%0-frGJ@^+Se1XI+Jf>2# zGtiEut^+Bv8(EygKU*5*I}3)Up+(gZ@J`P|d79%h@GLwBFTjgH)n4>NUZs8Bpr8Dn zKJrITypTvH)YB$^u4jJ85WWbV>o14!MySjWj8zb3V8s8%IuOHuF!l#A*29G36eJOy z<`R>Z<3-ftMYJ&&qlc(rlJtDwA(pmmM!~LG9@l7i2 zbv;G2`wqUrSH$xFyAH*ei}5r}>tFUDW}#`X?}_ie=2`nc@gF|t8TTi|c7Mfxcpv}a zPsH1QAnyM?`FN9jyiNr7D*5;w`S>mQc!~J#0ukc##F@|P6xDS*Ie3^T?-i=?@5IzU z(`5g<7pO58<8mH;r7pgeIc2mZGIqvm@x%LwVE#%HS~+MKdtua2?M+mF2mU}WWia-` z7{MnPC++2=y@qbFiSDtB`1v4y!3k>NG|hRAX!BXR(_hHZ_cZa;{{tfrzrYvp0lfX6 zIT#*%D+rw!bkfksqf3<01#3yMi72{*w0lTJ8+a05kTdqcfa8}i%5Uhu2S%5AS{BrtE#+{g48B-`VWb~VzB4!l3rM@1 z)3S!Nn@GEzw7c;L`bm43bjLV_m*W+zC2rqF4)!yY9H&Ok@alesmkackkBQ@dB!@gZ zfxYl0y6=L~wT)Bj9=MgdxS4)&3NPkbJi2QXN%~2opG7Mdkajs~*We8_;zhKPeiv!8 z7X|s)L%Zx^KFCg-hwZ%D%BQ!G(k5zcBRyjSJ!(C@Z9UqTc^}Q^!8py0Q<1X;O$*9l zi;fdz@FxGkq#r~2$)uV^uPY?|GSaUh{YGlOjr6-nzn^p$Pzy_Fku~@bTk#wK1VZwH>#h_iZ#Gk%CX z7(B(0b~~4xYwWn3zz$w-w&s#%Y0^5AHChg6vHv7(vYhi|DScxx`4}f3i^#`9@-a$% zjF687^pE+RFGHveP{MxGr20%d)oXTCJ!UtlYsP)>g65jzlUsbNxif!-DLueYX4vhA zoOq-F>v+AAVRkwBS!QFdrIxlDH+R$`bJky2F#o{<|6yi=24|*eU?yuYEQ>VHvO>L< zwd$GGqV8E;nrk(nPOCAs&t9#z+1u4>eWjYMZ&Z`@eQLB}mX*ziYOwtd@E_n#7yKtT zE{pD5${F4b?_-R=N1eIVAuq#rmYQ#a|6qgvVB@1+n-F!|MytybFL)}CHte^}M_H>%41oGKkIsKVhR_(2tpQyiys8r{R* z#CtxJ-xqz3^ZC1|mIsvTQY<aAMWAXU3Ys>(H9m9A;3aAW-gYn4h_ zt5o9Ns3P}H72?Gdcq~z#=X&LOU7;MWYnAPNm$H}>ljZ#vWwB387W?#O!wK|ua+zNp zjQI*>I_2QSWc&6j({D@}eruKPzgKAi*C;iB8D{}c!=K?B{^pm$;22t4L+J-$)Im7) z5AD2e;ytGIYa%J=tHon;Q#RYOZbKNxo)7tPg!U*mUf8aJ|AkKoWF~BKaad*K^i18 zHzzULPVq5LijDD9OpL#xV!{*|8v`kdh|5uUT#3TsYZV&bp^$_j1#^=cl(1ESiANQX zcndr#|HQXA{!@VoKLhI*(OH4!a6IjhKtF&gD1m(5%Ysx$g7}1)ib=3lR06+0k?5xI zL|=s^1}lWUj6;$V6`aKShh)}2B(we@rA_`R1M*8*D&N%2@=3i?-l?bM#k?G^)Hitj zIq&^jJ}J!kC+($)oDWH~LozWZR6rqr=Rg`H@HfR&REn9xStk{eX0PBh7X_wyDIh&i z{^{ZJOOKatdYXJP^5xB5#$K7t^33d$N9MTPGdIdD>k!;5*DPjcX1yj?=H<9${v>zi z5{PALZa~$Q9<1X(UKY8Vb$TK%a z9=XYK%gvE1UW`j#gPikvV3C~i*U2&efE@B~ga>7x|2sM4{X>p9LDyQyuEAQ?4fdMd;4G^KFU@KQlx0JNEE*DJ-k7BsjioX*Hd490uoyOT(jSA{ 
z;7K~(AK?r5K?~5CTfj>whXTk#m+s9xBld|i^25T|LW0{S&Vkfh9 zM>;bXw+=2m9Sms30MkJy>L8&GzGQ50(mBHGb#M?)Q|XVu+guPngYPv!{kEc2159sX z?HP4Ind2yPIAsp3L!aWXg_#`Mi9t!ys(Xgad#$KITd?OM=L{YUat!MI4D|h+*!?tY zKMD2o#eqSND`793;;Xkm<{--K-G)B84&CVY5rYrl z0n9fspv>d~V@-#)V}NtOXm;VE=fz3J1;#M{jgxJRN*=2PzC1>IjBVpHC*U4Fe*r$` zyWg-6G~*(aQ|4SgZp;IUZl(`VW}hzf(Y2=-OxKm=Le^o73-1J$%>YYoxX8x}@?qS3 zt#W11^~NI!!XJqNI{YMcG1SOb4F21zOHuAB9`q)K2c873d91nEx-4t>+ zeRDU7?Ai-Aa>MX2yw2x8Q6pI_qrS7L??gTpPMO&%POj+MPp}4N4f9XdqqiBh!!FnZ z```*V00+&v8JopTxeX5`?eRp&$8q}THDN%TTvLocg3yUk-rvHJ_PCn%xcUV?`<|-0 z7!&(wgCWKuJ`%8$_@DCHqieO5x;Hcpy+d#mt_G4I2*GcG>){koG~H~$LrUu8R@&yQ zGY<^O$DNTto7`Ci6!K0IzvCb`^S8hw@Ulwg<3CW}v1kXaW)8|m^mm|dl<`V1G!3mA z;56I{w_oBej(5Yoa34GX=jpExTjEvFHcvX?Q}{tF*VA2 z)W`n>u}&t1hx(j=@6g%aT>(_?AF0GY-cFQr*?TaK#+(p)@WB5F#wmy+I?cGW7NrU= zq7et7gD9yNe_)U}eMIjQ?Yu|y^GBkgcZkORhv@7rBDL3vOkbt)eoM^#5}7uB8TNUi zYs2?=?h;ROG`xua&cm<7nLqHX{cE}KX+OM(NamU(>|L;3@x zzW^I(jF@>jX|KVT*hK8U3%}qXzQi@CpCOLFAK%~tk>1BN*T4B|dM{98M-*dEjLUiW z7xnQud?afkV+YFPNzQ{QODt8JhDJW=mr?b#cq&bJD#m^oJ$N1iq`iQ&$LJEv=^AV3 zBAaQxJ#?ERcoL^*&O6D;6LhC{iRZtCDVlkj8!zSId$d1>|ACh^lcu%f8!nW=k1~W) zh6M7Si7!wazgPc~s zC0GBTtN#Mi`(PNyAJF{}UIRuh^vo!SH98Jxc#!u%(vRZQOX4)lB8o30?Q&ACA?-%e zZX@lvq&W|-U_LD{#*i_=(6OFVdMCAUB`tG`S9deKJOdvP%l`m8zq!Om=)MAvfpMCh zf$Nxqae}%yh8J@LkM1ygq%bp>^ixPXo3sl_yNtA}NxOkw*h+eHDS02QFiejaXBb^Y zE;i#uTtTgzz>BzzcOJnjcoY5!zY@z&UBoEAajF@oJZB1JGwfh9JP&I$>`C8^^!@QV z!bmZWo}WV6*$meOq+d$wSCM`_>9>&XTxwt*tucZ}u$0TnT5_|KnmLL`a1$QE{qQ3H zYB-BO0_#BFDKzf@WBT1eV2GeRTngwHYw=>%FfV2m{c|O4GJ*fF9RFb{{=*XTv6y^} zk&i{>gD9KyTj`x$)Wjej#3KH`k{-K-Tpgr_uIG$4Tr~D5!k_D*Z+4?^a6d!ZDQ*Q0 zT64*PbquwucyGd-`)$UH#dtAe^v{KOLL>Ne3pihg>7zsBV~~6dkdJ=y(MP+m>VdS| zNw=38S)fjwnRZ&Gjn-?W7qw8^&GhzWMv^AlqnY!xnLOOaP7iCj{0B>Z`@u3sjkA)}FpJ-Rn3bzKt76q!RjS&mK~=LmR5`m} z<;y>8pKiGPy#x_vZw(P%P8>>p&WL4N^s?4rHrFLa1v8z+DU7HH$%u|8=BIVhy zQLgxf8lS&3G_C5QU~0240EaNfp%Uud2yf1ztxhLYNr{h zcCc2Zy}imE+*Ino-ir>wDrRm>p+me19MY8Un4>&L);~D1{=unPS$HuS&Lc{9Ua2&f 
z?MihyrexP!;1MOcz6oFO`e$H|JG9oJJMKpv_;U*kO;GF0Jtps$lfP2(QsQcUhmomn!A`BL2OAeCB!1P>!dKvOFA=;o+(@ zPxe0W3{r|`gpxhumE@JG1g~7hdzUKCyH2q_9g6W8RJ89>Mfq)3q~Bph_}#2<|MLp- z{~i2;*FW(daW^^>=ne-H?}bnYPz`1LUC8@+@px_^Q zpST;HrDzU>(htMv1L4Ho9E%{2_p-8SU)TZpvTv|&`2F$;e+fR}^}l$Z`S$3K zqt_QfJ3tM$f2B~s>umCq#=j+#mw1Q?ouQ~OYlVl|D=f@eAz_{h4hv9Fc$fmiV-*mQ zD*uRF`9+q?H?mPaOqBPIS}3omwepOOJ2n%z;f zLp1#WiXfN2Ga#9Ni-#zP;P23=Sqj3735<4>KWnA@qFMhC6C$4&)<49s{vkG7p0OqJ zh^v=-T$kM9M&ug5MlSK}zYu>N+$$&M5xBr<{a)WS8>1<|Kb8`{eInO3Tn6K(8&Hc7PJd1#Fdz{!bjohy?T#(N7}w z!i(`twUS$!om|qK@o*sn>ity%pGuC)){wb zcE;229QDDkSh;G{>-H^i$DK=iWbq_&;l=9CP>`&|G_P zksa%l>{zE{%Q_{S+$6}BHD1i@yhhE+>yu^PV$ICkEQ`D&GS9nJGjbo3dG7znBKI?3 zEhidt(WvLvyA1Lm1Cqh;pd!+UJJ9yc!hgsn{>tP2A1}tPXtr#M?KHdCNmj)knpNyC z%i=K2ERK^!ak^%d6v_-Q=HjNRC8HeI!9fg;o8e)29sW+|`|jeNKyBz$qf-RgkOm3B z^e^TP;XmY{pO1ba`o;JUW#+Q3u#zQSjCqxVOjQi%RSfV|oaj|_+$suX7%J5j&<68i z74PkX8{h#d{de#we0On88n%hbAs<+BCZ=17?#+B7{D%T!pCa^2(I-i}Dq`T;8M3UO zC9?(_JQaI97AG3h9T?0ElevioZlVCjY~*z7o|9KeM}<>J!Cz-k!IUDQPv z4cEmNyXL|o*vx0Hf;;*AbMPVG|3=MdRYD|2d~9s20nwGsn%-~o^v zH!=)r=6ETy@Kvab0rD|OK89!$Yk&h=hXP=;NJdqAG_~Y{k>gbz~Qiwx8dHU^=}=$)g;pQiAZ(_{?2 zb}#djOw2j4M$eIm&5q~Aa?+k8 ze%*{`u=irLe?N}EIbytf_|#pvDR=27s&slU(Azkkfy??|lb4U-PsH4BP#3T8to9K5tb4eVHo1+s z?#%yX@4dsbsAm;fdxM0~J19EN zsH2WL>ged`&boYbJkjW@3r^Y>wB-g4|e>Lhml5n znL;#~OmvwfcA;$_G7l5GpC;12ME41xMWTs#)3K(UZq-BN`7t&06UTq;gYg&iJ_eF2 zNp2>&UJiK&>C{CkevwR!naEm{1llByxIYFvqOl{Ac8MU)4HK)8y&kz+;3Rg@h4;y$ zeLn^9fN;3N^Kv- z#2|l&n90dr=o2)P*}-%tie3Jp98seB&(q!x`*24KH*OKRp3o zcbt7&h_La>b+p7*hV?!01c&G`r>PZBc!DrISIU|xhKuL`e*}LAgK|{;tX~?6B;6iB z8kWyc7boZ=$B6rn;Gc)#gx(VNU<~qIkUtIivyeX@ZeTGwUPazYYGDKH!*>4dg&#OZ z&0L@d$?)E*cupSQHiF0CuV7G)JO%1rnc-NKM-Mou!lO_L{(w95wkV?CeNa>p9#Vim2j63<{KS4NjCJg5c?fKR~> zfIh;I8qDzSuFYc<8~}T?nCEH`bEqhYZ3^n>(Lt&qI^ZAhjrI8GTI^T@53(9NR$&J_ z9-!wH^t_HGyn_-emr;sW@Q5X}$6`Ee5uWZwZ*!x^yD<{D;hS#Q5Q9!1T^{WWsR#9$ znt(0*y@AK@I=)-0p(NI-Xu_%R-UkI^u|mO8yry6)mMb`kSBH%g%Z5!7ONUJtuPDwD zixu6(BE?sQo6>5rP-%;pul%N%r+i$@Rq+sWRKmn;RoF$3s| z9GIgsgCy4p# 
zlf^vsnPRTSd@)Dk6){_b{TDSiiJ6*jh#6XB@Mw96saj!TiZ<&Xv|0b4&H4xJ55PCz zpF)PNv*_ma0b&P_xbRF1Z z`nF;UxiORVS^uCvS-9xC3TK0bVgl=w#>2&oHQXu2kPtlD=%N^96e66AQ-q^&nQ&mA z7zbl^r#AkU&&1v6KZ?#>R`|dO>L0uU7V??!Nw{Lm3~ZT>ty7Iv#S|lLG0Dh4xEPrV zXCo^y-e{y4XFNuXHFgnWjHipy1VN)rmI^1+HNw$syRbJqBu27VjGcLqur*H-HoPp) z+Pn+=fxo{2#NFsWgwE~O_@51J02YC{e3tVb*fIrMCSj|KxssS*rYXjm@$LsRBQe^{ zLO7Y(2uE{A;b1;q*jr2$BQ53#JIf`))^e4w8NO9mTOAN1tj-E6E14KRB2ie5C>9nY zI>CFw!s=^b$=ay1=-7|WEw=cf9kC8r0A_*doSV#dF4!}k>&AjnR%*i0N=Ggl`ygWr#OX9A|tFm`xT7&)F2hK_#1!11ckcjOlV$2Oto z$gd<0e-#D}KZ)bW-;F+L4b-b(p%ebcaT=J2JVq z2s0-$Vd`WpOq?8q@u=~_Xw+0;2p3~8dYRB4yt1XPi=?lYg zCclq_L30sg^RNf=Dj5PFkzgzh9m zp@YL}PqG0{LTi$<(40I&XiQ!x)F-bHYLmALl_`gW@|250X>yoQ{upJP&M$`f4tkG>T8U0hyKb;)#nTkS-yco4P+VCiP@G1;qa~Vi@r<0gVqw!8B zF>e-F0@i}v;1mepJ7!69%c$^P@JH~K*n*A~lx-214WP+Y14nX19f+BBpd zJc|*Xn}KWvoqYv$ArKdtk}fg2f-z-y8;Thryab{{a=Yyly3=`1LPbKdZj!h zd&)cl5oYs=|J=||wQ4RS244;bK+(i1B|4KD4XMchOI=7mGORboxfpn*!Db_lv=O1~ z)gm^o;rtr_pV$z_b=h3s%zf{GFU4}oxDd=jr?eK0?VyAsWwxdo%@?D8>CgPnN@Bp( z=w1glf=vo^C}lVm?AS(|Y{!nB*s+T?*<%G90S&rmE(6eOU^_U@_cD;iwY5B`?{nYh z;+6Th3OZfEME)L4nQbYv1yyKBc}Y(d8taL{H=%bc*a3C}iY4B}j=j{!e(aF)QVwCq zVSMALDO`mO7z6N&qZH;SLXIAQFS!h^avgs8Rv-6#%wEvADSD@JB;`-O4$P7=^XO&G z6>Yzby5A+~1N*^S;0QPdD3+WPIX#R?F#d5?g9$NqNRuKDGkzSj0~5d;KEDosf$^7U z5BCVx@#HaqZR7eE!VSHox3QNq9Mmc}ILtMoygIvR`@QHm1df7};B9aoxC0MBkq83* z#fbzck|RQ$gy5&)v`Z8|8sz}!*ijTRl17Z60O9`ph?WYUQO&gj^0io#sP9o5@IlIK zya)YMnX07sG>~*k`n&$m}42JR3do9jz-$0(Hgh_8ohxAu1862Fh~cL^8Jt<&~3qKy>sNCcyhlCNP1&| zq?1eKdI~s}07*wVr~);h4m5&h&;~jI!=UJ=ZEj)5J!7~K+T`vGazj?|13!&?i^?59 z!3~K4_!s{$v>ry2SX`f9qb>P$-T^+snYerg{KtGEf~EXwzXCpD9g(ZF9%TpI#~W}T z2jD)Az)hTjo47#q;4P1^JBgCpi2hrMDVjLjKo&_o8tXtgkk*1odvX3^!#}X)TkscR ztB?47`47Z*FN6(T%V=~>Cii0&(dt4ZEhR=@LDaYw`J0fx9r=5ZzaRNWh}}=aOwIvbD7Z7zfqwig+^Lu!Sq5KPpgGdBF{jB>_jxWF+p$PY(g&qT{)Dk`R=oyFn zsZ{%Hcn>#ZFGcnWWUs*mHX?r;@^>SDKhfL~+(g3t*veEKV{ zMQjBkKp6NMy&uYT4^diD(@Ohr7E>1m)I}crLmqhtIkZUzaa}5QBvT)WL~;CrO>M76 
z_Il)QA)?#`FCpb&97XoqFb`h1bSRc2(xr;1nRX)258)1e;yAPqh8M>_qO%_~fFh6q zl7td=Yav@7+2+W$r3;S2OWX#^~VC!Ek_;(k_) z)7sKHkXgu{kL< zkm_FQ;xRw#mWDlP7<`+$;8!SyM?K`4AlC}mBYO<8U64JMA=DN5^N_y?S<8^Uf+2k! zJ!LCayg`i|q34{(YXdkFi*-v5A~&O!QYzL|w}u2?^lR+>S3Ov7uZ(tD=h6H{pY z$&6}~;iM*$6ChbeSILQGOnCs{udNiC6f%2wN8N{Q^y8u|QQ5PDMw|RWuN@ zhnb04!>oj>BKt2WjuJBz$BXHTlf*QonPRHaJTXQ26){U#*-T}PTN+D)n@&J_INQ`XNnlbyQQ6U76}KP6~bP3vlywjSJ>*E z5;l6i!dfp@jL^#$R_qfqT<OHz#RO~_ zudgJ=>1l|udhETZXCOxDnF&X|5yF8zeC_qdh>`jeg`NIPVXMDT*uce%FxVih4BrsL z4NnLQLvLYj6eG-xa)qf;qcAnR4?Y)WhChho=-7qMbr$%aC3P^I`sX+eOyZm~-;rV< z#&F#z)=4=UstN}~EiuwiPuLk63mZe;KWS(yMi}w_2P5A9V9fg;jOPgplV!r(WSuZG z-7QQ^j|*cnMqIOKVQ7{k49psYf$3fFnJ_eE-yn2sN8f5I{1D6=K^t(K0-UjDEZ>d7 z7DukL#}+$NC1GQ#Ax4<$h~cIN!ot*4n466N_QK43oG>+?B23_7j4fUfMkJ^kTJ97E z!;cF6;g^Np@FPamZ-q7O0Or_G2OKAY@z^t(?;OBL z?6JWfD@z4oX{jpAhiePd;d;VkIO`vVv;M(~^$%8~g~15kfyq9?`qqnup7koBYqMSG z*c=wxHtrx?XxU^4O`B@a2mU0qtl8@i`5V#qnl0@BT)`CJ48~xOBj1e#HrQhY%sFRb zqa=)MG=+h!j?lL?6#91NLeI`x=-N37osrI9y3iiEKxo;&E;Q{o3l01ILf!tHP_qvf zs`g|?+LsFz`(B|k@&lnd@~>hK^4Fnn*+}>gFat~iYwjy!3bahSfVBj?G@p1 z)P%01w$O3X7urrHLd(fYXgcv8PAA^OIckbf8#Py`vT=pVsP#gb7kDU*J}nekBQ?kP@HHW6j-Mu zSf|9IG%y}aL)b#F0&E9IfEVXuK_L#;4qkvS#Rg=*itKq~7>|Gp7|Um;QTPFnI4BFw znT#X;pMZWBegmC2OlV9|70OdJ#jt4%=&VyBF%Hmyq-3fY+%R)8msskZ1`K^piG_dfr-Q)h}E7-3^IL~P0u3!mR2ln#)IS|gZIiQ*Qo&(-wK-m^@`Ai_^fcQ+h zm9XQp6=gP?gnsnuO^5$Lr21?)0E#CTC~>12ji|}P4Lg=l7q3tkOBqO(;=D_#i>2Hs zEwEX-65tcBoB}~yo54U(51w%EmtrY8=7MS97Y@jQzpba>12n1OO!T8yYaTH;&8xTw z{S=KKEoek#9#K3Z zyWn%-hMrkK$^&vnuMF2&r*asRH0Z@waJfM-7J)ANd zyP+Swn&?$t@v~0EFquOJ*amQS`J```5)(jGCL!3d4?7MRz*Cq5I<>SI@eo1}Vf?|J ze0K&OBb@8-%e@`kLpJ{$+>=WtaCDfD4`4En8==Q78kALOJ$fXalD=Kw4X_U!0EfU^ zfGQTpsFM>a@C6!7%CSS*Gxj2byR=E!1uFOaVkT^x0T-Y3=bBV_lUlC7%bw8q;zIn8 zOKd2!DP`89yi)$nupP*jbnXX|u4CXNpeO_uL)^qgj+ek?;0=5gnWU=|q+mxFc7%`M zI2O>jp$HE~V30eAA-*r>+HNuJ6^5$i^aI-1oHFzJM4_>pH87H1seF=7NuQ)k(&Gat z22<3bU?`L$Rm;?Uh@*((n4rKK5>+?@Z8#4j0vKB`5#EDJ&Y*B<$Kfx6xh4bD3YS&1 
z0lF>GZLo*-XDCtN61iTfe3DK{pQKCDgZXkUB!OcRpi1TZL?*`^kPnJL8K}aJI_jgr z3^>7k%;Yn5QM;Gm4&SVfg)1o(qtPv`gCYf3Xq}+`&vUOA2n163xJ=H2q;bpwxRRW2 zk@S>;3Qz@VKs{&zZJ-NW0|VG0<$~O`BB*zU|DY~zp>%+;qMw5Gr8B!9;z0hv@dxlD zvDM#+Y5qoDIlnd$sgL0o*~vu9v*0A$U=Ws(slJlEIM%^^Y$jsb0r&9+akI1@9-tBjkph04g9DLtOYb6)_Tmf&U!(Uk@Bz37r0Q!Y*T0#%Xe6GgC&nZj8Tro0o{sD} z$aX{aQY1^iV6Q>`2IOzWHFhI^KW=i282%jEeQ=)$xPug2Nn&NB-Aca1wTAY<@Z;zW zK0@a^pa;}}5|BrnnN3}!Qy1yPm}%r4q{0a$64zbDj#%uF@-U)=l!vjLvaCYxdLl|G z4`Vkje1I7GIFZ#wtdQYevD8W~EhOb&d;op|Lwj((&G8{R+koU=l559ag+@iwf$8Dj2JTy=udAEOk~ z?yW~yWpo%%JB;qPz%Oe+3elM;*E_@kc=ES9b#Z~ZI7f_m4*xtSM$ldDkvj&t(mIf- zl++cu^N_m;nahy5f}vB2__xv`Z@?QIfj2mZrv!*yw9HOgaR(l?1D00o#6P#v zCR^~0E!e>dc4Uvi6P=Mi1^F|OKL>ekc*ZhXWfhie!ejT~Ek|gn3!Dwc!el(C9NggX z|0TykIU>--h()~)J&L49#AdJotQ88#RYRgSa>;*ye^^1j(hA0n*RkU@?0A)Yp=FF8 zOROoM<6_ZeAh%n~Cs7YQrgWi(uCtFY8MC@i$j3o|XU&a_g5saBaV(dq`g zsY~-q_PwPY(6@rA#$scp`+#i~#6&)`)r1(sccZYyQI9$?aexEMoyH(_9~Lg*W85qbs(g`VMgp<@^VQiQf)xzIM~7Fq`H zb0qFY$2#;aW2z%@K-0lQAaO6Fu*IJ5YysP$h~Y-8doWZI<_0Rl)Id`h8|Vll0|Q}b zXeJB{t%bg!qtG`RFZ7J230>m_LdWVV@I;D9Z5d}j@YV~@Fsf-p5v6vie>!q9~MA5FA{o~gdj zH8l}BrmTN3WBr2}>mSTn|6tDg2e=pwHj7lZ*ez5oPY4xDAE9g+CzLD-grY^OP&9uk zl*~U9+mOEkeQv{P2QV3sQX!ncNbIrUJ1cCl0H$UN@Jhpk0l7JPa4|aUTcZsZqcz-6 zXbv|Q8pExHx|NeqBT+$h#7v zEn8T?6nqA70b_u~|JZVV1hB*wGhhM_WN0}IZi)35BUIsWG=+wZj!?5P5GuB&LfMx0 zVA$G=VRqw$g56XqaXtlk9c%$_0S^$(`CQP1f=7Ti_n~X4HDiJe?Eof#QD7wJY_Ns2 zA99G$4-EONZ#_)t*eD5YJ5@LyO`+nz8VmLm~k`$`h2fDlK9_2QD``-!1HLp z?=Z-XWdIpR2OEdeuue&gbEIQCgPC9nSO?w&XZbD^WN}Xezy|ViUPtZ%d)fiG04WE^ z9zC|4hiVh%z?jbloYzG(%N2#jm|^ff%3|0AbsU(%+=YQ|5(CX-I?rSpZ8A5pXN;IU z4J-t!z%Fo#>zJubN~iPHaUV9gd?9$vj+l$=DNdLPBo1gK=d96b$&oFyg#mh@VTJZM z^iN<7h>L<4Hc5$_RB1pSIMaEs%)ky;>OvY|T&X;XSaPL3T<3z?&yL%zIGXXHsV+nRh5ByiD3+ci4x(UE9UZ)_hPvIQx zA#HTA{7rygzIug0v6y@M;AK9O?*}wlavwmNmz)E#oQMx_tHl4XM3wv>V!-*5POtz5!z)n?Xng-)?LTj@uM@0At5)?06G9 z_8BrU8P3rOP@ugCdXpR8e2d9~AJ?TaajNH@=VB^lbwTeajy9Cpj50(03oXj4B9(O| zkaTVUo541)6YK_5EtBjae&Qg)96(+~naaxy2x-qbU@1TO;M(`PgoqmHK4{48+ 
zF-&4h;WV!EZZUju7HvQqTTs^q)Uy`lRbGQkshpC|T|m-z08k8qilIsPDUN5rS#SZk zgUi5M364U8fB`!~Oqsab^Y>(8`Nae<=h7(oyWjwa2{L&p1=oCrSeH}5XTS@4x&ITh#y8ih&r4HeI>&Jj)G2>MqyKF z%;f#>Bwiq%>q-SFhQb=%#^~0;MCCo)e-Io8Qu!pE-oPJVx?E2*N2-#kI&wfVNCTN5 z2NZx3PyuSGk4E}RgEjnw3!kZrYAUyiLY0RRWb$rtbW7`?^w6z(h|5{^_$!d!$@&d(=2y&}zhwUY7y0aW0{q7`_>Z|T2#fgT{#EiMR>FO(gZtPF_pyVx z{SBy>gYX8&;Xck0VM}XKLNO}=^9qTs+K8(^#ANpN1JXX6QXYiFKYWAU&%npvBk%$| zA?ChITy={$;+C)>G8=^s7o56YItkW5yH9TqW+mLgW&O9bwc*2!nPo19l(-r;Pa0 zpTWzIfy;;J*c&@M=};bYTz5WQptGFAk#zIf9nM zTzLp);Q%$YpBCFk%kCx4d=uT$8jxL}3Vmrna(_P{bw#O5O5Ni)G50ZI%wx34F=D%8 z#QjIG<6rAQrXY7Fa_1t~4X<2E4Xi-+dWO;+{M(N;r{pywy}?%6MP(z5+D7s~HlkZv z1F{aJql;cDAKuPzJPhbH@?mi|b+LfOl{JX;||FC&7NO6Knw+!Fr*9JXPdr z0X<*@ERbzYt=l7eG_ofkdor?TAa4#9xKSIg;w5X~3bxTo`#Ez4p1>dPxk}5GfPNnT zUvV6iL#ls9EV=&ea=k;3De1BNDp*QgNc_VqKj%Lz!AF;1$71YQgdJ|!v5;tc0d~y8 zj=B6}H3%NN0&m#}Kd^_^I>tAicycselndIx$KanpS|c^|NZ2uqN4g?Y4)6+aV|ZJ@ zck`$VeyK$6FZ_c(@=bsR@~x3?kNnZdpMbo{Fc7YA1Cm9{U;@_Sy*n6L-lEkmas_KJ z=+)!t{o~<4#={6tz=l)kdQ+LYS4F?t(3EIC$60)y!TIUfGmW~KihoX_O{U-*?BS&YZ5zEq2;9&qJvwl)-#c-**P&jgiWVuB)jF489BXp?dH##nrG41IPq3A;ZR%f#=^~JRvSf=# zax9o*^4%!xaZ*+tQ6*n z;R@`(sKEY<3cJB^VXoj!503=}pdGvqzG2_lp()@prW^D1sDB`Z3gh`a8e1H}NNlmw zQWQ3tD#Aux9X>@%j8I|kMfjNE%ErP{*+N(-TLTASt~^$lDNh!rDtL{`60k-XtL_v= zsz-&Ps;4ke<^2yTy#GPDNf;>c{s+Y`pIh)74ly~9`c>M~z2fBd`-&^Sn zgX2&Z7H}~Zni|4fgLglW8)K?&AWYOvg|Yf@VWe&+j5OGPQG@*#HC%;(CQP2@YN4ma z2%~jW=xBL@D?(c{7c>ZMjl19zu?_hv(C21=9~e>xU>tDb^GMEH143(I!P+DcU7{Qy6M#3j-}ZU?lXlEQG$cjnE?y(1AU=kU@X)PErFd-GaMsSjV23aexy}0epM(MZxX|d_X`E%3kV7WnFLU^ z;1-kEKZ`Z!Sc1-3rnCbX3mkwg=Y|7wY%w7h#|RF{z)%H#iQEgIW61goBMqTptPQ85 zCsa%fg|dmcP%^a^il$Ctn3;=EFq?&dSHODkCO8X1C{zZhrE;;s^b__U#uv;8AI)h8 zFdB>m68~d~EoOXgY&=XD8Y{y!slYX2ei2>sEf^ClX$Rm4Z22tlKjy%c?~P0qg#jFt z9yvieK+9Yio=H`x@@{WMD+Uj1ILXYJCQQuXe}F#Fu~2|}BLBjQJt(Y|g`zEks2zi; zJ%fxikT~L8ju`D^4V=IvKiIAezk1O6oHVhVL3k<3%6i>U~jN_$M52He0}zTXGjsq|Q`FXe$Sz`b9H z*(3RgY$*>&;(%<>H5{F$z>v>$cXE&5KSmRSjfMZ9Xgnyn2~1I-0N61DJ6y3t%1dF5 
zkbI$|wBS%8c%=0~bLT;Rt^m8ahQ2W?lIsg;fa~1*7x{ib&gl4e4#*t6QVtT-uh2#> z%S45;q<1pF&6qGC1Iz|9E12}fv3x(Eaa5TT`s}b}I7bscvm|qf|3R-J)yd=dXPq1u05`A%ECsKE*RW#+ z^|49=u7EaK$H2CZx>$=)=|$vgxna$ExROI$xOie`rF5MaY)Q`5K^$Gff@Rhl?|* z{#R}YV&xO@eH;&hBgzB@noLyn0RDN3#yv@0oLtQL&2R~{#}V4&tsHVm+QbC(I?%>e zw6PJU>QK*8{*7F(R8Fak+{T1{h?CgM@c?)W90A9`NpKqAZiF;HuUo-a{jnoRpZJ|N zk&WdumFi1DeD;&y;6*@@$h8$>H11-LTzMn&9u^_$H9bb`(gu-UgDM zOTZI&13w@G6pd-?zj%;Dd?ZZ^4g}vwvn7b6z{wOaVF!5}7#tT4uaL_-xEPbst&eVX zsm$BJ8vxgl|KKa>r1|9f{5b{zNlzq*1~e;E#G#y!G>(}d7Zd`TThve=P4txp`bqsL zI1nnkin^#kK`F);CyC+cmexUOqFX^K^I;&>t2^)kfdF#}c>l$NWbk(ukaQJ*5>O7R zKpkiT?VuM7fcwg9@PKbTV9>ia5e|ex5A1{kp@7$<0{z$j6cm+|Rn*iqG_`ef^$iS- zOiaxzEQecJ+t}LKJ2;LSJ!b6q3C=E)CQq3*ea6gLv**m6zhL2_#Y1@}uHo?hNQe*OWnpx}_O z@QBFhD>1QG;}epSQ&Q72GP81W^9l-!ic8DNE32w&>gpRBn_F7jJ3701daqsYzcFy@ z_MN-;A3S{YHlG7XG_2!b>kC6O+B#!SSdHy)b^(W%{QzhqjmAro* z?(Zh~|57@@Yf=wbEp>qn^noo>C)g?Vf;Z^~2c&**MCu49=?QO3UEv~q;j+{je5KwH zNOuU8`a_h|A>!x}iBgwHllnw9og!cA6(w|w3aMYzN*$w#p3x?Cjc%!L^wBwPO14Kb}h+ z)Yt!}!v3FX?El{?`~R-7|JMrpZ}lY<6ow5`R8+!eR8&>f)YS1CO)V{LTt`<=Pap3w zG{S#OOz|Ld3u*)>!i#Vt{0K*qJW1|KL%#IioJsCWxYBE{OG7Cm>6UFfcI|m{-+@C% zj-5FD_IY;?FCYKF;IN3On7H`F-jaEsQ!jYMLo5vs@f_KG&Po7?5fU_^;c#G+$qZlcu<@w zds3Jj_`Dz~_}#pO(D!rW!#~K5i~8`l;&&T{>hSk^_`M$fy)XUl55N1v@BZ+Af4=Xpm%cTfJjRCiZ)wf^(=DA{Px`bX`&-6g!)w(o zRr{OkOD=X-6$IQU!-isPD8z=m6xp-vf`P$RDzy2pjU#0v_k2%ARE=%ic*#3Vb&?G3dRk%qxE@ z9^Fv4czQ>3#=Cd4c0QDBXd7MBQU7{PQ{{n{+LDVsmDo^9Jrrd6KgdlFc$}3Q@H9Oo z;CTvmBw$Aje(_uJ@P@ki6Kr^TPiyD>8=8^V+eR0+)vu^&s65zOUE>i;wmJL0e-nmP7w#RF`3d{=Y&<9nJr?%vRh?rj}g+)}ryw!Y#} zYgMs(PZ>58=J?;v&GNgSnc?>^HQn!VQkvhBtJrY`J0g;U-ubP#KcH^@5E~xe*W7Wt zUn8cgWqe6f-Rip9ibHLc*ieEE`Ph)1<##VF)AvDghVR38?1;gRNZKVVCGgpA#k~Oy zvj_Jyr`^A=x$|bfdR%*pOKC&x+PdnB!);~7mwJl|{QGlr{cdGs``$^(^1YXU9kJLE zo$mK2Jk9@ca9ZG#--^38HB9f_*PMFip2nX38|v{b&6CS(YuDFTlpkp?F22-LfDKuB zzBf~IeQzgb``nGo^0^nC>3csS)9*n@hX2FBblIcdircp|Ozzy*oPP73#@?Pj&E)!~ 
z85LDEn;MGCkG2<-T84jV&aEo0-rAU7eylyG^io%5v2Sm3p-LfJR_f`d+b>6U=eOdo&lD@DH7BMN={LJR!*gMcjG|At?Fz<^I);BUsYJL;C*4>T6G-qSc)-ZdgO zyM9?sdd;rJl*;2R$>o>Y6U%%%V@iFyB1?RGLW})+gNproWkvqi{EGsv`4$FV_bv?j z&FH$TX4CdieQDi&jdS_8Os}T2yi$@-v!~%|^@*mqO81ty3g5P$Q(nN0k?*=Fr0 zwWHi`$p#r{{H6!|4T{k8brhM~Iqy&itAhkx%wzx%`Q{_sD~A9SVXg^T=oQRgH*FAC+) zi>&|hyy*S;d2#dS=f%CU(9j2^LE(>zWzkP~UOX-IPyW^Tu3gvoyLJ?BPjnXqKIpA7NLcop6IN7c8D}i^*qJkcjgoivT4h?%!6cqKm zFfjg|Tv^hyUyExi|V)H`6q!or^1t(6_wM;lr)u_0a7RhJTQy*epi zpdy~%5MpKbORfYyDvk<%S{M=fJU=w*MP6vai_GAp=f4(`4c~ScyL>0v&|r@Z3yOMj zw_w8&Y`E0gm?`V3O$)eQl^k%hJTc&2X}s)Vaa`cj!kD1v`O%>-a-$;ukQEvKURqev z^IwaeM#+rqj^n zZ|#^htvLpAchL-G$tqnQ8-8I<({S}#TG3fyh3sYrJbKxJd;2+Z9 zK@#CX;u50Yk4s8?_t)Y}$%ZaN=f8DWtofwb1{>xU^%QKV?8w80-1Dssc|P4WxdAsS z_)V)QBj90PTELU66xs81V(uhj|2XW3P7Hr9Dmm`mUyHwVX&HXqZQ%S(hdDM_yZ05% zF6t>3?0~xk*pN+(nNG}|oD%RXJ~`l>E6IWHMkIy48EDcgMg1L}E)A+)OYWkt|d8z%MiCSL5k5b%Rg)wa0(jr@!jcUIX_VHDd~{)h;S) zsM*|5T7JBv02^`&{rb`hd~YP=`3%Iu0YqU-c#iMwkQ~1|f%r)P{_2+<_^a{wy1M?0 z0nKqA4d|}?xL@buK$BfwN7L-`^6DK;`Q;}&a!M|DXAu7<75ZF{EAZ)yF7WM-fR6~x z_Z8+hy2;=y%wy~npS#y-8Lx#IC{%?ksaCV7<&ORDp$_B3ae zpKebtyWE)~kN;8oS*MH^! z`rrY66>i;7*S>vMV-#G>n(jLqJ_ViQ^HORz)+JUSXiTU&(-L2Cxizlbw>`SduOqzF zuQR07uPdm;zgt!k(CuF$>+vm?_4pJA^>`Hr_x@Vk7*N;hzo#+2_rAvFipQFsDFb5@ zuC}i#jIG;W8&iFz=}MJH%auyM)`$wfwvclF_Mq~B4p~`1hku!@!>=r`)2A$`)4Mdd z)2lS(SEKiqnqK#P^(l2vG`41ZWbk&}Q-{EaTZ^xT^={7$Z9iHV+I+q|q`|8yu+FE} zzt*qbx5mH0r#hg~yIR)7*=EnGpyta}!7Uz@A-@_OcT`Q9AF8?L{#9pv;$JNfM1DB_ zT;MzN{rnzm2=}?QKhC@VRI793`!#DqQhhM>|_JG2Z?Xtp? zZ9zpR+Jb*I2J4J;2J1#R4OWd=@?Gh)gYV`p4DU_cQ{I($vZf>Pa(!E(thpsVu&pU3 zsIwt5xTh{O^jd9Dcwdby@_$~!#LdcpsM{63F?Y+o|X|QG# zHcUD6PTu^8-sD|FHYB+>w5IsAG$qM88scT$buoe0Ya)YgRE34yst69dQ!b0VSLT1^ zL8)*2!&0BrM+M&L4|2UT?!7Gjiw)(I551E=FH&wp^6{FEl#30msXi@DDY6cD^WNGx zc=IcPH!CB8@05pz-uoA49{;4kFXc(LZ~B8Q-;Db&i@^p1oxui6r@>mMCEt}#!iKq# zy(v4(yONL9w5OhLXi4*y^B{Fe@F4NB81ZEzf{bzJgo#l%DJ6wJYf9YZ#xooi@G_xcwPk_ZoSRd%N=Ch$Q? 
zRPf`X@X%)kp;7PVg(SSl2ugmM5}5WdDKP#1%i^C6`Z|M+7LJ2;k_|4{FdG}Tmv^Nc zt!YnxTmBoy&pb#UJjku`gn)Y`S7nciVuGF(M2Ehc7a8@3tcdvcQbLoTC4{6siVseI z@Ur-4qrT2RxVSgvSsMk|F0oZkHv&K_rj|5g+t! zo}2@T{xCHr{=J0Aq-SyAsgGjA(jUAme)`3RddUW7Y;eVfZRK5Qhick0Pr*UBxBVLr za;G#k;9(KJFXtr%zsMr*Ae9`9Bsh?`*rexIqEjD7N2NV{Szv?qPfg~I|E#xP@@>_G zAsf=LA^kv2d-e%9hzo5Ex$q!4a1dGKgJc9e$|naRiP_2P-j%tpx;+POF8^FxL%w%+O`hyV*$@Zu zDCg%Kh{R;syRq;wk%^|35H`O(Ybhh`n^A6iTter&Q?{B7-+L(fX5T)CFDzPzV! zQ&nf>&f3O`qjC397JN4@BLW#hsgiMKLkGZPY-?Un;!khHzV%h%i{YM z9j)(MjU9eywqE>goztObWfQMl%X_`Fw_-&_d)=nGs`BIHAV?g9Uw;N1L~^dr?Kn7y zC~Sd$xc3YH;F}rzz&kVIfmdeqOXHh1ZLROx4DG*fwOaITgTtYx6%%6mO6M0}Yo1eD zTeH2Pg!n(d_;OD+IS8rz_8-r0|5xB3BKYke{^52Y9Faf1>YE*O*DE{pu4i`mOXIV4 zO|8Fm7}$T?Zn5aQX4^wgt47BTG>yt{Y;>zEsovd`SAMEJ8xA5J4kEG0=Xxx82$66Q zVQ>(^`9Jdyc>y>1{r{#{Zpbaq+|ZZCi!OD|ue$X{e%)o__D%cngYR}2hqt$lEGn&k zttO{xZ*xY)nYOet&(36G|F~k`o@i_dhldEEUIGjK`vQPpfvnG`Ah6%7Ah_SNAmpX- zv|CN%)9c!HpITGjt<>i(bV*ki;zxFT?T;|^qNc;~6&nVH#M4Q# z;JdF~3GCmQB~fP&`K2b`3in3;%8QKwRTmm$Rp%Q5 ztIyR3gOHk+#r1wQm1}p^?J8fWFHHNx()`hPWVzvrhbPF(!c*7I+_x7YK;JBNLbK0PUW>&e;RgO4tR z?tkbJw(o&w_}=^85pUl2jeO&tf8?Hffl<5f21oC{8}iZ^EH_ghEFU>yuweu)wRb3H?~HHG`EC@wKau=cQgh@bv5{3>8|&Q?Wy;Q z@2&GpyjJIta=rFaMqjOaZeP{K{FlP-HVoC*@AdF|J^Xv$`Jdqrrs{(gc8m`$j1O}* zf0p^``NxS{ulB~AEborHSlxNmr>;FdsIfIBxTQHVq`fgLtg|5~qPspIs<+NJ=6bF7 z)xH|f#QqwOlpEETGH+D6=MI#*=k>n~1}h{RY)1^1I=c=Q%-;N2*0Kwa6Su_o#Ga_= zx_Yj8mYMWZ(f?Aud1a~w^CE zn?K8b<-$YBhO0*^x)M&;bR=DZi}7o2PLj1ZUJdN7yApJ*CL*}MDkSV?r7ZGJxnIn^ zGM|L|W!`BI3cNDz<#=V^&h*M1co_^<8mkRfVM94K6wTcHY4(zf50f?}_Qbzc(Uk;m zo_w+X7apV`f&9VPz`p9Jpn=M;klW=!;rB`dt~@O9OL$xe50m4QaX-T+>vpP7&cMrH zuu8JQ8XLw9*^smN;)CQ(iMrQ+? 
z&dryB+=l8AR)ZB|T?dP2Z22^25%qu#NqZ~cAgVjk&kW_oWCnEA{gMw7FS}O~8~C^| zI^=m?MAZA4VF@o%LQ#F^Q2T5iY)-FN~R6jkh(UxH)U5vcgBJ0j%;$2axSzs=J<5il8aH9K`usGz$0lrN*1{f z$?;(yBqYYa7ZaQGEIJzQAu{t`M0obCmx0`d8Y^rVJ!C`PLiY!0tCM?Eca(Q$y;3 zDr_j7y5*C+1?~?rRwT(eh@4$j9R&yLTZ>P%G?rZIVqHu>9K;mV}uy+4T@h!{8siGK)!e+Wp8dMZm#dL&CtzaNm4b?0R; zWP_#EV6_uAOy2TI!Q4v^^A;su&s$mATeh~MyKZxJedUqXa@HV7>mahpgGkNsy>pe^ zgD5x%)*irl1U>Rg4}0X35&O_LEA_r_M&_ND!CR_G4@WC2~nS!+Jg?HMlBd&Ma=2bT=ttzbE)09(jsx7nBqce@){o{*#u3aJjAiQ8G z|6pkSfo#A#FKECsFZ8BIUgUs#e(X!($3`91ADfIV|Ji6gW3YDgmM@#_PrtaQ<6qxC zrl6pHWldJ~-lnvQ)2%7xmpc-P{pC3b@DTDf2tVf^_!Y{ewFiBkg&}<&h2eefg;6hs zzcy>BeBWYV@k5K{^dB1>wtRHc@W8bvnt{cglXBDRHq<4PgOFG$&p~Lr`d@Mo{JR3l zLGULB!IvC_pYsoTE*FROUMh}wDZFS?Q~q0ruKBkeCeuE@X1r?Pxz@hwr@FF?>(kN_ zS~ir$)gG#kseb$C90Xr^4nn9r2f@FcbqMkt1mCj24k-u0t1P(Fvoy5Rqcr@b@T5ak z`H#JtW?x*>n{wx==HiAw>m0~=*UbCso#`==*Vd+ockIm%YdKjG+H|pk9E9q?I`7(m zI^Q}!@(_H;L-4K%;H<3CvpTTRqdKVRQdLNkdsS%j%iw;Os?x)Lbz{~hIW_-5Ygzs` zMtkEwbT}9Ge5Swb{>ljdTf5?XZyZVTxqc??a__l}OFb8}E_7eYIp5`(d$!Xn@9hqs zd~y*APPh9P%5xFg0*g+x1{c2!ZuO`RyFH+8(Dp=qT+w&BOA~(>u`T?IiEsITwD4`u zKdkcbcxR{I#U}@Y&OSIAe)`_2s1tY2UO9gILhR97m*S4xyc~acz$^Z(0iT3JH~bP0 z-UvuK&>xt*zc1va(05Huq2sQqVabQ;|($hPd#LNo_YoDe&Q3f>xo~`uEznvJ01sx z?06U&`ZDP1Q&lW~s$`rxs68rb&}w?%-~>1C!8t43e_For?B6%+JofeWy$8PBbLh># z96Y-F(_<%g{`u6Y?H`{#v+bjcZ*Tq35sAt8~+%5al;2; z?k|JEg5k=81x}`exzn5nGZwx2L-LxvpTzC-cpiN;`c~Ar?4Bs^s;;QOn$F0uy7sV$ zhPI%nrq+Nf%`LvMtu5a1ZOxvE?adx3?M;``JDM(Lbu?Yb>ufq-+*xTuSe0xR~8jf1#kK`a)4p`Gw-H{}h9T7TDlmI+!onu<+IIQ&;Z$ zIDVVw)7X8nH)2lYcgMI_b;bJBcEklYv_*$B!IQQ&2S>Cw1w?l?_{Mg_k@nPIPVTL{ zlzy$wJ?DD$g@Ws47YlofE*5wHrx+|UR~{^~HyO;I>^zvg;Ab1QdOf-NM(mB)V+Gx@ z=c_v7ylUI81~$OOG`B>Cwl#%@cQnYNy6gR7dh5KeUa!5J+*flcqrV3Jr3{X?$i3iN z-ld}M{}lhrhJs0w4a>hzTe1J6gw0-0;&;dO#~mr`jyqe`8GpI9Jt3g6H7=;7Iao^(2ZeOkg@j+L35e>i@`<}i zzTWMM%UO4dJhE@)dF0;6x}1MK-Lt6YKgD3Nnew1yL*Ybhm^WlY@_Mf)N!#N3;tv#c zC!DP6OuA6lmgLjek`&lVUQB0wbZ{?xP`{KDQ{fwXr`#*~UXdp`D4yB3vpjPL(!BEf zQoIU#{!H^=oQ?k(z0I#$)0e6Fr7#jB|~Mb_4sz`7Lj 
zVyYv82P%TY@09t+JSg!^d6es&em~PY>rSe7?m&`HeqVx5Vefy6!D3V8!4k;^7w5s; zx!AD$z(=X8y`H3Qi0ez*Q`DV&xT-VtOkG==M^kfJKzl>-|Ha;0hP8c$egCPfvaV|< zUDvK%yEXO&;LcY`+pqIaXhrV zd|uoyKCk)gO1k9y{=VmxFZ-n3C4#7Ph0&a${3zkYEVK{wKN(B{%3vhn*Wx$RR0 z3Kq}&;*j^BL#n3hJ(&*mqSPR^E7>NmEd^WJEb~(~q!MdtQ|XPBQbzli-JICS^92Id zNKUk1EEVnIB%&UQBr@%MY-C2CC^EZC7@6BXRsMBY^&8}E+Xh)jG~cFbxZX=WOSzb; z8`~|lL~ksol4iMAMMHW>O-*`OlTyy;D3vn$3X>u)xQsrA zD7$N_K;ajMMNqJ18{{0+e4C-_`XKi>>9XQ5|D57vMpv17ep`)xQDvD+bzxyhb1pgu zIhy|y(d;ik9;kb8Av~2m9F{5^rpc3rXsPl6T5?7oJwB^@szBkdGofh3B2aAJ3b_X~ z-)0|o8CM)4TrJ00wihh|1#eXh>!cAKtZ=crtyhBcp%65m`sNWbh z(z7zIuKq|tA<99rOFZh*3-L{<==neDAan@P>_5>znAm-wp}mKR&H*`7aE_Q2-$%?& z?ImVrbx##2{&^;pEMEjg8@EFKUagNsd+eXp{_S_CcLk-RXH`N`&8gz73hTw1e=ae-M$+ zXs6{xP4o>q$a&En#5{fnAuqNAm!I4|RiJF)3@BgnE0nF>0;QYPpSP|s9KQ0iedE|K z)V$uU@hJ^Dx$#P;ir6y0YH=yCR#Z%@pXeZ<`G2B+(2VXtK)r*gFP($X0!}NbAf}a2 zAZ*1eBrQ_~Dt?>+%HMv0ij`a5)vq{l`QlHiE#p5s7mfW+mGA|a)9WaO)S9rO z&}w=KsXDxrP{k<2S4NfLDp}>hm7MY*CAT6#DVQ>s>t;=V+wkL0k2`)`-F<1^*7CPM zX{Enf?ht!#b0BBz2$eCS%c2e1^25$~#D@0zClI@XlW?7QX>bQg7T6w|=HC`3_iLr6 z`?N-+d$&Ypc(pJyJ)1e1?#(e-Q{|tAS<^1H&G`}SqAXJ0`(bAWEYgU3zuuDbbRRbS z{^@|wTLuK;m@PHrvTJz2sArVVh%d`)IFRFUA%yETM2Nu-l6fwJ6u$F$n!xFNxX^Kc zA#~_xitPKN#ZzUVY1Xvf?z!Jpjm}<@_4&sg5?HPh4cqj>-ygFizdY-Rdur6RbC;bsuY{sxY0dz~0+bB#=~9-~sN#%N)dS0iW^SEA_VS2*EQ zrMqPorg~u3?CiUIAI>7~7+{c~3ro2Q;` zR!@9AEFJ}Vnm-EhHhW0$F&!uSnmnNT89$)=8Qo|28{TII7~G2voGLY)-(hkuO`8)B z^M2#Pk~QJ5X$u(+>FN}+Jm{i^=CU{^N-GErXSobjNbWJ8NLm$)_)UX zqxXhjtNWU4r}K(xuk(uTp#75Jc=jdBN$W+7^Hc%p?=v8E^FojwS_g^hhauL);4RS8oGc7Vf3?O}6r+L^qfmPlcFbGTU9M3YoDQ4=Z~LzAl-NwVrjg1ou` zpIP02%c*GyDX3`(F0HN!nHvA=@b5YO?>YQ;4l?wfOxuLslLygzQVqQ)jSb!k99$pr zvH0sef94367}p<7mv=?8^V>Kvr7cWBMRSBm*+h@4Y@{YsHHIctH;|+?4Fq`&dQa9i z;PPr4LJDhZLrQ9tQv|*^{Jjul2i8H7+7XB|GJGSn!;TA`3D@}E>|tI=d|ymhMh7>l zu!YMlZ)Wi-nj(djjr7>6hOqdWhR~$idXluRo*=KQ!)Mjkh2+=Qh7{E)LyGH4aV51= z0aDR9$TuQ~{m9|W5s)AUu{CyF*`3-`dUJILoF_+p*EzjL5VADD8&`kEAYj&QvuTcb4W!F$)}Nnq0yUI zOYC^8EpbeYCH3%$Ij#I~l$El}o4DM{Mi#%ifg!4`qs7(NQW6_#Nz%p|LV8mT 
zF1M)?Wuv7yMPnhpupy67TsIYv!*odhYayiWTL&q~A>P2~jl>)~F0m$##X3a~iM*4# zg}B^S0m^HI%!(#nbY%lO=1V43OOI`s$fK%BQj|qyv{d2qTTAf;EegD%DUVRtkVP!2 zp9&!V&mnCuaya#`L%a!gJi&rECb5qmigi!!76s+C3Mr+{qR5Iy0jH`Vnmdt6HPnQQ znkMq7N|Lm#l91J2ibreNC{XDZo#n*rt^z_%M-DNs zErXQbl15fEO36j_QvuS^Ib^I~0P;QmIwa~DzezU0jwc%t#*)oBLkW(_T@vrS);N?) zNy5H#gzALs`Wn;^s*L2fqh3&V8Ciz9K$*SSr0lMAa&CKSXnspdsG=!}QdB<`AOksM z{*QyoQApA?dXu8(IxaOJT$7q`hLUZPyAs^&h6F}UZ7j2)ipOrPV8(Qn z(qqpRqg|vd)Wu1oWOYj^x$Q~R{MH1jqDc}~R6iB|by&9m(s!=|8FEP0HhL@5c72$t zi@%np&%PkDNa{{@&TC8ZEp1LlnNTvlrX~UPQbf%5G7hJ&FhV?(9VQ)?g~|Jq!m_&K zX*nGdT7GLRUC|__7dA|V|I^{*QAp7;dYh`{@-Y1@?s|qc>w?@ku}5Z?(#RBH}JeKg1l#wzLA0Z!zjmYc~MdWk{8F{S&MnN;5QP?mQAQQb7 zS!)+S#;$dciX5aGhVSGW&W|!RLT+ShGKaJD;(IeJvfI;9Hzm`jyde``QC52 zE0Q9I(_=+ z1mDa*6L~RTGwxiTVMa%uwW3YoTGCYHUs+d7YA8lsh;-D05Tg4hx$#M(9MrvF^E3Kb zG1*D34)W6m@3U1MpX93q-YPm7eyLPdc)m3eND65+x$mxjW<+n!0 zD4M3izYc5ub2xq!GF0?G$u)W#bLV_fy!}fKMe)K5n?dd@2KC9QYyWGaR zb_8A?-5A!=w_j4+sFRzibf}P(`q!WiKw})bgK%R1U}E;CWV3sS*@7N4^LOKNGrI7Z zIUU5*+;&oOe#=yVoZqHH?y4Uld+R#LJbV-iQ3g@9-Rw!nX8WrbH+XhmULR53b2uTZ z(J(iq(z!gLETCrM9s&{CL*Sqt1T_1%QPJE_%8NlWe;k_mQ`&>`Guwl6^4f6e`7Kic zau-d9yp=yf?v}NXyY~>3Z&Q2G@|WJV;q}Hnw^sXBkNy>z({myrrP(A~QiClPR|HiG zODVNHw1>b#&;KX(4kmUD$OYX0X8!n=5QVfQP?6aZkYCU|6(H}|>5#wTN66c>77DlR zdtblq#I1pqr}}O!x2V0pIv{^^b5u&-DM@Ubxm?iThBAPlk{D%Z1*e=|9aS1p8(th) zkM2Hf3@wUooS6CXMdC(WQ9@%#v8*AWD6?@YK*7(`K(Ty26#TUolxuf9=w7;~|HfjK z`lrjS75CN!N-ynThzHb!FXDHNM9B_rq~!!QM4;Pjn5b{SDe$g~QFzq} z6&`gGg?pWJN{ia-9eD-D*bi9c>OWxqJ&M1QF6O}=XsguiJ^3>Nj103&q z1=-*653#)+jI+6gCs^Gi6D@C0NftN4$>!H1L(Q(SC??lpD8^$V>Qor2{SMR8F&k5Q z@w@NSKF?n!hNYWXu<0NT4yqDC?Q9_E8+d&(F?D%wW$Ez7-p2N|i@nuLcSnmC-cDxE z{as9+1-lwQ!()w}lH3fRP}~ik(B1VPGdy%3vpjVk#duAHwubL8rTsIfrQMwVy%-k$ z&VrR|!(jV1JRIBS2Wm&%KwHHT4Arc`Ov?gnbWFif-w3dV2H_fvqW&c_fl z?GFU=vmZh&wBFM!HQz;8slQ`ctG$h}nF?j?(=qZ<%rr5~n$3h?f1<+Lr8wBN+6NB( z?FuJ%*n!4gOVB%H0w%`{!0Mzf*sE%T%V{m}P}2Y(b#(~PP=jF2(?HNV1)*nEfu^kj z4DFM^(m4S<-KhXP^#2FZhIt@5vyVtm89|kKcV3o_r%JEb4N2 
z6n`WvS~3(WNFE@?rk*1tWc1>avwDJ4bGn1l^ST1E^E>_X3p)LZ6`j5niVoizMU#JR zL0v$t;#+_KIm9A|IOHHg4m>ke;Mp3zk8yT<%JuZW#|frfXH(F^E<<=BoRfGyj3+x6 zD$eL3NwT{MNqL<(SwTl|dSQD|c2QekesNnsaY>thWl2+DRY?s(8T4&H_|r6qLk<$; zAVv;+N>@gOpgu^j}asvxv9dM~tddMa=U zxXbei8{^`mM>w>&^DL&ckHO9CrVH{qsIi4@p$R1|q?EE|g1n*$mtENutf*`XDyyss zDOV~HB_ZV%g}91u0%GJ4j~wFBImBx00HKj8h|P@Ni>;lWiW~#(3OvHD@`9pAxS^5( zPGo8?GdjDI!7pf|i%VLl@#RhAl*%T8ys8nGQ`Hz;R8<>NR#g##s>ryqN(HW5nTId0 z_!bb091_-l4++R2PJIW64OAh{#OQsTh11hmyMVi5*RZPsKkhJ(5Z}+Gr*(7KxnJ_y z2vK=6O``mg(@tcxxZIlh;4e8?buq5Ax&T*Jm5VQ{%p#O4zs3LWka+N4hd4b|h&MER zpJ3|vG~O!UZk$8dRk0^`SP&fF&!fn@xKVj+9BxT-l&}J2b5#wY$u;$a^tyUnL0u)v z!HRLkwfXpxnruR8RR*D~Qcf&aehY|0=a7ibA@RU6kf`kdiH<5H>KlGYGIDs9WbS`A z!G?NOg5?g2{o>CFi0NIt@ch;oPH8iXuWSq#SJzXL>QElmSdB8M;x8FgQC$|Hq&A&c zTAfNNtCW(;mEQs+$RPRUlc%;6sYO!!xO|-@OzI%2<+P^gr>Q@%^#E z@-86?7x>Jwrs$Z;dX$TyOsuh*kl9j#x=6VMMN_gy zNS<)mzYOA$L!#zMke=25n5t|4Jk7xOews<>HJLSgB-tgdKfyPxD-H!tqVUotKBuaV z$*-@XCbU)%vpaK%x$SaNL9>)x*pL)jT$?~Csg_X7s^X~S%5MSj=o}^-k|!Ka?}TLa zlaQ*R_ep-%_CrM*HZHuRrG{rHiYNEMKT>;gB-naB3&WP92Ag(>joI#_Dyxy8EL-t&lq{$lVW7{F~ z$Pp+wtN~?*jo#NDv3=Th(&f&8hR4;b+CjaeCXBj%dvQUlSGueQS1dyJ??<4!{s=k3 zu8`cUP6C?u$%*+b6p5mVDlTjY6BgIe_$4*p0+Ro8knLUusfV|sF32G$-hbv}^*+7l ztp`o-44kmMboC^*_r_^r-9LKlf*zX$S+i$W9CASW_tj|EzKK}CYYiz#X${QJYYj;+ zXu(SrO~k~)2C}56E>v7x`z^jWq^+9^smLMyz!oUjvmcbZPQGm3rE#}^zxKte2h6+g z9Pz5Vc9K#sq#Gmawn>UZ4m@-RAWKP8(n=$1NkuVe){busEJ|+$&!)zp$uB}0iX=TrxmiE0ANQfmB4vueFcitBvyit7S0i|c~Y zifg|G$dE(&nz-+a7nU-!*!9{l2cdn{{fR?{rYyI}j+la*{3{GKk^! 
z+Q&27eNrQu@mZ95Nbv*|7R$)2j7XMwYMbzPsjd?eo746fbr<%kCZu5?wt_;|v>f82yf7dbe+4 zD7ps`-%3uO*gNoRjLP+{=jMCViwoTAk_z1FGYVYm3ksZSm4!}~)!zitkwezXIgq!0 z@yF`rOK%SRvZUwsl8v>`SD#RP`rAnMVz;yS{;>e|b@?RlzTCN0lJ3|lMP#Krv=z&3TdFg@1!OJ6K+d0YP%r1Fhn+tz z=)3vjqPpk5Z&7?+ts;XhM&g$T9N7;~dDCy};mFr4DY(ln5djyym_EY+(H?_19`<~w zz@?uqay-Y3weRD`+4hPgHob}Q*1f3-R=v3imc6C%-vV+MU?6|#?B@;h=b+z)nN|1n z$EAwTOSa3<{4au?`mDFdtZC0QT}cm(eS+`V1WoKgcwF}&W5@ic&R0U{4wp$0c9+5; zZAKYURu|bUixEEC?4LM}>2NZ~WH>$A_(I+{0mXdGL?@=VbI!E7k-2jVJ}q1(gJs)9 zu<;lR_GyKEK4naNr)wSf(#+B8nGM$Uv7@KsxSNmN10R2zdx3#gck#g%cS6zr10832 zE9%SMgV7BE$?!%TS^s)6S?_v!=(l)QItSC&Fbh-LH+x#awb}Ee@bfYutlY$eZTqQk zLWr2VMp`;Af}}K}K4i3C5bALQT{^(M{DpMw*@Z zz%f7lL11y}eVpaDsI8rW$?u(pNgl&Yt^uwm)WGB9Y4A}w1p%t65OV4ykWQZf>Y3vZp>_<|>PLX5 z@hu8kF_`3k(9gxc!c2>R1v828$2?zH`;!Z7UTgzf_F9ykcpgZm-k;67j<+6(-{-vVgAOauDr zSrESGSD>9<3!z54fogpcsLr|&>Sg(k80`E4Pw{>n92xu|kW0GjCknge6CZxlD>?GI zM=I-@dnRWLn;U)ARS|Q=rIdHYxr%q$sgZx#sgr-nsh@w|raR;-aqkB(cZ5 zQlqbUWW-!{&*fjjDuko1Wuj4+YSE~3vv}0GM?C0yPTYe%C+cuJC;l1`j`9Zva$q2b zaO6NY+yjx8DiGwSZXmRlh{;Wgi*;l2?ZC zqDPMSqFZ6y2)11Ek83TWEq>6oC%(t6N7Clr6W`?Vbsz#cM6Q|zk;oz9^g3V|>;aYq z%I56#J~Q2{Uo(8&p3?CF4=6O!Z4#4y10Tb@8YJdk4oKjS`btG3-sy3}p1JWC+zS&f zxK$($Ve6BIu$@Ug?p;Z39-T=Io}EdxUS9_o$RP?jFp)zfa$xH3foOA-2ifU;j&`+v z&GL48!UzexPp6V^Q6eL*5xMLuI3aH|C|)$;pCY;7lb$f-m6JT^S(I|#T`4{9-X!bw zXqUBlwaXe1)!ywgrO(%aNHkwCkpl}kL?H)`-X7qas-P@J?{kck_3LO4_b05tpnD85 z(@M2_s7vbLHAe$$_)lB>I#j? 
zyGZ86UckpD4hAPl`vc@@=X`TA`n*drd%bG2+I^a`>U<9+3mx4a^F4y@a)YQS8)IH(M526-D;Xq- zQu=X;X?=ldnLU2FIo&>`D3`6ztM{+Vs|cveFAl8BD+sF3%L%H_%?fVF`5M4N4xE*< zAR0MvPOS&=*RM~Kgu z$OgLtatk{Al!}Ic8bw85O<`eBO<`_ujUp?gR*{aY%TN0Tz(U{UqLBj^IdE0iL7c`e zltCPaBt30NGO>7@XzBVi-XY*#tXt>}K_K%omm(NuMkNf;W72xbaoL@?)PnZF{GyhC zsuJ|buS5}2S(1aRD#^fC6{iuZi=>2_!W3d{!Pfvba^NC|805fLSqJfIJ0VHq7^G;Q z1*w7gJE^JjvlJV@2Z>Ikn{nQemj!tKFqfXtAIVAUriydh39_Qrpu(~$lmjYo73EoY zWqBG=St=z~mL!v^iWAAzMG54Z!mk0G{~UP8L8!78&Hp< zw5lYIQdJyFsV@2&UmRl4IZQajo?Ht_D%&9y^@GyY)FDgD=wr5?-K!iEkH^_o!FRJ9 zsMqD5tl?ywxLZU^ZD2*`m($|Q3&=Uua$-qMGP$fOo>EZ}M^%=IsgIR<+w5?wi8n`|zHut+#Vn?{7@S^u* z1PL1w$*HCM$bxL9utFLxuSLC=IuWh3T0k#X^6AQQ9=)B z0y#(agW~u}C{@*ZRi$q9uwKvdR-3uqNUObfXAPEGUF^fp&k2@FWt4nD0;?iQoKQ;> zDeCFGlA4Ig1VcjcVHWI`#C&1}x0Zb=i70Hn~uWD?MY-?=hsr=8$rk^2oAKX>4^!N?si? zzN98pR9Z#lmn&&86%}-@vW(8DED7gS6@Lxjq366p%)@ZVnrs8op>^+TL&NuIz9m=QO%UCsz3-Mwg>h|t=)Olq8 zjbXL@7q6W?)jMWvTt8xoE$p`^$U0qFf@W_?cwMkOq>7gBr4*F7DznP5#maK;%!+b< znX)V>v9dHoQdNQ%R~HdPHHBXT1js?W;yaM+`W4c*F9XH)HJ_@sZG76cbIZ-4Lz{=M z9Y5H8TTQFsy0&G>6=RR|5i5M$d8bHbpQn)28ILT~t z>LRc7>LOohO`(5Mts)@4w%}`k5IMyC`5h$f_!+XdEJ43H@aL9JP(447&3}dV}MAI>Kabt}= z%KBXA!umXHPJNz7dVQ{EYD3P~01=uuCut>y~%i+rFdb z$pO{EhsTZ59-MZJy?54^b<>DMy=)hOyWkP+HxMjBcMin6bg_~hJH%4Ejx?EVM~Td; zy*bUMt|i^BygA*musPi+@9RJ;a!5oD8SCexd!!aTXy^N8M|Agr?MMX6YM9G0zYxbY9ZHQc z87klz57opN5B0_v_YUw)ItBz^0}|(9AZ__fDEoQFQ0Mp4J4WZts(ks|;yhUMw+yx( z62X3T7MwH+d#h!Ge{SsJ|Jd5o{eh#O^IeZ%`&$8cn;S%u<#k%9`4}tAi5>v+WnQagVS>-SKB8Z?pBZdyv)aOJ|+(+6FUz92I$L{-W_qU?w#Zi z?c15Sv$qxav$v}8TDMxi3dm<;MwBx#Eocv;VsQ4foEP)wCBxFC0@(0Z6ztkVfn&#T zpmD|r^t7>HVqpKt%EbDEqlNi9H*1qOKDLIhgY5NR5*>A4(44fNGhMWv@mw{Z#$h#{ zrnsp;$#hqHQs{B!am81;qQGEUYi44~x@KasE`2vW5f;wl!Scln_D z?XWZGoU{d_(-vT%VFGqq2H>Kj2cGE5mY=>Bgcxc7*+}g(-T2HWmg%XF=ppHcICGT` zDHbQ+XIh?kpZ8Td%BNw<8fRcK2c}^pNN!*tWz9_WfxB$5!jZ>Gfwp>n{z^ z-*g&Gx2S^E){|hj?Kn8^I127N4};IHgAlm;0O0rR1IpgL5V3C$u=no*AtLU;*T`(f zU}A?bn8;fg37VKMe297RPfm2J4gXSMcKzI2e zF#7WVn620c)~oh_{p#J|vSufEtla^A>$X8KTD&7|*aGywHUVqnCg6P)2ts{1|CKYr 
zZ^sYdtFi=qb=O0n*)9mQKL-A8>fjS#0A6H^kM0b+cUZ2=YZtNmOQ%Gy=MFO8XZ9KX zPwa97AKMlNJ+>|jeq>!8^3bXo_t3H%KW;ICzhiY3KW2R$H)4Aeci}7izvHlCCIsx5 z4}K??K!DB$2r=CSINM_ojMV^tUqkRET7K|~aCqY$jeUs~dOmka@O|ne4S3?19`wjQ zC*+}BA#U8Zj4*CfLwsP}LV94;OS)@yk#ybWDrwaA8gan>I_cb3fME1m1R#e%H>cIS(cjV>1>>%RivkwXY_z#)f_6H9@rz5ycicR{52 zabVbM0S#;Nk?d>thCskR4-WTz?9T}v_Yo5Bc_vWqxTVr=xn@P&bXG*(aI9cncW7i? zx9?+(I1aOVorhU%F2n2w*I{-wc9>oD6@Y*;G2HSQfJY9*V@n}iZ9On`cLCeXJWN)w_T zjXy@Y+r6gyyFU#j`#&Tw@b`jZsJHxLBd&X;Fvr|8qp!FY#$0x;Mww`rpx<>s(B{@J ztaI-dDn0szC7%64h1YtKG}8dStCm3#HZ z6?*r^<@xl*X8ZPuvwY7@3XqWl5jl{NL&Tvaz(ce5L_a7__b}R%QUkH2;U|HE%^R*e z_9-*Kf1DmlyhDznUk~B4uJ|X$jCy5=M%+py!>+9fo$lQUwO(C`rQThM1-@O0Iey&< z8UEezY5qNu)PUZ&$$(JgK>A|_gd&HigTI6DO{3E%^0~`F@q!WW49R)-vZOw@j+8pDj+7GLc4?kpyEHSPBPA`cGg%tc zm6RORosbmN9X}aBL0J$PIZ%)Td;jkcdwex|_PzyD)DM7ETNR{6Iv~jQ>s3TCC*z8jvtMjf;EAeYh%L!;rlLxh?NrT&DNg?ghgpiJ8 z39c(C4%d}98DAVikpmSuaQ7`nv-c`UIk_29PwxY{)=7}-YrT=1nmkFfvAZjEcE1+y z8+buLq;@mIIn7~QaWx@68C4PFg#pS;birAsETlOj3D=Syk8e$jBebQ83GGrLp)*-X z>`a;rprUi2AcwH6KLT&>Z;*I+CCHBd4e6@8Axq=f`)pnH=Q+mu4|1$5Z)7_=|C8$N z*DVR5HpWmn%1CCcf+~_`5mPhL@Wt85_{MAru`x?bYR(XnTjYFlYZ{N-E{h>|q{NUr zlP3ehkOLJt(6`P9;hx_h`QV=*Kl&GBo!kj|XAZqqoK<~PWO(*=iG|VSLMOYvEDzu2 zlmLoSOym@BBjVB{d9oy0Qf4eAKUYAh&*f1Yv$@o!%xG$JI)~bt#-X;QvZ);@U*lhg zr86L6%X|={`CocqIb~v9BYyy`f6EG#2Y({o&B%?krlF^V88PSv#8Qz=`8Qv<7q_?F;(mSMxl*s@Z%7Vg> zL*%9(K(g~!v@`byWFJ@yiX)rfl%3r5s9OEN?I!)BqfMq-Jyo_g^~EmUCAnVYth69j zQX)B)9~&i&;)^rGxaqlMZdD$I(~v`DHD!e{n=@!pt#VpqTPmH=Aw{H22GEfM4LPv> z{s9uv+?&38Ddg>4^||EWUoWbUZ@J&Bw&QxY-oEqg#+t1S)>cZTlUH7m2T7Xa&lJiD zB4$cNG9`hZj*Cmp35cu64Hh+K<9SV)L~e6BiPM@!X1B>gnH?!#1HzF59XUk*H4jpD z{Dk&$7Na{sm%ppryZUj{k@a_a&#b>RsI{lN$56el)zYG<&cQ>jbi>CL`!d*hH~}>) zEHPNlP4h`j$@EAq&GJfV%Jh>oX9S8`@L}?4s)Ofl-ZJE$8*mRCJr0Ww+U36B}GYVye98^ztAs zu9y<%t>C1%<|m~&<`>JIa$4nXsjX?ANo}bT9~6dQkfU%l~{haH=LxjM3M;o$WXe|C?;$KtgDKWt+2wxAno+B~|wiZY;d3azK7dLoM!xt}*+Xi9O|tjYrTW z*FcX^Ka#@;nPxe{jx-rbVj2EZ$T1x0WgB%5a!ne}^URe40?Xn7flc0kz&2-cfQ=lu 
zXfG%oeU~a*G-I@D-t_+KKTdBM|8u_b$>!zxkN0g&eR%vx?6{f+`@W7b^`5b9&|Pb+ z$6Y5M$2&g3);EbnvujZld>gZ@#5VcSTQas59`Q{^zrtbB4H z8s#vt=(96_(X@;8b1^-Ab1}{1Kh7+F_2*ByuQsoez1+85{Nngg)(bTa>I+@N;1?!V zo-eFW;n&&C=8=z=`2)P4@!jwsgWEiu-mPSU&dogH*_*Y9i$vX#Yec=iYoYpWSE&Xq zlLLH|!${|2E>+CJbT!Y$G@PG{DSNqaR`!P#zovZLyhiwO-wx)-6Gy0@)X#)`($htI z5GGFVtu1X|JKI^j^ma0S7UF8~B+Om+G22u7VXU{-c$%-qgF-*`2aSGe4@UenZ{G6L z9=YSMec^7vWPtd4%+G6A$@;DWP=`Ed(si~WZQS{El$gj?x77vT&#=zP|4A{PsjCLRb;N(ts zP&?oN+Q+QGNW~Pa&KQ8BhAz0DJqvz18i3O~g9d_Az%o?%EHpmxDcR)sr!3QBA4|-R zeyle?`eDHQ$cIalay}n}sZ~zL6tqsq$SzFBh~Lbc&Vk?O&|&>gMA)&^7xu4qg`*p7 z;nX&B(As4P2K#iu{Gb-t9##kEqo=|1xGEZsP5=QF4r%DY2mACP5S=-Q%Af}zQ)53A zY3zeK&3(``DfJ~7OhFw6Bkjdt_*XHQD40Ku3QNBWhBZHU!r#9*!j2`Du>VgZIKEmJ z)YfZ(&PFvb+N25=TPE^{W8kvm2zc&11pd4J%^t$`q6fwMCUS?Js3^Px^v6A*x?m@$FWe5= zKWznrpErZ)FMos8uN%Q((FVZ&whp`(uZ7^ntAYIcD)eEr0=P?80B=&jb;)#aTr(4F zw#@?T;|sw-b1AqOt^-%AZQ$&3037{JfIUeKY#G{M6{8OpamHYtYzC%s3oyyC`fOZi z^U0{p_M>5q-ABU~`ws@a4)63YJG|DvnyNA4$kUJ!OdVDxLa-qtkXeo@i_^O1a+_v*8!VoL$DH?f<=-An5S8NGR?O6Xri$D zU|eSZ-l)d$y-|zPJHtMwHwIUnUl`nRer)u>`L6LJr|YJVoi3R@ah?otLk`ZXXM)|< z*!kuNI8+0o>46>F7;Hr5V3la~$s*PEgL$_7dozXO zJJT|!cP2G1Z;e}B-x!^9ePwtJ`_$+T_P)sj*BfRJT`!qG!VX$Io|G>RF37xKanW49s{UPvhR|Ra47PwIL!I5POb^^;!HVL*LtYr4@EwY^6nHRXcH7j#{ zV_JiKZPMoU%6P#2xzTlxhbDJDZkgS8zict?K4kgOqu1)uq<}lxrFC63101%@2A9JN z!ROQ;5TLytf=qTmfbAjh!JYz7|FhsmHU#HL^Un@E>koDk`*$`{r?*y_uCFcg-CmiO zdb~8N^?YgC?)BVc$osL$4evWZRY0M6<#1UjDrA0KV-Bp8ERxaCJ@uHAdbSf@93DXy<=GTdKU<$J!cEcJeF zUhDhJtkds_>8Srb^PB!xt?u{@THo{Uwz(J3W_vH7(Qb0U6Mb&Gt(*bQ$iaL6!Y?@p zNn;&^>hA!u#bF>goQ5C|9q6KL%C!l$|d{p zH?779Lv}X^UG_JKO^(+|HBQ$_70x$^B`%W#K8vP-$BG$%MGnEc7XTGy5R6l6fq8Zt zL>V0fhSezubJ78#m+7aV5ZkwYbeC72(Vow+aehynWkC-ea#1c=PP}j3LcVJ~5;|fx zM(%PLqcl2QrB=FJrIxy0r50kxD0$ei(8&N__{ z_5%m?bE53ePUKRbh<>(jgF;P*mQ(lvAgsohih=Y z9IkY|6kglYJr(=&m1)ir}U>Qq9%;Ls9r4rOr@8C(T+ zBue2v5|!h55nb*z%1HAXjgWd@3ZING4nD}i@2}Zt_MQ*XX!aHzUxD^wHln?l-6)GW z`d)0Q{*v!#@QCehg}$~qUk{~vUkYId5BiD8y`D09r(1qht7|o@(Y2pl?tXz&;CX?Q 
z={?L&^BHEP_>M4>d@n{N_>D3q1N_lB_@GQIa6Rg)ZJv+j|3x42x$x9Q5yUc7}egH>~haiZiz<+FVE{dFWqM_ zM(Q`jP4vIOkpv90V*^H*;=qehlK}xJm-9sqA!}zr6qMV z?t7AIeB!RmN@Fa^#Sq;WW8cjQ@oEka!&Qf}!^&{tC`F(wI>%4J&+u&&%KZC z*7eka2Xyn;g~kj|()m z-Y(MLb6H__;#`)UeybE~R~_f)Rm8*NvRO2mES$rNr^fMwWyWS&xx4i zegTtwo)<+P=tjT~NG+P892g=ItfcMv2NZy6+irKg5bIzeZUMNnieo(6R z*NsZutwW{8$2;?F^y<=GY)g~9J#%7%Lu5Qkn1sz_@fac=i=HZupcPAK=xRzjtxrs+ z^$Y1?=lOK%U<{o)6iufLvnKbcbWk;6XsX4vs za=q5(zDmPm%|%wa6**2e`En1Bw4{JwNgR>N6GSkhxO`q1J4H-lD&p|0_BdixpO_Tc zFC<5t=ZA(5#)Q&`qA9dt_GCZ^atK5Y;mc-1?E2Y|wrw7|FL&XqlHI?KS07w_qeb=i z3oUA!I%*A$)RbFj7Zo~KW#qWICCYt+_$hcwR6H$;Dvpl92@*vC{JdCSesi2Z@0>U& zx?dQ=I?ux~2V?M&7orJ_VfJKvaR@>Vk%;(pvmj&B+|P>5KRm14_T!!Ay$i1N9{#Pb z-AJ#>EG zuN}RoHdJ@$98ffwXr$HJn8qocoj4_)zBEN30iR2z`DR5i-7>^nr}Rv|eMT+c0bLd2 zk~A3O7B|H86kXtW@rT(yF(b^$00MF#qVEtfh*b1FqG09B=he$+-tPKy#^|NBGkPxW z`MF_GWo>ES*}YkvMrRURtc-XKj`k5X?w$mte~@1pk?dB&2zMwFF|7;J*%n1L91Fz- zj!otTj)QEN?VR`z%T+SMbQ52UoD3kMEQpGpCyCH!SLX8RAIpB5KHj!)+UTVPnBHsa zW;T!QU7);lVpZM+&0W#~19WMvxn5MKy%o6)>k`oH>*L-?2yv{BAY0W5!%S<_!;R~z zBaCV7AqN#JA7vWZl$0M!9aJ9(2vh!1JoJh0_&pJL@ZW7t_lT?gp1d z-nv&Ze9vC3_B%Uv-B)|yZh&svonZaCTOkI@TR5ZQoA}898v6Ux|6D>*gmv-nCTt=*R}nLiQ@efKTc_cH&tyQ6xAS`R5Lk z@mYky3}s_5O=UB_bP#gReTR|Woi{D+)6!Yo&l?v;z@9(IaC}_=oZjLNT6-Ko|DYw9 z9Ww@76GQY;@@Jb)&=Yo*Es0`qD_{?i<6& zxFkazj6w{i40RB)&|MHo!!t30k3UXh!SY!&*t7r-dlvh`;T7mEi1oIhzS#_Pb{K&1 z9yEOJR|lsHmoM#w;xl`oMQsmUQQr+u)%SgVJ1L#H z7)*I729wo@!6cl+V4}ySVHoh!G%_ro83-HZdBC=x9bw<^mT+{rF`PmfgyuR;(Ekf{ zAU2%gNg4%9gK?@4E5Cv3=S4f^Mc=IIKc{(L2Q_30^1kp!9J8h99?`Gbs$cF7V1Fg zFFydLEB1oTs@-T{-2qP84Pqgfl& zdv!ql3W}}nBcAJiP=BraTJxRm)3YD`Kf2yJEb6WM-`_)HAa;TvN_Te;F~l%44Bas_ zLwC0{A_xd5AfPB1SYS7Ho#U}Pu@D>3^;?|h^Zi}V;qYANx-aw}*PgxiaOQQt*WN=P zrjI+lqEAfA-(zsYF)(oq41FQ+un+}bho#`_Dg{3N3g8u`0q&{#;96h;&XpFx++Yn( zn{0vJWe1ME4&bnt4)%v|tDa(j-4!O#9x=gonDvV`%KBmVjrGal2lFj`jQPT8jP;Z; zJ>b2Z2i$QCEHwde))T_(S41HIeNX^f3jBN(!8bw^ypj#TJGd=a-Bh>_^O>F85i}13owg zk2SpDjAL-q5dweHxe$y#h{KYCAa5lI2-O1L1Vix3HV5}oYjCNx1LtN(U~O>%#&#w+ 
z^*aN7kPVKfUBKa*D>yuLo3MZH_RVqB?Ss=-x0kGMZjYS5yZ^=h;ePXf0KWM7#S_QC zRuup*O(6(2nhW99%OHZW8p1r3fD@z*fiXtln_&T7#kSyH;{dKrPQY$w0jq}%%ssBa z7<31xGaf*{;W^>>-17(hz1Ik1)bo||7te?6uU>z;eDl8I_TBs9{{Z}O3|^~w!9|51 z{M3aY+&~n#mdhZ%PL9c6y-eZ~Id`@nV7?}poF|BLQl1I~DU4LJ2b0Dp9Y-cr2ap~Mft zDnfsIkOZ@(kYpzXiOxz8>#YruAtn$SZv{cwsEA9M;9d8Bez4o?Z#U>X=r_SS7cl7r zKeJv3zF~g|eCRe3bj{;a@Q~Li=cM;2=ZMcJ=g|KE0&omII0heieh5<%f&{HOkY>CD zGHg~sIztgsJhUM`&;+8QtRXDT5rT@G!LJ6jd9&9qk50dFw_O2aE{B4DurF}X$A!Fe z84h{m@jm35*T=B4-XFt{`F;!^@cS6P&;Mh1|Nj62(GB{o;)MVieu$PAgj97A$TnOI zc~+8;M^}JsH!Vo_Gls-4Yv3k0j)&*6e{jliRyX*5_H7Fs@!HP$;ITjKo%`wVH*R+$ zo_GvLT=RYxb;kEy^kMw6tNj7*xVr=2MfV21i{2jeK59B32*==$W8kd8el;0E$ikkK zLW4z6LR|?Z4suY$)_^=8qw!3R)wiS=$I-Y9_J^oqui?;j0WX7^InVsN!XNwYiF)9D zjC9w}izyI@ zV~E1Oy>uylC{~^cRl4(_7WHFO<>(h%ksXOP!fEB zTN8OEVq@%y(5}QIoINRn!6(xW1V2dc3w@T}9sVr6J>q#8>3z&HAKHk zoDK+CM1dgma+5A5YZX6~DGEcajwm#kF8S4DBk`rlQSMy>TjNEYx8Z{-j`htFF6&}$ zs?VA9e9p1N^5}us`h(n9TpVI2V-)La;$g1X_$2{@7%>;^St!HLo@?l^(Tt=-jLew7O6k z$vRb>=yNDHD`a1K33q35O=3@cb6RK2)~q(}{@hmXg}jY14|D5dALZ1>J;|zxf0|Jp z|17OC{&{L;{EL+7z{d>~;NTo2;dA!_^mFU*y|Gbq*6+;*^FDW&Eg9~zk$Boom$}{U zqJ62!k9w*$jCrUc&Zoa9Eo4V-0k2Y>h@{`Qcrn6JVnT@e1BD8{kW$qGQL`i!wHI&(g5HCpg;hvl*d zy>_dwbvbLD-Q;6=xRJx$yN>IT^=r<2FsLtjZJ(9onVk&v!EGMqyE+2s-7VpstqpO(^|fiyRaN;(C1sTv zc_odx8AaWNsYL@t$;E#arW8KNPs@Lpo0j)DJ3aSFW=77l^o;ChY107_lWq`yPvzpz z>ZN!`$||0*ZPI-2cgyiV9aIsxbxeECg+WV+V|(dpd;8o>d$#&HwsnMfG_-Pqs+y9c zO6qeGbJmrmr&ZPGBvfuGh^^RP$gR9y7+dxrKYs0_+{EI?SxJRYGLs9Qr6=b8K{E5Y;Xpd`bSUU+RK_}EOpFsCg%E$gil&8lRl0PqqQ_i1LpLO(<>5BfNc8XgEoef*} zdD*Vt7052#5#gJ=EiN>*Go2gPQJ565xh9>nX;XHsmwTV-e~EL zGq!SV$C&Vn=>#(Dte%U+mKI+_p2rz2|&}NB7fo zpUuzG{Wm^K3*7KLm9zduGH2b(X@MAg{W+Mu*NKqPj+x|6zc9IaRG3`6Buq})&DBVb^FUCk1a2gyf(i|^lg5X;MeqeS|AR`P&A8-HwcqE+XTp!eS+ld3G6|* zDoh6N%_qC>%JFyK)|uD*ho$7Y>kg{LSJ;M`7rm_$&Idb1o{Mq~K9l6*b2>NB{ZwVB z^Qo2y#;LwY`stx4$CK})oDRO_vUa_VVQ(LfaqD^;x5vA*ioIuNCow>CS&BfN#UXdmzPP@pFuXkHzVo3Hv>WdO?1n};4#z%)(hrV=G4_mvF?W8P7Ra7K-q#ay 
zql-dL@8Kl_hk3~^bP(Ha&me8Dmhv>dk{7OirL&;;rTNOt7c{wq=M0UAXYTqzPyNk& zo<>-^Jx#P@J;|ZdpH{H!o^E8@KHK4H{rt46)w4IQRu8_pTVMO`WjplUhj!wdpWTt^ zfwhF(X(i;`4hk9Ak39%@4x;ml0BL?ci`0D-?*mFR+(3H{6O88D19g!N z*e|sJ7WP4Sh#P{xgf2d$)q*(ef5=>;45jEF8qq&=%i*Ov=pJq;u7Qt=s{xe$7rXli z=@`KJX9+1n2a$?>5V0UcLSPO$h=u4N#CaK*`|Utk&lY!; z=t1nz$5^0`F+vZbgX2)g`A~+{yz(H=w+2-BR)H4(O3>#Q0~7vbKowX5b^?olDX;)M z1m{73AT|&P&Vgj1*-#)f3#yT3q!s>R{QV2^Q@-VBVt(X1n#lbie>iPZ)y9MI$i2Zw$t-jlt+MGHwF+ zH!ua`F$*yIVhP3{tiWVC{{JyhwfKRiD*z5gLcp+@15D;(VEBjwJ$wx~Br5}nC~(K^MgiUb^>c%Gy&85reN|0`GS>ynPGj?U*=%?jS8kC*wFLF zW_p0Jk_YVNc)?ne9~`vN#TW?TQ7}<(aasawua&?Il?JB-MR3Sb16sKj*lf@RtIY;L z-D(JyyNto&pb1!Wv*(KeY#&dk)isZB9JkD9a0uDtzFK z9>hai=%>^GVMZHqbB_F96?ek-s(VGUN7 zZGid&U+qVHwLfU%*1u?_@X9a(w-R$;*IEIq#TFRo=IFZ|z;TfN%i*lkn8O{$H~V46sKa;0 zE2lB$J?6Oc73T@|IhSAT6RyA6hux+JTrpEH(aU*AV{Z@=j=dPMIwE*ZY7xZSuY?#E zS%~mc1x|!61ST8*_RY5(_pG$}>DFNX-KCBGmA#$unYEAgiFv~LBjXzTE#npYCG(rh zUFV-}mt4l&Pq~eI9QGLZIN&+%-tRR%;D(ukg-&eJ#e`t~kC7LEWUbkdX1Wm4Xkw7U zl7@IMm5FGM&X2G-lP|$p)KC6vX&-!Q9p8GjFkicOv0u9Fa((W4*zKvyCHIFe&)sjk zeDS=1{_D8M51)fxKYaSVfB5wI{P5oH`_pSWz#Yfng7pJ1dq*OPviy*#F%xo(=HnGi z%OTro_0M#7r7uYV+8<)0jE5srsV_qd?4AZyG9LOjuArC2Y8^DbC<+kG29oE`9BTUBTr2j z&&7yBiRIFtMGlgq`7R1?vwSpPq=p$iPDr5MSITBJPbW z^n}IALU<-cWW3UR(P$-2{B0Ri=6SKF>VsU4{>`))>gB{V$Dx>f_NmAU&!b`WeuJFN zK?j1ihwKeJ7`8j$LU>=`%kZs1AH&-@AH!NgMnaoHKZP`ejdJS4J_k()cw*M{#oCdW z_fl~j`RHWI74cpS?U`TKo6LJxXT9`!Eq&GfN;jn&#R2*kawDmy(i0tzCS|)Ej4Scp z8(kByGh!oWdstWa){y?l&X7~l?IDk&Tf^Q*HHLqPtdIB@UK=?Qwl4BhNOj~WXF9+O zGiCtJMGWS>3>;4p&PlZ#-&nnd@JN%P$g7Q(3m-MviQQb!R=iN*t9PnT9RtR8sb}{*2nh5R>uy;RmNVAFOM6JEscN6Es1|0RUH2z zqB#Czcv0L)=yZS&)(t@~HrbPskD0e}8Rd6_6wjwkihQp+H3T1Q)thy_gSz}&iyrlP$c}X9`bCX8GrsMB1OuCrKK8PZmi<%{rvF4SO_giFmp7tp6-P)l!^J2Hf zvJ>r&atE8;w0Casx9F}3bJ$!F>(W?~>Qh^o7gUj39$u2Yft#PXB`!OoKOr+?C@~}R zb$mv~aBN2UJ8ov$`>4#+4-uKEBVp44zBmsN|Cf8$V(zY6NWN}fMtQwWg7RS3YRa`; z>NC#nFk5oC%U-6x%|)xH+1IRnLx_FTIxf4mGTFPlEGMX_q%AbYS?{A#GCxG5WQ>GO2l(MSMB^M}VeT%+=kASj$)_#z$+Nyi 
z+g-+};S$tr--J0GmXN@gw-lokh!L;=ak*xB%1ka+H%z*65;?UIcbx{eW zt+6pBy$MkzClaF8K9A=X568w7yyM2@zmJN~{Sc9mGZHo(;E!X7#b^I{A_S^M$k&$H zk37rlI z!F5Q*vj?R!$fG7cakYC4!P|uF82=|V`NVksr5$>BmM0hoQ4EL=c3G-j~ zDKxO^6K6Ug0@oo+n0%_{Bez<4$)z3va(b@-IeL_z92^oQyM|WtcAwRn-Ezu&dF^p~ z+0rA<8o7gB#;FH_tYY^?I7aj*I0x;{^6=fY*4JZaeSk|}XE3X8fWzE*FPOP~l*8UK z%5mE?%JJCvIoNB%=b-6;7(4?~z(*cn{=d?}OHTLkkwXXg$ljCur0?=9(tSmOr}eVh zjJk`)i^_&OppJ@lPzb9};$c5vL?e$TjvWAE6sKsF)wYYDl~PRL<&5W6w^Z9UFQHeD1T z^>=2Hn#ap2rH>Q^@*e4lq&~D*9Q(jQBK*FKY~Vd#74N$tT5fk@^;vh)jp=tv%>)haIicNbm~cPvOWaN>3CU6$W@$!W8H-8K?l)&h>+&9 zyrlNJ04W<5A%(A3@?^bM5=eTjD-!+MY!T-bP0aTdQ_B67ryToLkdo7@C^h?6DO$F# z3UsYr*XUck-fU?4W{3(%d8A@o4| zu@`0Q9;}ZJqH2hce7p}L^|c_0pO{C9_$9^@^h<`%=eL@W+i!!}EU=i1kzt`Nu$EBq zLzo!^iWy^LvH>JX>Oh{9CRDG%%fMw+VVA5j49O|Mu)HF`->Ccq`RN!y57dJWq8S}T zCH6t&;#rJDbO2Ff!q|r}m*NA<&_PJ?GC`5w9yA4Qz;K2ISk5v5dl5VbBB~3X^Rysv z0p35c2+w~kR)m73a!|WW2HIDwfxTj@;gYx%ycb^uzgDgSn3f&bi_(H?P=yX62m2u6 z@lKAAH-z|t0C58m!hnSoI}qcs0%=||P~tPh%&!ZE0-9hhs0Ow|%HSld0IoA+!FR?Q z2%9AZ39}{e7rz*~hvm>Nx(xQrT?#|O;t%ilFvF3DiCz<4T|k|3L$PYJ=i$T~PX| z56a&R{tZy2{xb$+O>s35prpSxO(0IXC zl@Hs|{^NrjZA8Gqc_G;OF9(}w381F00rL`BFj=nv#;wX=)U5)BJJrDOpgI_w(g6Ld znxOwg3-muAKM))RGHwi7qh_G<)*N(SSc2{o>c0US9D@ap!3J?u;sb^nKd{inu&E-z zVk`uP_X=w?KWWLOVOz9OJE zZVI}eEI|J?6%3zPgW+8pF#6N>-+(P zr45=g8kpU%1G7sGV0L~QP(Sm4jSPAa%>QnPw>+NJ(!euHCL$0(TL^ya72xGB1@2LD zz)n>LW`PDcRcZfnY}6aKZ#NjT>oNL4+iUWjcGUE%?IqJswolC7(?-mOX%ptpX<&I9 zJ$10>I27dn?JB) zYls1~<1C;JIsfpEcesN?kH^0ON31*P<|g~3LXc?e35mz`NY)aD6jRZk$+RV-@y-(O zxIVJ4BEnUlh9qh~49qdS<6CO>hj*>jb8yM~0;4~GWV|`bw9e_C_60>gtu1zXBvMhDMpE-uJKV@4j z98PxK+3ARP59^TUe)d7nGcNl) zAGz%D`s}*H`Tqa@-9b)`)4srstlj>dE`5Hx z+_(E4^VsJ5hi8}HN6&WuF^`tOarefc3Ag&7U#@k*zuD`j;qNhcW9=}^83{O!%w>27 zfh6VoT6y006`F$2D~x8}FSA*6z0g_ee72AB$+S@2!-=t`2V&E#_e2*s^hH!KdqV47 zx;dNOJA$`+Z4MgnX$`vUvoUDcr-Adsdws}HuXUm09@U`}?vhUsd>b--?JI-enO#z1BvKdz3^@xRpfw z8*s+g9e}mDn030Q7b z^UUXtyXSK!+@=C9nEiur4&pHLPG;|tx#Vlj0`j_HDS6N;LAlb%z`Z1yjV?+eU}I~J4^|1c;g;cGy4 
z!Z*L{_#Zx5@jtz?;>J9(;>O*l0&Y0>Avgz#IEGxLT!eh86D7}D=8{_-%gMzy6@inj z#`6XmY2v%qJ1cCf_SSAMqs~*axhBxKt|G#!YHhs3+M*0rK|!Hgc5bzITJ}c& zq^zw$@tFg`u~|2f(ZIOOZ+`LV-+dC&etIROj(H}gj(bc6ym2o?V&+VrNxqc`W9AYd zf3^sd>zy;mg`S1v#5Q@p16}%a`r56QceOgnwl=wI)YtnPR@H`CuC0o-&#y>jW|ro= zrj%58#TPaDaSOTvBMS}$g%?~!Mgk)9zWGJveD~pI|McQ!j(NspjC)K4d~q*u@!rEM zA@Z!8m)zRGPp)njBp0>{Vg8*@4)x0L?%A$0dt0~V(#=~Or5iT8s?{|6>X$Z#Smf7p zX_>XjPRTVnE^$?*9#IwbKA~kD{=sE?0|U!0As_sM*M9d6Df;0PR`Am+Ja5b+B6s{> zKmhK;I6?9wkC)u5!e{?Y{N#Lx06Db-zlULuDA~7XHP4P+S~EL(%@=R%v6ouc#a1ru z@Yc=W9Bi7|5^0^ZF~K3GF_RThU*g88TkjRHzRlOSc8{NT?Rn(AuTS+4AOG^7UV)`! zoGQ9?o$B{u0t{}c~*?syOu&OwDOUY-I)J(;ysA_XOq4Il02RJ)rDL7O%|@- zMUyD+WhoT&cxq;D4Kz&Z47Z5sh@*wKrPG5q7diX2*0_1LZ1Qw#+2!rhat?Xx<=Qmn z=}|Z4;Z-y4?o&D8=36o0Iu!`Vb;!W%jrsp_10ko-K@9frkp6xO*>QLV={h1tX+5kW zP(Nrmx8k7nio$(PGMWAEs!6;3bh$f2O~QI(sCbN&=F?L^_t;j=WN&R{GrM=XGPa&| zb?P2=b?TgOWo`Q9%5M7Q;#&XP#jWl)dn&-?BQNu?|Dgu`Lkm9t$3BQXnEiSVU=IL# zkml1%NZl!UzKWB2vkQ+~F3CLVD4BTJMLv4aM?G|aqZ@D_+Sq%4iiO+0JZtB@Rd!B$ zTO1ww`<(3do^i74A9kYc28R81U^;dJ)2R)Zj8UAng+x29UKI>$z z3H?N+x!s8ts?CW$E9(}lKo%^P$e*Q@ai2|q5cF9DZeO43jUl;a{d&jr2eVQANPmhtjL?z3xaPrE%&|dF6nVSK!$xSLXmMT zQPtsUuBPqPN*$}KEqa#M`V7pkoi(tyHmq-Y`IoWrMKCoz1?FZ)z`}e0ET{6I1baYm z9~?vnu^sd8W;}~ghtI&vhImQNbwQHyWHw27B1VaRqQD#aSVzeJvAKxXV}}KN91^T$m}CQr7j7(YFMyihTGG^VEa5H$7ffVTcM&^5RO zdQ-W)4js^DLVB?eq7C~X>d?ciMF)^|m4_tU6(G@X=aSI3;*@~5^1R;fv;$@1m^}7rY z^Sc_K!w-D{o1f;wmOt%gnvFS&7>{|08jJ_e(;eq7&>Bx)q&~5BiR!P0rOLm0mMi`~ zwn71(iOElliOG+FnEZEGflU7V2mQDPTW}3Fpo3V89w_}HA>3Pp1U@3f^Q!=1gD7!= zrRX4}&_O8jnu8{vF&OgegM|P-cox(ImXI2F3M)g<3ihd3;wEQ6JlML3rEAWIPiC5i}WP-cN1 z&kQi*5d>Qv42?Vle2_>agO@-B(t_+j@F3U}Ko4`ChB^;up?}cTMhy5s+kzi790ftm zQy5f2XM-j0CF{pKyKp_klnHrWP6u^?7rn7b94pB3?YArfy{H{qd3U?MBx7+ z2C^Uq3ix*=SUI8cZ$KZ%psB_Kx@x>&fa5UG;{$y&e$cTK1TD82pb;zrYVq?xC3hhx zRW1glhNYm`wj30DR)E4D{I?H@gTm>Rpm1#^$Ul()`S-|o1SGKTBr8D?Q31&bHCXjs z2YToTnVZNBtYf3B&b}(Epi_jmI9Tp$ZrHzp^ceDYFq=>eAR+ABRU{G6)?mx=qaHO zQbZ4dKE@LJK`iw6!GbCX=FFL3>MIJy5evZ}WeMmNE(e`zanNa!0PPM*(AtjeLH$yo 
zd3ZHwo?Qc)e;_ZVLGu$bE{zss&2KG`9@7Td&$=M@P9Nl68iM>(z-SE*7|COOSst*) zo)la3AT;a|qM70La+n3y9&>>jvIxu*mVs%G7#NpJfKlBlFl=2723_ce`eZ==fGp^r zKrW*vdLs8*|AXAP{!h8@1|auM9~4FmKIsg~!$8g8q z7P@xRR7 zh2!xm#MA@$NDN%pC0uF@lRnd*JkdW}1b z7VSTsx^!>Scj{lGA2PV&c;4_L{fW^z`l#_KI+z}(gV_NFc9b!}w3h{@JPl9M5Qy2LGER72!)k zmLuzpUSQSe1=Oj49p-LV%v%1KGa@i|$1TF#w}SFDeKqB6rV{VdEIq+H=~klGl32^m z$NH>36&0#@G(1*gFeF`fe^9>Bo`7<*oql!H9X^|^w|Q@)b$RW#+v0W3zTNAo!zSjyS9eqcQnaKwG!fmtgEGbb0vku;x7WG*HjbHypo3uJii6=?Eb z%e9y@ltEv5BE>`MP<)W$zL+SD-H}Q9z2Vs=+d@h#I)iJh+XFY!HU)G#H2d$SH~O7$ z+TeHJsm}j1y*2`YZM&hHGw%i__o2O*h>fSzy@`USZQ1y1~9Kq>a9g z)61w1KEkXF{)1J{8DW%#fKy2bI2MP3eNiab6^4NARDg+lz!z&r%p{OFi~P)*O-2eu z$g?t0a;s_?xm2mlcdE>2_Mu|hlD+xtRlQk03f<`;8tutk{g#9j)5f?QY8`j2ZB10I zLq%k(Q)ze)t2lhXxiI{yb3w!hR(>Qf@*;tr8-bmNcsLuOO$D5BFZyHdkDft(r3jPP zc|zp=T48d#3bTLhVsf@tf%j;&!L0q|HjDd8n3CNEUJ7kFK^n~&QTiKF6HV78XHqK@ zi)f{B)s9868yR`rZs%<7es(7JqDuz%t#d{UFwGnrCl=j_{5;%LQ9%99Onfy2{p<`l(eH zhw7FU#F!N2rC8=>=h|jwmN}%P*E15*+MQ!lce`*?&$vXVy<|tHO|ZDBz=%l(dTbIn z#3q9MRKOGWLKq+UmW-JXbLX`(UUI$`d;c5p*?+4j*}rKuPhX3sP*;=L{7v=tVhy!y znVM=Jm9mOpox;*6qnwgNi}b=Qo22{_`?$PXr|6u`tcdJBc4+oVmyqmd?9iMsR#+DP z-zEd-5$V{Sn+o<*0Uz88kreVWjgVVK*!PYO;`9bSa)}~s9ugkXyuCBEAui8lWsqA5T zRvuw`R6b;SR{U`CDg(Mt2{`%{g1vt}J{g(<1o4xv3549mb+}kba1UbkY{lpQotVG3 z3zClBWu$qBB46Eh{aF=Tsf&ub=qt0gxXPxs`>Mom=4eH=MjM1SCz}Q~>i*}jM%kHyZp0dMPDz3*%K5}c2T1a=Kc0gyMf%led zQ}>Qi3wHYkYerkQExm1!=GcD6)^YP!TgOdc>(mIg%zChKUI*4wfoMV=W}|vXG zyC3s!PdC=b{5hHbY6e9~`Jfb~a6nxk>!7hn@_yUJvHO_f5&a%(g7*X{`0Wl?_1qP& z<+?LdkJY!?^D+IE7a-Da?KX#O|DiRfT(FP^F; z~Ir$Ec_C}sGLj^SO9=XgojMjD*d~EuF}6XDy8!W z$Y|dHInC>!pnU<9rg8>7(0*Klt@u7@?!_Ji+ynXOVUo|`U6klS0&WQskLM!9?fDAg z{9KOW^jwSA{)Gv@%?n#W>I>Ek^Os(;O57`}|1r~fKzfzGS)MOv?#7i+xkTdMx% z>{9jD!%Nj({9dZ^99Ael2XUpRATgCeT!S8T5G}X|s?h`GV+Kz?i{~)#JcciN2XRyu`KUgx@u&r#!Dl;u-OtW~+F!hdG`@ritA0(Gsr)r}mg2V> z5&3WJb7a3C5S96HYp%?@|C{lG`8CPaw&uf@dpw_Vi=wI%l|3D96@q!TJaSD2X zSwtJ=qk~wEEs9`UJ4n-17&_h_Ehj2s>;f5X}5IsZ;?>xxmn+vskqR`DZ 
z2afQIz+>ba0{@~8_h8W=&OdsvP^|B9j}ZGOgqXe}MCUWXx`Zf$AUX&UbP)526f7ev zL4qs?X|el86|d6Iuqd$s(9b=EEW)ienLhm1HKY zCc+?31VEMW;JgqV3jrG9f&?LnND0z}^dR_4X9}RAKmi4GAS&n}R5kIZ9xo`E@q&y! zA4s|JgCs`)BoYKcJWm+JD`$du!z>VQn+@XIL_mDk91uH*oDc=Ei^#pXAody=MJDE= z^_WETZ&u=;SHe7yg!yO#{>kJRl+iz^qkquAaj2tkd*sJLbU^=)Md*eWPsqWNZwj#Vi{d{4H8~1sB6{c_ z4A4UuD)WFo&V`;i`XIUhXn6^NM%YYHOA-OqJW)`omylr}uw)GRsRYZA6`xgL#d|dn8&>}(pn>&ur72*vnu6COp^K2?0do!P zk1^&46Pf@Rxe9|p5Nfv=bV8Z)K)YlCXrUX^YFZ4M?Mp#(+fvZjy$m!4mxIQc<-as; zE+5l)x%|84=jETZelP!|31TA}u=0ZjNW9Vp$tOA>`B3+tfR+r_$Lwt`K>;ho2FF30 z?2Xaq18XY*pfZKQ!gn^9Ma%`01zhC^T z{_&Nc^{=cP(SNq`t-+_2ul0XPzSIY)=O|a7Ah!)c`kE0)UpD?Hpog#DOq>GN%PIJE zOcbEwco?`h80g06W&&XEAPjaMA``Zpc|WY<7Ja2=F8yp#y!?}Sjo3%?#+C2Q+9ckY zZIc`}-LvYo>ETtcOfN`1H+v-Y*zDu#`=%2zw@pFr4-=5TY6|k_%t8L71;`(t0vO_J zr(*84!`#n6T+ju$p)YhpN9StDKf$&U{>F3`8FliX|G_b8@vwc$@>jGx@fWt`5>IXF zRz0?ES^dzubIpCLKIyyE0h!y>v$B6s@5^1a`XGOiI<7QC1?4kTG>Qah&TAD;GS?{|XEdoEb?Q(*MDNucpbuyrpbu&9r9ai)P5+|P z=LEVtkZ!zys)GsIn^~Z>X$oM4^Y4Ut&kb|8-(=>(@q{lWUm_(b!%+%6Pa<{r?u1ik zTnk~&yAbHJ?2KQi#0j4`=_6k0@`D})$_L!a)%Lp9Y3^pXYVUO3s@v<_uiwKuX|R=b z*PzpR#IT(WhHY#xXmJ7kCRfng;0n5k&VK{8IR7lnT3(oSf^i%XbI2HXK6w|nlsrw4 zqTG&G=e-hZE_^PEKJP@h$Ff5qL6Qf8qGb95k`#CPW~=V-Dc0QPRjt$Q*{Hw8y~D80 zz0Y`)+Yyr%x9g@&?(a+*+<%*F@BriWo?uwx2?kZ3*mXAru*dm#nVbV*bPzMJ{w(r2 zUWB|z#xwV+%gNPLWu9}%#=^%E?B)){xGd|B@{`;d9wyrp5~tK1oTlCpn6JIrzg(}y zugDVt7$OG$4PFP1>wsA+ zP>_s83XoUv0^~u85V?!b;IC&bCPP^YJjXK(ga%S==I%*iE!`34E!oZG$aX|UD{l%< z*4P-Dt<%6MF{lf!F|G;RXjU1}ZBg#Om%7&f9JM6i1+^$(%%Ug|%nJg+G(QlG^8)cV zOdxh4{ukghhkz&MZVoSb$K@gSlX%G=8G__`4rc$n1>|JDEYD!Bo?w5LmFSLi#?r22 z56QNKK-uQF2;~NDyvF*d44s;Y0)vY1N|Vyi2J@nj4$A`0ZmV3*DeG*`6YK1dAJnW+ zut3Z*L%<{>1dI`*{|1=27rc4NuP{QM#9{7D;~`hF`N@TRA#$b&vwyKPWnYoDKwrMa zobDXQC2bjQ5*t(fWa^W{l-4E0XjI0f>XgRh8Wu&DndC>+S!74HQ8Oa?tkWWn*`!1~ zut|yhW|a~NmdTM|o*V(D$>I3hHXMxq8*s+G=u60GBxe64%)OaBl^S)c7AU6a94Do>48FG)_)DM-vV$Vn(M$%tELkrLZ# zl^CK zV`Y3~pi+eFsg|H@uTtaRQf@N4rIfa)zL+ImQ{c6xEH_B8C_6$eFEd^{D?P&?Ew#ub 
zDY@D_E~%NyP26S^nK)=0k@yEPY8{aX)W}4zh>8cZs5mg03b^B52qxqyuETYF{(r6j zGfycG89)!wkKYNgW4##Z!XBm88pD|lRW=K1${8!l)_SZiDh`m(Eeuo5$cxoV$xYKw z$j$*eSw%xI#9rFUC%(hk}LrC&ovtb^0Rl9L7&At~4yn*=6P0Z%^i9rwcII$X}d z+=(5xXo;t659er6*mFudU3SZguvJmB@wOoz3 z;$+>Z!d%0!f-+N1-Uf?++)gXM-2K)*xtEcTR=&Al>6Z=W{+VDFkPaq+Y5xR#2ze8Q zdlA=R2w=qCAydhkfQy-`9Uzefdy}n4_eO;}w%eppGR?QwW=DPFd%$m1m%xW-YR)Gn- z42)e$@KaIoKlvH*pLIB&kM|#7{_e;8yAA7aMh{Vs`KM~z98%UJNh#c>%9p*>a7IeE z^}P5_rzKGxZsMWseyfApLgaim$0&QYrmDNP6lk-WYxJC&HyJo?++}3ne9p*zlc(sSF5ny-uEI4y57XI-XCSs<{++Ck888jM zCpBqc0f`$}Lx~#D;0rxyA{2PgR>bE3bH2xZk0mbq16DBiMMyaAO%zfzvs z-=b*V-=}2Oe@5A?|BaIAuHPyqJ3-C37t~F+g1TAPKlu~q;3S?!*;kMGA00poX7F{` zk5Y{FGtfiC9OWgUCq)S7lo$y-r9|;LrN`%S%2LSXq~k2+Nmo(&3Eu^FC&CunoQPjW zJ)R|Ie!N`5G zX50hlVb){)lF9l9u?ONPA)GV3#PgyMalJgBxLlDUtSjmirz^(14p(jXZLczgtgd>_ zw7429VtSP;YJ4?)zQNVC3-zuxF4noaeTnw9Q%kh3zFwkvd3?G0MG#ZF04r6`gQUvP zKRJqPuyb+`;CrBc8}=aV!e{W4^^akF^Z=fCF2?DGAhEtXi&))XLa6s;h~)z6^L z`>;UnE-aFrLjU9*!1q8Sz6Z*%e%56D<9HV)_9D1nK?iUX&q3VdBPMTV6QehaiNUZG z(HmAK+HZ9!ns3c1YVYiLRNk|BmEQaE%72L9ll_pwFa5DZV9m!SL8*~G!BwAz1Xqo` z7nFPlf|74RNa8KbSUC(c|C_D&9@yALNCi58?8*8k@EivAVz^-rw?_|ShFM(qIrbrZ zn?*FfFCePl#fkC{8KUr0mB{|orAUvNQl!RhDU#z%iui;#&x&7RJj;Hi@-F#Z#=96= zc^5)I-vapm2s;n3s*dj6uYDT*(0lJlQv^f-6+}Ra6e-es@4ZS%-*$DlTQ_#Om6p69>!3UXihsX)~_AewpfGk;|7Q>urq7K3k9)tzy zF*`z{MVic=)Yw$`5JB)EqTxei!iT8j3O{-R=>Mfe&H{AHtLo8KEvk4}Od`{09yA532BFl$kL+2uo66_9V;3krbOm z*gQeFNCzqK8A?Gb=mWdJCGZ&h_7TlE{>l*lbcy~g)SzJO6TT<1dkk&p>%ArTEkx=- z8U0Yi9vuaEGBW6kh^Z4FUI;NGU;*rb3-AQfK^(wIkj()&_}M0K2%P>1W;U8*=D-SA zI&jQv9FG}I;xQdRKGTfkGxan+Q!NoNl?E^$bb~%2Q&|s&L`-EjI0R0MnCd4|O!W?U z0$u_p0Y3+7c&)~y@md;=fXqjj+jDG`9lRJ@AbIV`V`gqVX6VCb`eA&gn=D}31p=m7 z3tELtvkUZ!nC2R=S&C^6gZ^Mo|hx&*!kKY$nDtu)hy=6ht&$}%k`$8`P+3+US- z|F^f|*l746cJO0Hx$u~!7oVAh@R>=xfEnconL#CJ5;22z_(IFTYH4OL2!Ch@>;^|< znZY^mIrtv@B8$ow*}n~$>{~-7_r{RPzcFA6XvKdRe+;vR?r8WAW08M5nxmEYG7?{E zGN0K6@|ktCfLUb-nMH|+nb*S?nkUW77t1j7URh?oMvj?pf-N>I&&&?UGxL++69r`Y z3UADxDZH|HE&tMjDg2J2q8H{&2`K++!Bl>>{1|2reJAAmF37(p7{d=iKPDg+<1ru3 
zzI^5wE@TeLA~rf-irH7oFuP`1X4fvyY?mo8+deQTerr3V_{MgR;$OB$6<^t2RC;N9 zTj@8upA?_j{i*n~9aDK?%hVp*GW8#^$qT!uG`_R{7&aC;_ITvp?#Q<%0T1-Y6F!*- z*5;%s*tZodWE0{vC7|LNQ&{==zD@pq>lrQe(elz(;Hr1IQxSml}H zLDi>@XH}m#{YUkY(_@wUPJgJ~bz+*|Ix_8V9GTA7;D!^^`Na8S*f{tN?zq?QiF@r+ zfiL_JKX^0K;1~K#M2(K0@b#2Psh1upGQUnPkbgF@TJdN1CZ)&j3srt_>sEc_wo2`L zxAp4xUAL*n7oljkv-c?s-a2Z^3Wrk49X%h=UEJAF#w?QtvmjPaa=YAnlPo~AoKJv*{xbI!2^xf2Y)!SaJ z>R(Uk)V$@nLhGi-fcEDen{_|)*rj*P@}0{O@N3i0*_$$$k}-D!vg| zp!%tQmBv-S+1i(<&DXu)yIB96Z?EAQpLNEke1=R;c<(no=6%-ei1#;U2YjBJ@AYAp zyTCBm4z~C*i;dGhhIt_Op9cLP+;fjW8;$`jMbOui%9|N!FmdDaQ0g ziUapavZwHTVvyXaxM=00F=?6yqw;n4MOGN@j;J>o4r?{vKBL2GYv>B=%^_=TH--$^ z4F(?^y)O9j=z)-jqgRK#8nY^djqVL)_A5e}-7;)O?ViDGM#6j${~&k|kwW@AK}63| zr07w)EIr5&(-)cAbT!M4JDWLCcs$)-?qF(!%ATY|&EbS>y`i{bMt|)xKl-_j%MF_Ad|$A(1aP@l-gjD$^>%mdNWWaK)?J3mAIeW{dBXUj!&qC$xdRGHGw zDo6g-3Qw8A(m#FbL)_pap8@oMD$ zHOT#E8PU)z2mWBqB$@syf2H2?FwLc(OXcFV%%DmZP8RxY|~I&GkR8G z)7YwlcIWc^72`|t2HlEs_qZ42UUbjT{n0Hy@9zoudCa9Ck2x3QGN*zZHg+T|jOaxo z?s?}R_eTs)L;r9+a-P}9y_;lcQ;QC*ZL#64Y<87eI>%ew*$}Kgzb;C5Zf&B`?CMPO zy2?W9nu<#Min7MBrKJm;3rm)b&nsT%nq9QZJ*()PduGuix6I-<6S9h#OI8tc&Mst5 z*#)>8F#^e2c!J~MRsrrgBL>IokayzJYq$k@7yO{L^E7GI{88Mpc`l;PRxkMlEz?z7 z=0s>WHpUyyYDhP$s>>f$R$FdYR8#MeU)|=EUD@rDQL)A~wPM&Ux#Em_a>e&(|C*3e z!CX?ynR99>b4n{=jw4}FSPL1Lhho$oNMeBT?`lQu{e0v-3wg9^ks2-Uu;98DISCgm z^pIQ+yPpS9gJcGgL^ z*je|{z8)V}%Ut4XnR7xlb4sjajwAUSmz{Sd^ML#RCol*5=3)*OAn#d(yrT>C_e+$i zv)hDQu*8AivUs9QL)SFLnvPJ7^7d%m;)O{@dGoW(v*s0zN^7mQOP<^05Z}__6w|zF z+{~t-@exhOCqy*e9Un30l}qFt<{UYjIYl)v$LKmXb|lY|h`z==oR`D^`S&j9Z|WrK zUxM6o8FHTBy^f&^w>SqcJPN*b4qotwQ}BG|7&4EI4V}vzW;B1yW1N?+V=bO)Kn&o+ z3_*WwH~g0s_&o^7gPZ$hsc}G?W~~{;RSt|3mh^kd=C2M=%<2nMOIsDIowPDlKdv{| zII5@2JYq%tsL19l;vhiTrm1^jG$v{-Y1~f7il)*ubZXjjB|-$&3m%jpgTTnkY)&=qs1JAw)5L zFiI_YeUethx-7jJ>xvD7*3L2wSkq=PZOt+(?|}{0Q`a1_@mlkhwO9Y|HdFeT&6HKl zcFIa-J9Rm;8OeX(37&353>M=4KlJ;d-@O`p5a2`9BPA)?PLwk&rmUU%l)lrJOWrx2 zA3y9RiXIM>jo1+`p0OidIcR&jdcgKVt!dk;b-lOEHJCEA)M)b1pvk0d2Tdn#yJa$Q 
z>+h!So0+-WMrPr*0j&R+E0T49IoOVRoVCzj4*iAjVH&sKIS|N$v-c1s928OPAvKCV zY(|lX9k{T=6Zs*BeTCBxhe-J!ik9^~l%g>8P@azo9^|&fUuGhsh;kqu3|J;gPK6j>ZH$2GcW&r1SGlFyYB8fNpi#)#F ze`*9aw-yMkZ}kbSzT7FY{OYR6@;}c+7B`rT`E@2|_8F5m{gf$;WIK+(0j$4n#D6aI zs}cJ=)Iubkg8n(=;FsY6z>9FcDImMsvSfQ#m2B=A(y05^Wc9$AEFVlJi|_r(>|q3% zK1}9}9~E#$kLq~_KXmf+e_YSg{qYD-=kYzB&cio6?e7_1`vDVZ-Dkp)tb_hC=(jg((a0(N+07}w+QNxnodZ8}3NIO_@B;kyF+EuS3!py>`uWG8 zkFn3VO5}YV9ss<6(chv5;vSLVWB34niQqqo;d!XS^Uxs_5}*#k3LeB5_yH483*iM% zAPAm79DIm8)I&59ei0J#{A~b(V$T8lU$kTW)j_`ibDwkp`iQ&Ne^3K*8{P}FO&$_y zLtp(BL6e{<4NWo0GiB&$Ku-@Igb6;^8aRT9zz@s>xF}-vU@5>r*?I5-c>NKL&@Y01 z@xNhA#fcxDR08zS_D&=-OiLhxk>wKBwPz&PLq z!T?Uh3?~4FgO6qTY z9{up*m~se@DJAfjI2TlcCeRL+f@${}I|Qf<5dzVF^G@FyIj*e{rV_0c`eTV$m|;Fl{NM>i@R)uI zkLeb`7pj2;)(qOgQm{(Ebk+%&?iR2M90F$rZ*)HwywZIn_+9UX;8#5+{6&w6p6fBG zXDBWLGSBpw%+nDtiN3u)u`#+p510U~4SN^(Q&V}&DwM}8;^7Nr^Ow_T1E*H$drqA)-#M+2{nn{p_8Z5Ia$h+P%inT5 zEPvDSqQZ5jI|`pTJy*Qs^j77(6H`6w#MDkZF?Bq}Mg0WY<9Po8HV$KVL*6?H_uaj4 z-`NNK@xh$43x!}52^_=S|)ibU))lRrQQ9t7Pm*zoNrnTRdY3~Dj@p_jV)7m)# zHbF9Wi9T|^Y4897(4Rm&b1=}5p7>kS13wq;wx190rf-7URgZfvRX^(4r*YV0gXSTRVXXrmN3{2OT+!L>`A}zv=O238JemGhu-TL8 zN!Z}YbO%SkCPH(HL?5~Lba)UUa`Y-xk)DL8)7@Yrx)tI;*Frpb7lVU@XM$p6P6VbY z91X}+bLw(A(y-#bB%VKEuu4=L|Rcd}p-Y=Y`Q4 zA7+bEmMGpeBTHpRNU{1*aJAN!pxHW`rq91W`f_iWIPf!1^2rpUooM`b?`NFJC>uHaYDKti{Cd8 zr$(m}tmsI>c;3EvAJNX(P}yzKv5K3c(o{D@=4q~rDAO4TuhU;0)?&DF#v+rR(B)>! 
zL)KU<4cTV7IOK>`SIDPUi$Z>~Y7hO}aseJl4$SeJj?CuGV5V&&VBW~P1BjkS!2gKF zeMaOPSCaX3AsKuAla=Xkni=g)bK>ntog&KoqN@y+ zM$I-}GIPFJXXFx#MG>p57DQ~eo)^B~rZxPsO-sa&Hq8-lteYa46|kHW$;@X*GP98| zUzAh^6FrK;v-Y8XH4QcQ862I=z@C5jL3^@|XnVE;Z&TJ};kpce+5Yr!#op9-wdKj_ zT1%4h^*R&E4Hw1NnJkE#Yu*;yY1IQQ=72Pt~z1o=*svrqstTS zfme3riOdF!0#@Y-%wi-T=Hc5!!~h<|={#KP7b5pB!u>ux7h+Sf4y`M(;rdD@2v!t% z%Pc7f7I)-FsV&G&(rV4l)N9TvG@6}RXmT61nmtrLGyjfd2{Du0FMiZGSN@>tDTr75~qB{_!W#U&;sg|!xi1udiU@;hy_ z^H$ksX37Q74^!rq|t;HH9GL-HB1sU*G-dem=&sATNABOS)HU)TA5`~SW#r0S6*eF zT{g!mqjZr?YDuqsQt_tI3B`xT#24QHzuG4hGuwnBW|LUNMkN(8tE7T|`31-1w<*Yb z^HFoGEP?w>X zGb`UHv$n!CwWiTBsd|BReANoun92?IQI!Wq&#e3m{9+$f!EAwbbU7OpQ^u@-kzEVGWNUvE33exF@v{U>&zbP)G?bGv)HJx zT4ohq!z@SgLjvlNa4epvK;DVV@U8Fw*33ihy8yX&JL>N{q^Z7BlWMyxxr#0)eo5zK zQGSP?T=t?+#q{=Q)s%%vnh6WCbYtfi8_b+nYaHG-*DSQP+cJ3W+ELTz?y(M-d&N4S z<>{!vW;QCYiCF>5>9d(d&6#Hs3UopjSLp|JH5&zVcA5Hht}*lN z+-2e2dC9`N<7ad4c4qFqkeT}|U}iq^Kjs$J;%US{;sr{4u>KB-2ZcF6dQ{(w+@lZq zZ$JEpem%+z$SyGj?b4;`yR9f-w=?*AyXT9Ycl9bd?%t*}cK2DOvAcd$av1(wWz2S_HhLRVA3el0Mk4Vew`2a- zKz|wZ=V9!1NXd$~;vV=8)Ih)s2-=Sv@Q9E+jw;jSqXsnTm<_of8%J)(J!!)60M6xj zIM4Y+BG2(ej=~D;c}m*)#3QLuN8id?9bxj8hnRxpK_(u_ zu7&U-5&s_Ox33{;*oYd0ZKwg+jeQUYh=LEJ2IDxM#c+m4V=hXQ{Uv3xyQELHmq(G! 
zWoNRv;z8C|{mJTTBw2ovOctLMa%P`2aHiL~c*fTTc}AZerzWJ&icCDQ#` zhjhNSAf0a;{8Q-uO87VbjSxSR1t0pN zh1bTw2223{ARd$g#DgIo>>#)SzWpDVmMt;uQNRjziVZQPF~sEHKk%n=j6y*I$OBbi z4p<1f!76lVE!YHhfPLT?xB&hG9)aJ$Tk!w+PZ#?77R2<-(GN>vs_2v03H^ZoARWXp zVJ!NS#W7wPXaH@X3#i3HL&)m-|g?4fl)oChocRFz>1MA>Pm0=Xj5GzT`dBdBWY(eZ~7um+`;VV}jf8 zWWM=_uhD*hS;KFzL*0WN{09m4lJ|&(tv#_(uIP_1_u4F+duf`){c4iSJu@li{cKXt zdt%bc`_ZI>_sC=!?|YNgyay(Oyn80wcy~?r^X{0Q=G`{^f_Ka8G5@;RE8#VBCc0|Q zq%NB?=}QvKK8D%DZ*V~FJr=pI6YhOGh$LXLwe?7LyyKypu5h# z^o?^kchfnE_nC7J{}ZQD!4;=j!b?sqq6<##Qs*4IrO!C7k~!r#D09+ryXv)ij!ZrH0@Vf&QXibGT8DQ)rW zQrYC$tGdx+z1pD14)t{&M>N)We5$e9^RebC&o`RAo=kJado-4NGWC%#H;jEU(JLS9 z^A5nX?t+kWO&8E-{@DBAkG&7mE$K**3%5VWo40Fvh;T<>jP%xkRM}1bc?ui+$`sf8 z%~D=7ty#6-ccFTp?^4Z`KK)ufK3lYx`y9|&=5tARiO)mbF5g$W9llIwkuTE*TJ7)A zMEedl33K3uoD=%DLs5GWj(jJaqjNKabTU+#4u+Y~?g&S2d$=cWOL(AgLs+Er+R#M# z{*X+?zTiUT-k?gg6+sOe%ci$!EeY(>?h0I~+Yzuqzdc~L!GeIZ2J-^$8MFreVK6t4 z>9+(jy_V@r2d(x%WLD0Mm0up2rnEF7S7mW{sd`6Pt!Dd-W}OA0?RssY%MIp+tTSv588(^|a?-dl zudBC(u5Vfa`30+mP6fv=*F>p=H-3i94G=p*;pnk-H0Q#GkS z&5C-{U3kmVyo6n;)1?7cV4R-941odA|G=+-SPPhkONhbIY{Z}d zb5MlbAN1h$k|HJQC^4k@CHCCh5;y*wVsELsq9D1Nf=F>ieu7GAUb=cwZoXE2PPuMQ zc7s7?_I%^?tYxMtnd{AyGWS>{W?r;N$b4*`komW1VkR?A%wUE|8O&fL&w{ZQ;^4(0 z28Z&I^OYb5xCHDg!#z)2g0+_^(7XygYN@p0W>=2q&#LeeRh0+Il$C`m6qm*-7nG!` z!1S9KBz_vKzk)>|EuMwsaA^`W?50~EN5P2t%tC*#!tGiI#fQd zDq1nCGFdgfB1>C-}sTS*?9&=C!ebCq-MY9^!scN4;OF@9K;-KufegIBVh*9(}v(!g)_km0n( zlSaOc_YHj;UK>oSX9m;in7-dErZBvPMzCf%p&3MGWR62GE^@ zw4}Tfxo0=-zb=)ggym`!x7?IsR*a^ZD_pto6;t^$mIn%hmxoIQE{~J(Tb3^GyR1+= zb!m;V=h8OS$=%DqS1&*eIuL`! 
zhyk<=<-PFa&IHF+Rc zX5v7loa;b~!uWx1@wkBvip~Rvm7E4{D?0W4q2#!VsW|q69;P~yqgeYpFz@T2zoHAr z!czP$iXQ9%ScMq$BmZ2B{C@*#05(h0)GaFHwZ)J;x7v`$Ru`H)ymz4d`I%&J112VQlUrO6<`CZm#6O*&vz~rq5nZii+VhwD9 z{wnBqEyF$_Xjctj4Gf|NU=wN(hEM|mKVaf6qH%lW$Z4-SIqoy2vHNVvVZRHF+3!iC z4+N0?fe5laki=OZ$md2KtmRoAZ0DID?B|;u+`~6Lc%5&$|AoM0FB2N?VIt$**blPn zU$$ZX2cW+cac}Fx9ISiq0VQvP2ecD4AbXI5?ne%O7&m2bU5AuD zHz&o<;XmAP2h&Ia7a!AS1{7=CHI7ei45j7ZBi8Mb$9{vUL@UMj={jD5{?kJP+jt&XF zGb7$zd*bf7;tLQLL=PeX4!G~J*dJnnKiq;ZXAI!cbD|&E|Dp@}b2h&BfYK53na7}i z8Z{{Au@B`kJOKCsdN+vFzJv#Hn@Hv^5$}7#Mne%$0LnyR=@54kdKPCF(H-LVOpauFB_~VQN*avh{QUip2Fjue#2V*wHn6D+F@*Y(=1k8-w~puPt$L=!6S(FAxFnMZ{)secrOxN{4;tE{sb7t#~>z59w-6CgqeUb zzysh*@qhY(gP!3ad$$+nCffHfRphs-@E(+b0`h*59X{R}c!FRM2eLsKs0VGJ6D$XP zU_ICZhQWSt0wDLNJK!hqC;0#TrvYsZ;p^(`Z}mT&_d3yidKyGvbMye zCJ^KM;BSV3B#;X#Km(A(q5~`gl31*R?q)bH!_YiPf6y5$nd@+Z9@4K^I1)GJIe0b# z$@>p5185s)!}q}c15K^>{ZK|MBz}-Q=0i4!m~<@ql!19F28e~IiT)NXpf{q$^jg$I ze~S9)rDy}a5N)GhMSJPF@C5xV{DhteAK>soPf$#R9Xj*?g++k-ZPsK~=XYopUB3?_6 z#arkH@osvkc$B_Zyh3*s?-O>#(3i@L`$C0rcybH(IsBOG{{=ILzPS>-Aovfa$iYqF zKbT-n4B?gOInyg`FM6RBOwTl9=_kz$`cbop9%@$81I^iVPjeo9r`bh!G*{4V&3^h? 
zb0dAJwUfTkI!rgTFVUwuceyLNzi}7!81Ec7qsREC|1U5r=#PT$V1?Y*2Ca?cy(Yc2 zFrgQw_Vkmn8$C4gqkBdX^sP}6ePx_OUl^Cs=f<vu3)jNeWO4~T;wgVsTg z-i}tHUu?AK(I|7eW93MeuCWbEBq;uzO3b-@26}(fn^}G|dbNR?H-G^+y5oC)t*Uj2Ad?)bI4XES z(8t%!_mkj1;M8_wqJXZup#EXJI-Pd4praGV(Sb=*xjmCYxSbPY_}eF@3Wg@;3OBo# ziZ;5}N^fv)l3DMzKz6NLx7>hRpZsdKO$vQ(d&Dc<&Wd~7z7sEVd!g9v&J-8FN8IJk z6hJ>ft2;69@-a$mIPJcV?^1OCGlW7<7+EbZ|2;I?`P@HTo!@YhdG5U!b; zA=U3yAl>IxA-i%)yOr&WtPrp-}o_g$d8z;~(2Jf8uzR-YmD7N5f!%|4%L z%<+Ay(df(68+@4>Pyxy#VQ$#};e|PX{*_SVz2V3;z#hEbK3$GB1#8o~&{4E{h6~p_ z!;7~(G)T}L8Y$`uNsw6-oFTU`C|_Y-P`P63^m^r%>8+}B0=v{316OI(2W-}y6>vbS zHsGpOb>I`Ns=&WBD+8H2Py?z!0MH$xDAeB~dBf_^lfcaPT_!=z{p_G_(* zG^Z6aow)9q9=y(({(|<%Fwy*oSee%FRJrEx9Pym65~ao&H7a#8n$&7T7im<5^k`Lt zY|t(X*{f3;a#5!^3u?8OmAos`EPsSkkkH>X(0`y~X zuMd9E%4ii@7HdL_;~cm}aT9s-<9r3Ju_2_vMy%7x58tJm7k*ARC*mRaOFJimX#x$Po*Ti`KJY8%;Z87~`-t%$h5qhj zjvi!0*tluF~u zREuKk)C*$TG;?FRwX>tw=w?O_>!nAZ)=P`N4_@n}MKdj+nI6s5f!YV2;TZU82Ic|d zJ_P;YblmgKK<=LkeZ22T5z~S+J!(z2p{8^ft|5I2e^y$6s46u~x;!;jt~4b@T$G%n zl%G_hnwwOso|QOPGb5o(J2jzSH#vU0UQ+x?y~Oyt;7^^zc%}t3lj4~MQ2)Raf1D36 z_oos6eb67mrPo@pA`|*}KQBX`TC%ihcD5DO=QwjU*^~Jd*?z*(tWfEq%xKyC%p`@} zj7+7h^dgn?v}*O#v?k4@)DG>0ls?_Klp(#Cl;e8QDR;mhIx#6sJ0_WF#wIfjp#FhJ zI1aC24NAN)i4V0oA2}abk_&zGr72g28n9PvR)INH6^!M|3nuc43w#6x`N2}Td66<% zc?t6Ax#^0jIr+*-*%fN>*|Rlbv)Z+yGFR$EW^U1q$ULeSp7{-UsS}>Tv?4N?W<)yE zh)iedAGn8O@DkR6#0!)7P#cQje-z<*zYzM!MeB-$RE51_<)y|{QaYL|EOq7Ol};68 zmjsG3O2Va6i(}=Iic%Hgi*l7>3(Hia3L4ZS3KnRF<@acZ=5Nvo&OfXhl>enpP~Hpe z;9RB!G(&Qj22lUN?FjgBn0JX6HU#~(<;Z)>5Cbp^UQh+z7gs7#VU<4RRohT@wF{R~ z?ZHp2_7f&mg-XR&M#;ohCdo%tWQilni(=|Syjr8l+x zN`BMwD`uMh#Z1G$h^Yq@F|`le!0~tjG1!f9Z>qprtj0R1!hK&*iPwd*aPPBDfimm0 zDXqbRQX0o{iH#F^ag9Fwn1&$X%!UZ5@cMX}8FlIM!F7e=z**JG{jhXhF8t68eY{*-K&bJO|1kKO!Wg-6EP2%`|TL_dgv{$NBu<|?!BWeX+Yl5 z1plEK{=-~Vifc8dm{xm=Y8}r-v`*oLwfgfzT4xA@=Eg_`%uSK;Ysr)IX{l83YHm{W zY+kH9xp}S1#OA%K?#N2$!j zMU8T6i zVgO2@ozaf^gD%uxEJgl@=hgVGP$lmbMl`j@hP-;lk!P<5dGt=>CiRB$+I|`<_eE_A7ptwqM3%?YhAdCTF+!U-n_mZJCYZ5xPsD 
zId>8KhYt7;NL3S|9o_@|KHP&JKn=uN1scCjlU&xB(75$>-e?8su+h0KRN z$!s`)Om{|-3CwomoyBCha}F8o>gM!zZRT`$o#b?PKH_wD{LRzZ#&|kIjIXoxUp7I1 zCC1&=jbot~$0BsIpc^{~4+y$bx1k1O82Y=h4`UzpA{-Qw)?w_yIIKaMM~q10h&8Dn zbtbi=9;A9KfK-k}flN|9UPVgB7n0J6wIn`q2;3#bqkoa&5k^Xf7&j70EmAke-imQo zufZH_#6F}UcwjrR4`dJG58crR;Q<^$9l~+cA)H1H#5p0!T~Z*KOKK#Ix)iBPmS8N2 zE>9xiTt%C=&1$9Um z^9<;ELw5pn?V)Q5UEOm;s+Wiqt|AZrjEK7-gD)B}E zN50S=k1<~F(=fiC}pu0-=W_zk=OFTtPSHFzt9&r|>^05QQ{q4&=QK|dr9 zsgnGHDu%`Pt{(_CLty`bDI&j>f5Vg|-iHB(01rUQ5Pt(c2uZvcS1=7kfMk#dKKKu< zc)bWL1uMZCuo0iV9p1-2_z)-He_VyfaSvg74nN`zLdM`p{NG{f67K`~zLGX#0SNEk z#bO*LB0OfcKyx0sT?LoXFb{lNk@s z#hd=(hteOsIQo^BK~H%_^q5ygk9dvrkl#uV_>1Tse<|JNucAAGb#z;>mA)42p}h%I8=<`KN#Y@o-)1E(xac$t>}Bz zadbz;i@sD1rq9)4=u`DHx~h>+mo>`iqGlbP*KDD)S_|onRyUo}UPULg2kDsh4mzrH zm=5b)rUSa)b9?k&a`>%AykW2dZ2vEq1^gFt=v&~lg469U=6rf?EJr`+snECD`gB9r zj;`vv(*;96Itwr6q;Vo0H_4(SCPj3}w2BUzHgNk*Te-ca9o!zX<=if_0dA++7TylC zeY|bv=XhJq@9;KR{Kg-&VElDptpyXTK|2yQ3i>#mytKoqb_`DAIMse>BcLx#Wa$%A z4LWCGNhhqv(P8VUbigK<_S#0%F547t*fxjTZdbw$+12p2*v;l`wwuq}Xt$U@Xt$ET z-fmE^)^4X@!0v>g-|m)RmHjhek3ADDAI(I|z*2iA90{{UAMA1Jb;j2WPOqPVOXINr zV6=!%qE6`07-QNq)`5ndCee_yKW#=`%tq&UZoP9lZ>@7aZ@{^X-|sw2(C6GNSn1p@ z>~&f$TH&-tw9ILnRJZeC=_SseN_RT{B)w?d-_i@mG3f>Gk(%$!{s)F}|LTOV0({L} z_k{KoTxWaWI@gt>edFLiOwgk(Znm_+-Idl&^r8NVAza_YXkPEcWd4eYS%PKmMZ#|P zO3`BXMyW3MHmMG`#nSC=eKHH&Hp|X)+b`GVc1f3ggP(QZax)_aSn-$#{trKaIE0 zKa@Y;FIv#*mn3ZQ%aUrERwzAtTBU4*Z=+nD?|g+C-=*RzpS6k=KEq1oKBtsQeZEsF z@%=-w*q4cc0+0uC#Xj%(4fFrqR9xN!;+cz~@IS(Ejf*^dE&9^u&(ZP#>^qolL>tpAUUm9*pbE z5HT&Bp-1z=ZKx&Oh314$;TpmN__M;p1T|qXqN=bI>GB!bGNm($<%&aV6beF{74t$m zm2yH>sbqz0Rm})FqM9CZOEoRzHNVIAH;3{Hn5_m4)-8Hf9P zG0>k0ef(@e1oj=w)TX8=OKOaEqPpnGTut;eeq~gMpe!m%R5CMBs&Hn8Onzj6TyA8g zLRQ3V#f*q{<+Si#m6Y(!s!8F8)Dpu#S4#;0RW%`;DJObfhyzOxw3d4UU7VopfD~%m=_l>l@pgH zlNFmMmmX8DkQ&pVm>j)8IU#z5N?g=Nwb-ZwYSGc3szpaVSB;Kh$}v$)35a86GKCM^ z$2?p{+z&%@I2r3O1u;Oo2z_ZmOd1lTs5ViRs*;VVJjI?$QYLVPDPFw1m+K zL@X0W#xey7-(nulWBw07b6XnbAszV^+IgwaM_;N__*9;zOeN_CRG48yd6_O;PNoMh 
zGsBOco)IcYNskgGr6)$H7Ws_yHxx#FM=m3 zei=;3KZ7ayr!#RtI#UQpdyk|x?HDdOcS37JKIS10`9>~c07^kVewI}rPw7RPlu~R) zNyTF*q1cs+EB4|=7YFcX7KaHUief}Fic+LPigIOw3d`jJ3g;;J6?7>27W6B57Yr*; zEjX|2mH$|2Y93RZn#;uAxlF-3=U*hA;333d8?@IT63 zg$l)18d7wX4b7|?M-f$%xv;8fywIv(esJYXL11N~(7!TEYFb69jCVzYoL70fyk~iz zcyjr6@uc#z;)&%yh$of4RhU%56eblj`N(H#pGrMmGv<^y2&*19;OK!g)RoaRRS~bfITMk!W)LENS=p z`7*Bcy|NSPhvZ!9Ps_Q~J(P2)eIw^m!{l75ne6zge>sNrH;ge4R$>1I^jbkJD5}A| z?^)1qg#XZl`>!psG;OXL`L-I7cbg4OZF45Cwn^kU&zJL<7tBp+i{iPrCG%a|@&qnz zRYK>sR*_@d3aPQJTcpOcosb&S`ao(-%U{xCnwa#MIbim`?8lsM!FUH6u>YYR_r5^| zUgtnN0oq}$xc|HW&!Jqzqlukja_iJ0*DgEI)#t)e&T| zI+@H@7m(@ddd_5ZCug*J9cQ@uAZM`pHfONvPo6<9;~DfYzQOW;*-{4&2AWHt*D@dL z0BzPH_zzw0Js_VR+yn1}{s3~ob*O>ZAVY>5mC0bE4(a20Eqa^mNOzM9>1_5S?ahIt zwKkjmE2NoCXTq`HxDs)PTq9xYfm7sum5tVL+n!T-r$ zisN7f^50dcL4oe1^{Bzv2>q?d!MCFhU?=ur?3Nd z$?l6InSFUAy|00!_H~nJ-xdr0 z*aQ!12+x8ThW>8o?}Z0&0Cfn5kp~~+k?4dR@lGfccTyWS9ZXPUWdp|Hgolft(;+AZ z#fkR}4z@E`P-hUxGx&frUxAnSbM_HSx^XP@zOTc{*?@f@&<(~|J@HSRk&ll$1P|aS z>JU!AgE)=9 zFTq#n$2W4g7*@gsxjH-%ecTwcgdgIFi*&@}hj5SyzXFT?M=a{cI{-f9$usabKtDj; zI-CzN*NMCE`*5H=X|LrRF z!QihcL09^Nu0-<(@Dso=knq0uLVid42YAInLmIw|7|8-+@@`MeXtd~u;0;1S0>}a-pa#qV^FSw9 z23A35Exduv@IHp|*$3efoP+Q2C47P>@FZTrK4S0--tW+nydKFr??a4yUs?@+B03-oY(?a;0w(#=p{fa6U(3o%b^My4bYeiZ)G7g zmSB1I;IjwlK0Kkjh{YZFQn%?MeNErdE%fX^7{N_^+70mef5FtDtAc!AEQ7UyRz?MV zk<5t&=Fkzk9?+Z)y(nlULnjB`N(nx>1~z3Q+@4nY3f|N$cvCkpC)eQ#eTrCo0$=J1 z>S8WoJ}#ig=P^rX@u_DxMyG+K{Q%Q~@1ZV4EC|2{P((e1yc)fcGNfPl*aJcnpy@;3 za-sAUFP3id(&#!rkFN2{=&GQWE(_++CBZzpAnc@b!WDEzI6$XFo9U!z4;>esq@&U| z>9EXCbU+Sn89bQ13aB3XFPJX$ahiL#-Ax0Bm#PT?N#1IyOmGSF6EoF zUG*t#RYTHi!P%^=&Y&>olu)f zM>K-zpk_4f*G#58TG_Pw|Iqaw@Kse=AND%;rZ-aP0YXALr1#!?@4fe)kPt#bNPs|q z(2?E|>4FLfiUoD7V;kG+jCCCA=vZF+=y<>9BrxN=-}n9eJ)V0nH+!vJ&OUqn&pP|g z>5+Tq49KOqqjGWX2DxCnUC!I?m$SBa%ISF*<~{HBv(x2g z&30FvW*cmE`EOJW=zp>MH>TEC{K5X~mv?^x8`R9^*EuM9LG`R<%++zwfki$VX zvOmN@c87Y&&hRkV7M>`Z!?To4;YG@Z@Jh}4@CMCTc!zdP_%glG@L|1?@Nxa&@SO%j zVMh#Cgk3OP9`=w?fB5T0ec|62^@i&Vmxk*MK!1`jj_?ijeK&$O!1hDQ)IWt-Cz1P% 
zTxt$RD6*TqTXsavmn|`iWm9ahjK{^wSX`R2CN57IjVsfvimlbIjBU{yjP21Mh#fTO zj~O%Ui`izhH0GdDcg$Jiu9yc*I$~ZnX^;8RxGh#^)EcWZY>m+w+(MtD{y%7gH@MXQ z0o&Iy$bEogsoeKZVvWh?TjO}{AYrboPjr>FN&YgL93`ugla-<5Y|V<~BF*yTD($l5 zM!nvoPW_&weuJ*0QNxbJ&Bkqs`%GFAPnk9)-f!BN_`GRD;uog%Njl^DM4b^Bf~`S)P%gEX&By^kx)j zdNRtjT^V(H9qDcQZRx#+Eos9>jcFT=>(lm_)TW&@tx3CLR+aXgS!LSaO)JxWGpS6| z8CRz2jKJ_Ve`6f{kv4b;+xHX@@3HjRlZ*Xq?5C6O&E7e~SvE45Z7<7nJf$x;M3&~p zDqXp0N=I(4rY)yL+mchG*Ob$2(2(6@SeHFyRFl2lq$+EdX+_p?v$CxF%uBMKhEL6k zv;JjToTW1^$WaRK^39D;L8Tyuf~s|Ts>KyXC;043#6xD zv2+y%N_%0n(ps3TG#6%T8VifG^@WvswFQm()dgLK6$JxEW%*+!CHXr{i}H_|73AGx zo}c%md0yVfW_fwPn&#!{jPrAKMz=8JX!sZUL0tx4Ek=Vu2iadr8}NDzUxx6bpP$L= zDVipoCAQLD>MAXze$rGHp)`~wD0O8SnwruAZB=QxUPWoWepyMoL2*gHQDMm%0}FQ!>VI%6=(D%2Te7wQaEUS<5<#MXV~oQsv@z*L}r z@OlJ)`e~PLd}=G_*@ueR(pc#z^;O@&=%7%|SO*kY1VamX~S;(}Rf#bahE6@N2JDgUQQN}0|$wM=J}S~`g#N5dBxe`-$D z#cIyS8v23bx+>ZLy78x#V*|fkTU$F_s_X2fvfe|=>w~1UK3XZRPgV-+vo-nkMcUl@ zD!uIbCjHF19)t9{VWX7V3FD;N115>J=S<^kABDe~#@GC85?`$|2BU;3o#AbsVf z57P#h8qsAt=!bS<|2A?^pqoy2|J#67*-|6gVzDg3^85?V~+ayLmB@RPWKaETp= zmzaSJC2F8Ri5RHVgbg%nLI(P@K?7^_0tR;J`z=4I@4Nf~eV>8<(f8^9Uf-urr|;7X zOLh9bx41@|pTqv49`s1;4Z~7s;(a-`bC#n&3=;bcYb9!xg+z?Zmhh295;p1~p`(Ek zvN}qFS0_s#Cvw2*QpIm|z2dXFTk%>wqVZV0O}k|EajpC6`?YSXU(vdae5-XE)@j{V z!q9)Xm$tsEkMp6QvAB%56FQ&<+eO$;Uxog#n)rVm@#h9(@!e=8J{xVtd!v(hZSoS& zO~K+Z5hF_`QpJ5DU)(0D#C4)goF@hq$B7AL(ZmtOVdAo4zxhSQe$zL~!VNmj!u7CD zr?J1q`K8Q_E71do7>n5Kg+|_&jnD^cxc|MLXAm|Ke{JC&{0?HUU1s8V2fqb+$2@V^ z?JV}Yy=39;V6od1D+~6d%ltjXGH*}4%-!23bM~&6*?afPti2ay#@=UT=I$?L<}RKC z*$F#f`xJL!d;ePI1nN38$~eGo)foE2269j)cm`!V^}mDX0QQmtaex^3h$1tOnaT9y z)?h0($DPFbxTjd12!=SZJdq{SPLzwqoo!-%=P>LPvpY|V$%&g{dg5a-JNBEHAJvKZ z5jZ@>5$x^UNKP0wd!Yfl#arovoy7mx4LV2;#1ZzvIKgumcaaBhh8&FZdSY_XObqX~ z7K6KOMgML`(Yxp=nu{T#TuflH;e@})$##(eb#WXH!xea)#mRS|T&d!C($6D#rE^?GW-c%QP5nB z&{)jTMXb<7W}}JN6C5l-r@)U7QdulB;eWUkXz-8d%#Uuu$M6&22Mivk4``4`tcGVo1Sx?jV&yfT2;w_tRaC{rygZJSB_z*tUqD>g1htMWp%pf;p0s6#Zv=IFG zK7*U5jEx^xkgu@|XyBip0{-ylZzt(EhXzF5vmQhPdyKt6sILR{orzuZSIB>P6C3Zq 
zUvJs_lp|eA2>IVy2+b=$;mbcw(N<`a$y^XOj$x1mjW7Z<{J)=vp;Edx&>iLxiK_9w ziu#(I5Lpb|^C!-ndIm(*A6nr5>JRF3AiLpq9>y77UxEifjd1@=wEYFK^Dq3T`#1mO z^>^^~Ep&!Ff1#-HzN$UU1Y2+he~5)lD1~}ZwU}iv3}di~^0%W&>_Hzmj3#j>+Q)fx zf-5Kvk7DOVbcv7A3x1fi_Z5Gk^5rdV=Va(iv28%UfvDPpit2~j7OEZ;4yqoMPkpMW zM>BT1u`z&+5z1eO?lD36J1Kh~-E)*$pF)$kh)(bTG5gbK1@EFse9h4LjUl9)L{*4> zkIo#s#>D#y+JmT|KTP(CEA-#pLj+vqH@YexQsR>P+}2O$<`SjACOP8(S%JH=4&`G>^Mbqb{L<+>po7#%}W0kI-@t zQ}RRqALy*mA2_9SOgB0sg)wSEpITyfE^X+F%>YJL47Eu|T`Hu+DioeZY;=4`Ze-^ezRON=#r}j{c^={Sne|% zlY5Oe%O#`Ta<}mjId6PM&Y4`5GfZx$Oy84}=HJN)Ha$KDNB<9#`il*xI#b+-bGYw3 zlj+Y=BQF`7$}>hYv|8BQx25620z<&3w5uW3il{87!w~$H`rDGUUXZLOC|KLXOxr$YI-d zIXJIZ4$K>pee>4J-uYW(_x!!GYrzTGx!{s)U+}1GvwKrEFZ@Q?xKO8Tfbm_YA?IpJ!R+O5ZSgQUbc9o%Y;Y1Z1gOb zanCwg@7b!1c`jAfcn&J7y~dOguWg!PuLGJPuhW`AuWOnCuje)W-hbCD^ZrfK>#du@ zi{mZ6cIPsL{im7MAB)&%6p zXh5;73aC`GM@GWJWo1}` z42EaOKzM<&JiJ_47G9_HhPP^Z!h1E{;VZSBVdL8Nusifx!;b4Uhuy2+6#k@sL-%#txhS!>pXoDB1|HFy&e-iOt0`~D`Ycy+2+GH%8-&}~8F2j)yG8pA4 z1JS{lOpekWQ=)XnRBPH}nl!C3-P-1u722klF};SE9r|@KNA+uBE*ezF zJZ?}K^8x&W?oQ;#ptOCaYo$WH8oB`r{Tz zUxK^zBm_uTVw7|wCP`ajmeQJ7s5B>5Xc`k6H1&xc+S$JtU_)7%KGHhPWB=-SWp34Njti_jM+GHS+eGif> zr90VHI#XPvE!9_A)54`WEnXVa(v|wOJf$|RR8yT+tEozB)mEhS>6N99=$E8!)-Osu zU{H{H&M-gqVZ*%Cw+(Yse=x{R)#>M^==4CFo2=93!E4m@CN}TOL5Ip_jm>gn9A8%9 z%Rn+Q7wyxTW-e_Rv!yxHQ5rM7q&_P|YO`XcCObu`%Fa6HK ztYQ7UtWEm4nfnZ~GtU@iX5KK&$ow;WZ;+9x)6dAz>8WI<>$EB_(&i6g^I|^dLLTEF zmo~tkLHt>oLA-%a&Do~XkTXN-a_yxiZ;4dp1xiJJl$7NsNojtjQk-9?6y{fG^79+D zxp`gM?7Shp%)AZy>3Mq%((+ClrsO?nn4I^zVRG)b2FbZP{p1{-UW!V#POI`9b$tN4 z=ZZKN3%TE1fc}9${rJ<4FHIb4^Npmc&{`^r7D!o3JSNw`WT+#m;#1-oF;|g?o@hbW3 z-Ud(6=2x+OS1I}CCFJ}U6Kg;ZJ~i@r4X-Oo^rWQJQi{rKrJ&qd@+!O~w<1JxDq|$8 zGDR{fbCmSTVkNb*Mw498rcJEq*Tz?@)r+mzsUKZ&r$JQ3{RWX0FB?RZf1@8+rqhco zokSa1qSHpfW3>6b*gsyu`A|;mUq%~12h>0rpBGkg->=$SvTJ5ZX03x{)Gm?qx&TS7 ziesJwqdcie1 zz2It{Hn>Ws4XK>sA=>`#D#k)J=L5%KSPIQh!Dj`vthwvC_uPp7&}1!1XfX-R{0?Y~ zkHocvNNj7YM7O3&RBNt8w3aF1tqn?OYquu2b(JQtWsBCo<*3%LR4yfmSNds|TGqGf^N#ox0jK;nDbB%kK 
zPUGGI?K;ikTijd2ywE~FU~9CA`|i*JRlG02c3LO)dx-!0h=2P{#DBT9_zlbx-vKA_ zS>Y+(D}uypMU;51NR}moxw3e$T-*nn#dWY>oCn7hr@{ToqQMJ_!{C#O!-~%ohvhoO zp&yp%G!A__%_30uCOV9rO&yHIcJeQv0ZRES8`}xX*n?q!Jup@he~)OyZMC_$teGy( zYZi#p8W(X~<0XsM28+YmSg~K5CJV<3#cr%t7L4`CysRdcT?70dJY7O!KdhBn){#NYoAO~VMG59`x zu{dBM<_D&W*@5|DdcYaH#pFPk7#~O!qXT(jaIi-74=feE1M5X|;2=W$y`t=YNi_Sv z7R^4L=}tR82lI-z@6k^ z++`xlU6xENGr8em2d?0Y$QS`k@TZwrPSeq+>BQ4Gf9ec8!S_FdUqQz}oI>3fc@1@4 z3UwP8hnwjG{v-msz6Y>>1p6n5!B3F~aF#rT3&g>fgh|B!%(&24!7LUZ_`%+_Bmgt% zMFA82RVEj8L-4BqpBa!_!6AvR~; zg9dQrmj3bp_8*2vRPf_b{J2SbJVtw{i{U3+&_(d$sc6VyfyRmcG#&gj2A-x8Pd^P` zz;A#buy!x=GIndIZ|qB4Q|Sm{z4H1IV^zTd3ZsKu40TX zVnI+~!_7%sp3QM0NWhPGVu6d#yDdN`zk3Mn;wt*@E@52ZB1SqEDhR`PJ9wq;9Q1u5DHST9WjLGL=d^kozDilIBsJS5BFhKdM&<55~{sd+3z{);!i6cbU?1zE= z@W1E}-w^M9MdN+RyD#{w+kDRJkKlcH7pC$q)Oa5aViFsUYFpgagA%AyHuWjRP8~Md zX!50$zk>2dD1SX=Z$@F*Nxk+{uVd&EXVE1tqZQo5&a2q@oUZ%@%|OR6n&QtKUxQab zXQ5I)MYLQB}=VPY=8x54-j_%P*`M2{h)>HNthRq$6dysNZ z(hGN^cRYYr@GSp*h$is^dcl-Rr5>OD9XbW6Ei57(w5fLk>4RDALS#ou@4v* z@1Vu}ndfp|Lx1=aIVmrqO+AnP@Em?TBcqf*jtaIF`E(C@#1Tfr8K#lTXc3R&$y+E9 zUr%NX-H(N<|A)!#ZS_&&ibeh0!vV#dH_P+e|)MX2@x3%xMiD z;Ll&gkhZj8}^IcOJ|lM7`5meT^CNsZohe`jQ=S!5tU1%Jo+z z+x(o|VSZh9nm;ewE&eWBEq|5GmO7dE9h-Rliw)DvT&C28OojGL|4jMUW^wt!m-|f3 z<-GYEIc?!AcTV$@qn43!*eXd5SY^vT>mu1>T`9Y58f2GEyX>^-mF?44%C_n2WXp_g zGBM+TY?^UeHq5*#>t{YIW3xVyHM4(_(K+n2bDP<`hVQZe;Ue@0H!eNy*mfq?aNyE9 zk4x(eBRMwRMh?!jm%Xz+D(&bnZMQ*>X@gI> zl5VdRN|)DKrNe8x(&lwoY4N(CG@OEbOe-3dqBOk z2DB^90n3!8fKjC(V6##ea6nTNcve##ctcYa_=ct;@LSMn$^&&udB83Fb;{e=dJ@}L zV$gu1S>HxdcixYM5O)T$*7hg&Aiz@kg62t2u#0qs_)14en6!n)No#1DG>7F%Q&@>K zgw-f@VJ%8+*ixlBY*?ub+oUNE+p8%HJFO`RyRIz`dqrCm_Kl_x-DQ$col>Y0!d^zy z`6l)+#k0nbBhFyiu?}BW;me9J;tkrRC)7eZ!e>icgrl@XdP!4Murx$POMP^*)JA7X zO>~h|MOP{n(Tz%ZbeB>ZJ*X*;9@i8`@7Cl;pVa0>->=P$eo31X{gpO5TBpg5nj$As zr>MMyore=R7m~Qgm`EG&dNuy6z?VM!=!#;k8*L^{F*Brr{c7ss7E2A!Nma#1N@aY4 zR3v0bSwg;)B$O+~3H3^0LWh!{FrdjzSf|N}-=)opzf+r$a7CM*@VquH;UC(xc%5dF zIGrXvc8ce)@jx<5-chw*0_zI5V83vJU7Z!EP5Hd2+iKq``4r99bJN|VE+ 
zBqdIYQc|TbC0FuON|d~m8YL&CP032>*JPxu(WIs9(59vw*CwZ2)+VJqr%g=xLYtVZ z)2JjRLE;ooVdMUE#schsu{7EMf4cFfg?6dOpQ;oCDNnVO(llEsPIr>RbT28$2$sBz zXvxh;mYmEi$;vE}%*-k!J+nng&Fs}AXN+nRGq!0HGLCBFGVayJW;~;f$@sfACS9jd ziA{slDIUkhogHrx5(6K9--bB^EfAS(E@XL-W__fRs9G1_*2aLeEuyb&rCA&r%QT)oun1INlKxw zBo~HCQcgz_medmVe<-z*#8O*{FLRQ(GEa#u zXAg+-NQo*>l*o!KiKr-&u!?o-(j>4hU;OJT#ILSdeCqlYuex=LXWbrUN!?jxaowYed)?m@ z_u5|+_iCN8xat;_I^|yM94Y6Vgdyy8K?9WYKBtnnuNwQc=noCVK20VP&}=RK&9>s# z;wZi?9^%syAYQEz;@O%g9<5ojq_spAw>5}c+fs3DTO-bGyTp;r6c)8TEDo*j%c7Q_ zl|@ZDWl`f4jt%@u05T=C0IVSfTxTp+>n&lH zSdQBPzePIk17Tu5o+xJHd15+VBPQcLVmv-3hU5FhVEskWU;nHatouR?$GC&ImR0|7|yPB$+2TmW}5!SCXP-^D=NMTK_Z{LbxgiUxlaKH>!UC&@3mf1~b;JdC|=>RZdP zfI6qFqz^{Pff-{TRO~uz;W>mI#GrQ&gYQKHI7kl0VIwYb7GT4mW-)fefg2BtA%I0t z?3DO@i~)9xN*){M^TTkJ@BM|p`2q0Z&=l*b=KwZaplpP5aV;H03dz*#MraIwRW^Z0RL4nik>T*Qw{L689Xz`#;}kn189xOg6(g|DXa zQPi9iSWR7fpaxg-Cg=m2Ed;xs)OFD@>|@jV3>v_NTl&j=*uNjHsc6VYp*g1p7U_m3Y*5NP1W8r@JDzFUV&HP z4R{mYf_LCOLxLpQ;SkygjrcVMe2pJp^EAULt_ycF z4zKVm8g|{WJMVGyFY0Lc9Ob?Us!i42+weZnl!US5punf_Is60YW;SRb|AcY!lNCD1 zJc7t25CsK5pXfFL71m*Faz79?3xWKHN!>x!9@N)rK8O?eLKLLI|66~U+7omg|26?T zU>_VoCpd*RaS?6e0knx{&?eOVIKD%h&=Jc{q2^(z+Q$blsXt6=AFBSKqQ?EI9}B?) 
zLLm`ypbXS!QQ9cKm-1Io{wQUyr|ivW1Uu0s_M;OVBf>t5E^(Qrd5k7{gZTYRw29v) z?W(pvf_LC`_!CU&9wr>s+y_;MP;)_S!4(1^8hdHjDZoZ0+6Q}K5chvUy!||KwppUu?GCW7A&p5+%D>}h$hVBu{ zKg}s~nc@66dcoVY$Tw&bx=9}4sG{a%+<+bngccFuvLJ?Biee;Nfm+u#5iKNiEegX!|X;aa8#ir}EcK`G4hfe$f)`gDK_)Q_VFo!LBtn z7f>5F?D=CW5<5xMFpH5_DEpOi*{jsa9!-nfq3M=g+5y?29hL2Rn`EorZrQAVTsG-n zl#K?D$hgs)vflV>8Do!@wWj~g57_;97Sj;Z$<4W}^JjATu_pe-ms5u3azcNm95!^2 z{l=cM$0S(pFpZI&W~s8>EJwDQ7t3az5t=Y>kd5Z;vcaNH)>{nAI*V~xYq3jKPdh53 zmUqi4%SU9`@+}#(`c4L{(Wn0pUt#|(d!~Lz;@(BrUdW{ue-85bek(oMXJIM3ENx|* zm5Xe)_LYq`;WBO$FYBhK%i8IAvU*0TjLxW*RWqAqcxIOj%^Z*wv)0PMtZmXi`;hd_ zJ}13%ZphL(uSwV3Z=}RW-c-3-MPHGa*q#xw)6QGeAzt9Og7A( zDPwaTWHtM=teO`jE9Xbc;QVA+u^>y9FDQ_HyK?EXtCL>4HtAW|C*2E2q|<&v+U@sA zoBe5Nakwtc4lhZg!yzk8A$cr}cA>ZB``<~dwpK*K{!2(U19;Z0zc21K{=UnMuKs2GT+uK}s#CS1AKPAT;| zu9SG)rxbfXqZE37rWAPprWAOwBb18g6fa}%;ZVjw2+udO?AR29{Xo{ZzQmoh%Tn5< z)05l-ubI-qUNMb6i>2PzU+R1#q{c5^s{PWW%0E{s{7a?Wzg9~9+oU95nG^-Ak%EBj zN`AmmB{$$6B`4r1B|G2~B`e@xpi{EYUncR>DSyP?^$7YQoMk7h3nSLSr#^h?qHWq~ zmu7zxX$Y{9+Q0=;9poyNK|WFu94ckOF;W_mEF~e?QWR1og`riFAKEN=p}mq5I-+ES zZc#Er4=d@R7nQWoCzRCCkCc?qUzHTJmq~(kN-8{!z01*z#V9l|SRF}zEk1SQODpZt z2(=-GQpN9kmxtL(X_%9ggnLR+M359lL`r^SqU1$pNN!|-*(4#HTj=X`o%I zp*)h@gUD%86g684q8%hJX0hbP_)B(dxMan~NoH)CWW?o4T3o55#??!5T&E<(4Jiq6 z8ac>(1btL_)*5c z6W&XHoVn!0&ycJHJIPFNmGnd(NlOfo)T9_mNlKRFq-;q{Dwc$#8i`A8m)PV1B|3Rr ziA>(BgeRX>!jd0RLX+Q9LX&<{LKAgLXo5}&Q;BB}DeRs~W-KIe9zs8~0cj0VM*9@< z@439sOg5496l+OMnJ3AqPLh=BDT!%;l8_cDacPMXo1P&t>4g%VUL}#~trDKzFJbBH zl#uk@N>KV~B{2OVB_RDBB_Qob&?y0_Iwde=it8!pKxvGHRJ1ryb8woWiq8x1Et}Wr z{Ci5ep(JKlN_^%ViOq75m@Iw+Bim1+vcn`YJ5IuLQY9=WPeOCbB{-)^f^vE#AZLyE z?bM8{Sa~@Q@a{i)tW&fafW$6^}%v)sWlq(s`3$O_Wp%dz%400eXi;X|2C4q%W5>S{eeuX9CTU0OJMLptGG%6lNJ7jUu zo#I|}UEGS^5Vykb#jSuRr}K4+dmiNKluJ3p{;&@EvDFIIplU#wx#Yd)5qA}E-?JF~ zp#=Q_EylleuK2Nsv2WR8@hS5YuktYQERPe9@^o2JUMPzzs>Q9MLtHCXigU$QajZBl ziz=>)L&YoNQ2wnrlyTRt6iRgBSj?{_QvS|-<|XKXMkt4TK2I$q=D>D%8L?jl@qe|x zc+^B;S0>!y5Qk?3NWKms?IMkPmeSM4A)vu5R^_yf~{SmRP zzbtd=UzE9Z--vAuE7ocg8 
ztlFOwtG2Jix9v#Q#mi->um1U?0wIMP~Jy z%8WiMncg>BZ2IiQdYPM8E%SviVZ$hywk%gHmQ{)QvQ9BuwpvV=?G}?|=ftS*2{G>b zL`-`BC8j-OQmdfJ?5IV9!B#iaLn&nOSwaJS&@AHDj{Pp|FXb7OesU025Cg8%6Y~)> zF&(iMlaV=MJmMfmBkp1_;xGCmQKC1JCYq6AQATingg+SB3U_ki-Cz=YAAaP5sH5?x z*vQugppAB}fqY)4w4gsw=YSsK|7F;xnE5No0T?9)TuThTo*aygE=fHB} z{~_XU?9LzK8JG>&-%Jd?4GmzYkW^-XsA&e4OtLe$=+5H=a|AB5`$HfJ7}Wb|%>Bb~ z2mf}S?>)o!zXa^>o?@tvxfz=^kVji54ssrja1LPC8N2hhp#NagVh=I+e)JcrBgf!4 zoWPG0_;C_H?xH=^n_#E$SC0#?2@5d`@=R>dM{HRP;KzeL z5DmF3)EQhiXp09a@WFfG75EnI1RtP_{;i_BaweDywlfd#HTT2hK;ZI>)961Jcm_wc zbp@`$4R{0|g`4mgJfWbA7@|{{qls9dhs+^A1V3K%L$8Qu!OF@0awpHv(6BF`1peqx zAHz?;&X)9V5nYvXfIgsoJ+W(lH{$dV=Fq@JDz7UWAw7Pw*PN0e{h; zQ5d6zOhcoXMbLmBp9B)Taq;=AiW|uTXcZXyoJRcoc{Gww;RmTY!hD2lv9z)OeT;zz zcn*kqT0G9!dy;aW1J$N#>rHqI-h=ny1NacA5&`&>7V;&>Z}ibYXp^63qJuaQNHRu$ z%Oa?3f;B)x>#hP7(tSvzWd*A4prXE>3Fs7F90lD@}wRi_#TI@l1};g zoQ`GGp@y=Xu+Tx-y_~j#C=II_1sl;Mc2f31rjXOb?N^vmo??W2$h)7=B>p=GO+m{f{#)9zmD7ffn-szZ-OwKDiJ5;U4sdyYb@!ew@RPGx%{DKkmYh zljtBP7^BBf9*)u$hZz?KDP=!3+Dq&1!KTXI-=WrDwf_mzjCz_s3pW`T*J+E(oFkXe zV$P$*oMC*PLMyzJ^W-@C!%_S=j2{Q_;{bl_V|?sI2iZ+N$Q_K)ov0n#C)JR%cyt{F z@daAwbJU`L!PGAe-9G+oz^ToY{~A;66CAHG#oVJ&r(|PnT2iOEj4&r`d1EJpI>cZj z1smDeC}b3tb4Ju+qXiYHoAL)36{}HWHj|mMk5;*hk#dD>s%K-E!Qok5PQ zH7JrbhLtjEST7?+?J{iCCo7Fs$)NE@88F@>%T4Z-WhR%U*Ys&wYWA^ooBsk_BYwxH zOl2=G;LxJ z(lawzx@Kib=d40$pH(4kv+Jd0c84_2SuTxp)=K@H?NT@QsMK%+pnC2fq|)|7shIbR zl+QzdK#RG}>rCelyK`A~V~GKqU9rt)!w$rK_%dLN{xD~nES)=By5=pCj`<$aHa|dG z7DPz%0-k@cOOpn>T&Y`FBDD)^q-J4@RN41Rh5d+>+i#IlheJ~0a6yV3Zc5>zze>TP zpCun1Cg0&dys(%{z9&l&*a_=ArnHZKd|B$u`i8dYus4%dhZ)ke$W9s@-K5UZM{1oy zq}n+~s+^Oh!Z}OIT?(blr9w(v8l~8^TMAuQO1|qR$#dH;Id119+wBp_a(h=Y-F}n| zbeKudU^3wuZ~DQPTT8GRMtz8N_^caW+VHQLwrO-VmO57}G@W_qIZjf(*i*_D2TIA3 z2r1?{r6QhFD)7jYe2+58^Qe;?&rZqq9F$DY4U+D)SJJ%BNQ&1DN%ndRevl+Im?Siq zBoB7!hCldo9tN}eV;5&tS z+vzmy0@B50IdlYJpQT3)%b*H;De=`vk)OHb`_GWv06WPEaFwh;Z^;Y{mh_+~NefDp z)SygB2`-eR;3`QBZjt!lWfB*>R$@YSNp#3bi43_e5h1Tic*wUB9?Y+`1>GVL-8_u3 z0NY>$dZ9H`)n2fVFU9zh&oL+1RI);BBqL;=q=h<3YN&@KhXqIy&q^hR$4f$Zn#70a 
zNo;tz#6&bnR79^tMy!_bh#e9dai@et+%Lfqf0Cex|COL{p41JyMW{~hk6=EEpWw(7>RU z*JY3&MZ5v2yiSbKm-tu-hA0%N3krJAaC?N@%5}a5pfr+&e zkk~1Hi7Uk?af^5-9u?2T%i@vvJbWb{2|TGA4{=jmh$Gy9wa^F6Pz6Ph4apFnK>VMm zNMw?kgeTibX!1M>NpY0mlqC|F;x7TIVd9?}Cw{5v;+t9^-f30hmDVO6X@jyPZ9?4B z4vTBrz2cJg9Q;FEQn^c%0?F(kpTxdaiQIyQ5$J{nD1&@RPr`mOabGHH?lj`>bYlrX zi}A~xExwrx#V5;EytBN;D=Sz$v!Z25cCswa&K39Ua&hJ15tr=c;*`BX9J3FKLpE37 z>}OZY4JZY>(dpRFB=*cE{%5}$544yi z&qGgf6xV_!;!@xz&IRG(gm~pxm?;iuG4_QGVprHF3kuiCyuy8ATX;d{6g~+&rHc+@ zn-6*D54k)Yp24*N)LfuusDOOPz-|J|@$ekto;>Uq5Pui5_AfP-MP-&~DYL}B+)fsj zyNF%6r_3)8l6e);Vq1|ab1MpDHhD3#D!OF`4>C-z*ex~{=ftYwari{6%YG9ZbQqQC zB|O`TjZx@^dMJS$Ani(Gu+1qiz9rZ%BmS)<{;JWUrI^X=dMlY#KSyTP+sll4H<{kx zBQ_18V%-ogRt?!=*-#k& zDky|Zh%cfIuk}fec>9Qs)rr>`k3r+OglwGbnxJAS@>a9%4VpRpbGT z8X<0*gB6R6S)AMpz?qvK-oT_b#tAh>V~$bC*h<*Nzn|l~Px3dP!*2`%e1PTHYN@9m zsBbREBC-}D$@DShoi0}W(-{AxFV6TyOF6Nj{{6)N)OX=1`qMh} zmyPHzTZn;oV0$<01DaM2;KxDQ;}Gp}1V4_>WHE*xCtM+b8xrwQ04=;9g##>BFY}!@ z;Rn8t573Fts->I@%NU0%n1@y|2FHm1sVld|#PSaI;lQ497@No7B%Fpba29A@xnRJJ zAX9V^OBN$DNQ%afD{c@1sc0h%uB)`g)vdrEUU>@La3RAEu*);Kwsd2v{P}53->V zM&Kau$Irb3-vc{ruIIW)SH;lIezdU*c5UyX|IcGzwRsg(TMxryKotlTrh=hAa(o_M zgqPtJcuk)i6EpHcthsTEA0K$4hs2^&Fg`zGkbS%vXp4`Ygim<=gA`KdWLyhnxGbT* zcG$JKpZ-_trrPAIlfl;u990`{z+d1kco*J>58y+f$Vn}PW|pr_NaAOV{A5QU>&reN z$=uYgLKC6k|Fst`!PD@eOvU=@GZ<#KUf+HOkZR2|}fK+S=84yLpZ1CDxN1Zob( z?R*p;UPnL@CFGz*lwhL@w^(Ni?l z-%ucan#}!B^FLk#HUHvqQ1c+B@-A-c52_BLw#jtrwvc))rcQy_ios4AHu5RIjPh$K zwVC{gPRi~>_ZXymRufrop#1H0**-M^y6yiMN0>-5Q=h__$Fk3Ztav-Hc;#Qjea_dkIjkKxDE zei%DBT@EpnPjd=g<-cdJ^)b4^FXR$T=^knh#yL0*x9$IiFW>We%Z7s8Rn`M#h0jPj=z)4o>3j^yW4!Zv7qVd)59|pq^^af;tUcr7bRU zo}4A_zY8tq1bH#X#0MLplp0I~#DH-n~OheFswt=k4QX z=19txOoeb3jxg2kW$NE$z~v2%ZZmr2M#krQw8FLY$!gA%5yr-_#A7Fo5s-tOB3aI} zTm5LLee_AMvP^oEQR&ibkxtD4Y1f{UHth{*(R)Lh4Ze|PLuQ%Z@g7tA6I^yL!VxZe zySU^`FwL&VpD{BHI*YlCYNyLe{ROhZ&{dWjdCM}RVCglEmZio#gJF^`T_$X zrZv)P+9EAxOQp$dl{A`7NIkzFRBL`#YRqp)wfUP;Y4NR8Sm^$X*XB)WFejPXchBcG z3-4FYCHBRq0UKcN-sR@gq}O7$bn`n~ozwX32TMO`wG5LM%UEf$N|8pZEUC9DlsfAQ 
zsj+U9YU?hkv>uXjn+;NCvsX%NPD|1B2c>ZO>ryb|Tgjh+_Hdi&%#{vY4&m@3mLss5 z*DLIaJMpQ@mNgc>bl9LjOrI`IGZsk0Oc$x+H?3=C1xfWRo`0Cl^AEGrq>&N zpQEaMv^%oa$CpNYskb$fT3air;&-(x<}Z@61&gJ00ec_Vg-VfKj1(?Rmi&cTlDDu( za_p-l+rC9I?fWFdevPC#?2r_P6O!z3MUos|ki4$#Z1y1E(O#c8ZiN=LE@gPM37& zd`WXDmlU*^WS1UEbQzHZ*KHE#dQ4(m?~`cPKT4GAKY*-Ah<2Icng`1%F7cba(4e3P znt5Nt=N0%la$4=lDs5E5|`vi!je*n zUs5lz9$gacF)UFYTO`8ch=hCGBcUG8z~?|#q{@;h?(=3z=)-m)@i85L zQhW>~$#j}J||Am zkBMXSN8%Xut2m**I7duzl(IL4GZ#gmLqQc3LKdVzEQCiAcSRBV#Tbcy%rrEenc@?> zK)m9d&{90aBhFuz#D~k`_;_)T&k)!6VsS~R6Q_h8S(LCw91?cR!h|zom++`8i2s|| z#s4C9ab)er>cl>V9r7uE9p(2$lZON4kO%3I2$3#y8Q{2QQ#Ydb{Lc}p8Mi!;=3Wx>M$+o|4(A56i67_hoj< zKV=RY%-p0YcE>Unpc^}NPzOqvn#ydMRk%=Q7P`oc zLT{N~7$P<#GFca8h*eRESQa(Qw4xPaQM6gii;jtD(fwjt_!@jCW(BNV^I6rS#cZH{ z{n%-OG9bN2XikZPFl_r_o2f^b0?~Jfe@eIqU2Z5=mF8kuX#;b`qS67}#k|r_%n%h# zE0e{fGGB};YsIj#SM&*_w3P=%Q+cmwDxQZgMO)5FzKlI6U^O`R^9Yj_5w9{bpx-j4p#MGU?a4WM5*nf1Yx2_8QNZ5UK@m}C}m(cxk@ z%mh0e4Yb8@CGbbX8~FF*@F3s)fWP@Ce~S+T)U%=r60VzXsMSOz;Mr*!Z!28W$Y=*oYsS>{*B{h9EA44D<;qHqip3u>V#* z%A0)u$A9OeG-HpUBRLRHTO+v(w;#k2yK@JyKaBpf8vSJ*b~nNn*bX~yaR=?O+k{0Z z?Xi!3IY7T0#E(N# zkZFc7v=Q3kuMfiqy#G!zCm4s+*_U>9;-6-pVf^8y{#D9i2v6Gl14q@)i-7x+H>F?a z_$K@X-huakBLA(0e9k!e3O~M^!_92Q=nwSCkHu&rOVLEO1LNW+3jRrb7DmmxP<03O z`43eGv4*+e489Nv$)M(fltL}EKo=}Smlz^mTuns1fhcYpTESlQk7MWs=g7^tMjZJp zSqC2zY5qbiubV{GAXNQB)dExvVoLkC9sld|UezCFfITdMV2DGHNXJG#Hp+?C>L|Yj zji8HIc^Tyop?$1Do7jk6u!E*JK$D!5_Zc|v$W5B+ZJO*mw1cVK6Ez3pF;F!KH4lQb z^^jlA!M;N42ex>6T9fT z!_@N(k?u9@J&z}!6LC&Gd-5>HE1+r+=i&E0Fmq8qed=T(ACY(PK5g+1TFhI-n6Hv| z@FIQk9P#$k`0)q)c$_%*Ch_(o#M=)M$v#N*c^yCQCn~-|WPTs2!#(Iy7xCh5N>g`@ zRdtWEa0-sYQMl~`vljK7dIU?&!64HS9l{nnP7F&gYy?w&47xxvWoJ=#0cGFL!)T<& z?Pwlq9>x&5#2DporThc5#%bdB>%4jyX;RzvgO`; z!`;T%1`HUCZH#dPY)mz_0aHv1CG?g&1VSJQgqlF;#lU;b3X&)Jy&vB1&x?N-XLa`M z-1p4fGiT<%b7nb?pk9BA&G$exZCb3q+fiN7G0Nf)WpMy4WhA+d#*@o)p%Si;F19HPouwU3V3B|2Kcf-phgH z9c~L4=@Jic?pn&?D%!~9=u(%W#cZdoZbN_AN^ERKFWiV9>+xe9eypWD)=)32iI0`E z7t{wzV=X(`Z0FUrw4R&r>>i@#1xo54JZ1LtfAF@BZjWyM4!8lX=I_fi+|uyrV)~3t 
z=u#WdV%CZiJ1O{|gVs@kjVfxt0UIqSFP+%v!^R|Runz>UX5r0DJf2R;O`|@BiJ~d= z6+;NQL-Z&^`0*<*(Y4=WCjS=f!**V8jNmqJ;PxIKAgh9OK!mUcB4mmTh0Kw`&}A|ybdyZfTpDBV=1@ys_Hf~|uUiQpE$*UM9^f1RZU=Lq^%2kVe}F(irs< zph2L;D9$Dl2QG#*_LwVN0H1%`P&iF4!erw?Cx@TEIWCtYzCX^)SQG4XC`O-PlNgj{J( zESAxURWi!iD2>iBQt#}QI_IF&xaLT;Yo%1WE|GEDqD!tuO;hiXD-kDPBT`tAGty1W_RtkLda=!DD>-#f&CwX4(%8;MT-bL8EB8_2h z25|t>GAVnGz1;FUIp2maE%-9p7c31vgVd#1(RJdaI@Kdpsp(RgnkN-$rBa?&Bc*9g zQj*>&Md<@lm_A+d)0aqY`ew<=xJI%vj!Q;dl8l^-mAf<3XWyD;ZmGyLf45x*GZ84{A9_?&yd{we90*&m+S(be<x236IbCM#8tp9;pi`(d_Wi4hn+3?BWr=C7ckdh2+CF;e*LA9 zziXh3_lr3e6oyJ3zo(tU?>J=_$4XYQTQW;hCA}n9(n|Op&e9r5DQy;SX}2Vo4vMFA zp18{BVas+&LfKJ?E4x=>OMfSErC&)r`imlgwK-Rlw%QM8K`~k!jDvP)f;y;x!V<=t zrF`#L7Ko;!lQguLRQ9Y%sZ2ym@k(-KmLydcil?el+&nAgtm>4+s)-U`JzHX{S4&Lw zWfEO|SfZ-WimmDuiK_ZS>}W926(j7z#u{wQ!N%mW5e=x7VH#{T3H%xm$0BTJV>_*mad!jrU$mI`CcVTq zqdzp;(Nq$|-t3jA<}9(b6p6K^Mj{ap%q@LlYMCYxEz88%a)}sP4v4+op)blsXuA~s#S=jcqV80Ff9efAg#T>9F2yI0xx_*;r`)v}|A19jr zBnj=$kPsGi1@+fRKtCVoAAs4wM5_NPg6%jwLT2BC?`VKO`&ogF88E(y{*dEnsDTpx z&cSx-80P<-*zYF)y?h7V&m3$*poC8jm*7Doohmo{L2kH1bm~Jspb-yI!9yf6)C0q? 
z0xspcqwoOyh2X=7N%#N@DcdQy+zaYium(^XP+mKAK%PA$6E_a~6R^*e(J+JtK(c`| zG<58Gh@fNPMnBs@U~{vZL&rOZ#x{q_oI?iYOoD}QF&yN&d$|5B_=eBo!whCj*qKN^ z$3hEbUCoi7jg!ni=KtfF|B^%$c1_cm1J6VQm?Jb~SQvz^5=Jl?=v+*Qi&n(uSOVO` z%`XFJY%4|q39Xm~>tQd~oZ{M7xjB6W_^^as1h}N1GVUb54IE1;>umDu9mGEQwPDvZ z5B+5k8o*L)uYff`70X)uSdSk(A0(UbV>5ni#gB`VxDjVVB~TU@li0<};R@crg--Fe zT>DQxgAW_|79D$igibTZO3FHK3Nbi?XQ1X{AG=0A8ort}7-WONCF#I+*a?@yrn0B(9E0O<5^jat z;ZC56{Syx&$#<<0(q_cP+Ckpq(DAYF*6#?+_VEdVjOD}CXxOyn#62$ zf`!c2my`Zl=FM9uf=eliJ!lh$&?at0n|OfYdW~ZHoH^~t^DpWdl>0$x5dZxQ$cQhY zcpO1~?BvIdy-c*6V)T_7(r+gHPG+v-SkE$vl!uY0W|95^RF36@!dj|pD>LlNsk(jg z2wKcT%<&!|SkB?;eUzHgUDW!I+d-|rI0pYN1FD8H`B(zTj~<;Rl6=HbZeHxnPb4(&jfwm|s4C4swoJ;69l(Kk`vN$JBY@}f$pK>cF?RrYMnY24doBcSM z_aBoXX0>V^#(cVqWmsNE`r9dqYiQa>`M-KP=C{n_KZbwrgK-(Jcftj1egd=-Y>&u+ ze9M3xJ!NIVMl9(k$zk+|8>EnEt03)q(rza04$|%+#{(#GgVewbqG3K=+6p|_h*qGU z1Gx^Z;3Qq_dCt58BWo}=U|X>s=(YVaqiTAmf!+gqYDu1k$TDKXhK&TeNiR0C@Vf}T zgMBdZV=I1aMkm{Z9~)S=w4U-eB&PI8uTR|Wx@Y|cr`#157(gyUgb!b9siH+67=qmhJfgj88V<~dh?D zBBeTk`WR1-FrHlZ)5`m?tr!nVgE`4&UB@5OX+3q#Q#(52?mZ_P$4oJpNb zr%r}xuTzN8A^NXDG{Z^Kh@DpKum*xu`|)BNUiMO2-IV57qNj`K>ZE;i1iUHjfoRl0 zJbMug9Pcx6%cEQ0MVGn(mccy!=Bp~1jz7b?AQ=wT%amZVOb&9$#9;J?kQ5monl1gI zh0>=fmtIYs^k|x-JFG*x!uq5$d{ElMXUmxI71F9DYP8o#llEpAt-W7HXvjEl+kvZG}_~&Av#IwqSK@{I#+7g zJGa_VEtQT@QW4W3WoR*_F~d?EyHE;a*Gqovm68{GRB~7olO6l2WXFBMk+lcEa1dMD z-3&oJ40)3nazh_<@^>reoA7NEWz!fJDD`oAsYQ#aj(12^yh|z)d{Ul}C1r_)QkqyH z#fc44l-MQ(&OXU^4oR+azGOSsN~UXlkEra{mFe4Q^1 zDU7jrkEylr_=^;KL#5DbMAxyR>%>ZKid%A0QYAYjM>123B_p+3(o;uEYHF9Hq)ilW z+H6TmTP2>fo#IM6D2ZuzOG4Unzz*F!ALRrWYjdu|&RT5FOD7gHhy!SYQJ}P!3Mk?I zLdZ)GlALrsx{g^gGosOS5+yCuE2){8l9E{P z*dd9<>;X~iLsQ9;*pecNDXA7mNwe7b!rWFmRjj3p#ZtOe%%%IpRC>D@OP+#{#e@c9 zLW40EG506^McAA|`n{mm;50xblt4aY7Gl4cxnC*YJ)*_Lqs7Eln8Z6T zRi>k{0ib4lChOI6|NXzaJJ4`*AT z=sLpDR}2!=Wu^e_K&RS8BUVA(MW(uNc5D|6@p=hd0tbP)P3P-?4?Lfwm`DC5)$$gM zh8q4Zg*;xT)e{4wnEy6mzm@q5K!S-`Q8SfbBTrd=B_9 z#M~b{-9YuR*AQjEF&p=N9n8PSV!s#r*fmT-e;Go58OAg2MfF|j& 
zPkznV)lWlznT6fCun?$ThKpz_u#)mvMR}~jkF}J?I{er`Y;2?h-dG5f#Rdv+10UY7 zl|kZRt~t-OA9AzChc$ekguQmkxsGEom$Lj(+_)W0e$Cj`&1VgQvbh4*z&h9nTYw}P zsxbfx92tmSs%Id8A6MeX?nKCdav+0OO$8FTVlSLw;Pz*}y!!!|#Zu1dStyqJN%jR>i8uz)ovl&^U$`B(Ye0m`Pbw+{}&5x5a< z0;-lTum7!w+{Np=;cOUMh!LFvKORhmV!l8n$nM9`xlOF+I0}!zM{q$#QB9?|mrkA& zH_-kmTOD=-R2~n2vUxM$x_=UKn&Z8{a6iWf;bC|begjXzGw?j|`Z9jJ=74mlVINh3 z>{SBk)!k?pIQ+`1@VQj2=AN_>9dbK;0AfU5jwXs+kKQ!eHU%wM@r^r)-#O!4#{DT*Tb(q&{E0Y$U2BWvPS1s%Hl=ly3eA; z{FXW9lhnzh%yS>YkNffCUi>)A{PJ#Qd3Q1MyMvkFZRjy4@#6$DxqrEc%*g-Sxv51QIjiC?^a<~u@aWU->~;As3v~`Zh&jy3JE5C_P<1nxrF$< z82w=@?SyYkDd}ROuaflZNuRwi(G6EJ!(M?O%cz$n%+eR5N-X3`^>mDSQfCgOIU7Z5 z7Ev}EyZ;;2v{mz0-QHff9JT?y88*U5-$?pa(vKs34=JaUelF=3Q|gtZUr!5d!bUsk z_fQHGXtl%qn~wspijuhmec)Q6_XM0Ldfo;0p}>z{QQgkT0J`EJy5?YRZGqedIY)0s ziwhB$E0W*RlLkkw`Dh~CTb@DQbSH%jYs@F^?4Ms`CQ&G9zu z?x9=XVq!P|b77e7bu#BB@ZLappo|aJlBWo4*{~Cb6%V#jv6D>=6v~)@a%l^wm6pIJ zX%6g=roeGBDrkx{2F;iHpfyq#bg9$?T`$!^cS=>z(^47ykyM6UkjhZ>2e=WNyDZdw zB*PmkLoQI-M<3_9&5SegrBfe*wxX8~jakNM?9v*RD9vHX(iEO9qr-D$RCtLrXxV>J z+bDI~F;b(WWORd4shcYmy46yq-zg>f>!n!F^AGx`;6o`i{0L|eXfX$|cZrQ5ik+cZ zH2DXXZc8VBkKud^@AGx4G#fS2WH8DoL!>kqVx->ak~(9G)EIgG!B{9&5fxGyQ7`4D zRw*;}NQr5(6q)8qfoZkmnRZI9>3YdF-6ff(-%6(W6W}fY+2}BPxy5gGF#L<94B{B} zLKn30_vjerT=-LuFLjZDQftvkjoBns7Q0kf5~MsbS;`{Qr6e*>iX%&<$XX)>)+Wif zc1f;vqU6|SNtSJ;WUw(@8oynXV!K0pwx`5v`-mfoJfy-^*jb0o`3wQoSCoCw4o&=B zAJ6!P_o}$25?{*g8YxAKDY09n$Q~<&(Qe6)PLaImY{_*LNsfcxe?W`LM2ks}882xu zGbANunRsJ(!YpRLxMOY?SIiUQjQN1$PyEeY5iZBhYHZGS5eIIjB+vp4{H=743MXS6 z{3)Sa3gd#MAkH9pXfZkQ(UKjXC|U8zl9`Yp={%>DmQXIKiS^=393$SuagvlcP27n~ z#F@BN5)=1IeB!MVoA@}eJ9gra{LKgxh?P2Jp^p{#*S8%Qb3VA=D z>vEg{lI7B(>zE|n#r}`(I7xAP#OF>GuO~;6JtgAt)QTG|#+B47iAhr=K53D}CT$i+ z(jKuVos_7gN5z)J%E+V(V)rmYfQw0gG4`fY5|@#Lrjjl(nfVf(St0h!Mv2Pm6f64&Te9YfIcvR`vUZD+&Fc)A55e2;oftD1 zEx}sSo`YSs^_0$x5e=xCBaiz?0q3$I9lZSQ&SKn~&HOtjTw-#K;>e8@dv1(G<*^3@ z&RX+wB$5Rqmi&4#=eLU~e}Whpup9E%h(3RpX!DOsSl&7K1AHl3G#DMMB>fqg%sEMa z4C&WH1+R-B7cwCgsBTHjW9*;L*tal5tVMdU6r06d90l>{DoGMil7YrjCgnm 
zdbk^2BBP(d&wkW%X_H95vw(VlDo|^H@;FEJijUWBY{z3ex{NjGm3#+V9Vmv{FbPMC z32iV-P(zdiHqgkGnAt#~Ht>OlT4;xfFdsI-J_`Cyc%Goc2Xq+4EH3XarafSz9{d^* z=W+nak@ZE)|I4wD?Z_JJ*D(ia43MzV8VPFFQ+N?09eOLBZ7U79l>locW37Cktr^Dg zdKRpMYq<6{c!tk?3itq1$y*OLnxO{NIv^xI$>5k=K@3zg|0GM14cH&e9Gqms+L(uS zgiv@|FwnW1=-lYgdt!kOMTwkhlc63y)H4=_VI^Ea<2%8%Pr~2%96rEAY;-^)R8>(A zxUV!IA9;4xV}BI(u^WM1T?ZNfNitYKKMAA&HFRt`ZtzALmYGV8;-(i*L-zs!#q&Y_ zAAOoUiQ{74-vP(ykRRpxkNF%vKp(EQVym7qt{}f^9Z+f`8dww0fV5(tDm7wP+shnq zJhmsmWEg^}_%R$xCrf$Ez>it@!M?RJHxW{S3Y|-0bNe_y2R3t4ILLJmAjG}zUxy;o z8BbxW32Mo2F=d_6LJW+d4Rm22yGHDWk&S>U*qjD4f#euS(z(GRSVFxl4X4wmURL7A znrQS57i968vRFe#R!@c13>2^Bn$ukS4gYD}baJ@Q&yq8#r!)EtJVN>Snv1F9q>q7k=zY0vgxl6!v8#dMSzQyo`bC zNv?a5>%U=L3E$?E-&*Vzkmu9^+Q1;~Z#wp|8=~^4Y+eM)))u$~cEaVb3$B8zVGrzs z>nM|h_;DCNj>SL^x|KhAOEKbOsGg6>{2d6rC_ z*$Zd6?k`NLE=VuIJO)p|uXdi}^$YL{{0`oLKf#~jJ@|<7_}I=d9Vm;BXjC7q;oN>86CbN5 zLw?mA0(h+f1E@6^v5*YePzLqT3f(ZkEPjaD(hT;>n2Wlxgn9ES)@N))GuXlGVKN${KefW{`_#VE6&(MTEVSe`kbKUou&%DQ+>QBu3f6w>ZFEc}Wp4rng%ygb+ru77~ zug95Ll*%4z>KbgTbr_d|(jvCOH`x3HR2ZpHR^fdH?&Q1nThU@} zVXk{ZoP=izC7VOqC8!!zq}@Q;Eu{VHei#F!Jq68R7A3G48y687+iBW+(G5<}wVbDP z-le8S_P|)n@gi6aA7bx+;88dQHv>I7pA5lg1JPwcPl+LIH_bJTv~x+jgtV(jyPmY0 zNV}abtOw0wfbMe&n#62sWGUU-1~iUcygGzerzxSA=x)D;k*B-nbDRgNDLo9g!406g zyvyKX*i2k(V6MBKx$ZjVx@%dJvIhNOC4R7q9etpZ7Ewp~?1f3JE+AIt;m2H5f!Vy8 zi6=8C?P=s_Dm6BRS{*{sn}Xe68RGR5*iv1<0Z`rH7Erf>m9T`eScLwtfcTt8oy< z_x{2?*g6Et{sveMbl2EXUsD8-emFiGNuS@oq@7GAHYO3H6YygI9i$&W#^FaF_0o&V z(2e>qma|=asspdvDY-V1>mCE?8^oLk%@Y|EbXeVv9oxWoX@zF+nw9;N$c*Tws=p2)1#j}W9)<96R zwM1|YQC>sISF^gT8rwC$a2T6A=}uQ07!tq~7=T{RjSXk0hCdyFVc0T|x&=FS>?Du} z4|YmOhr_O=+Q zdxnJ5xy`8|SvRkFl$|vljBj{v7@qyY4W+gUf zMo|Xb-nyb0V{@#BY8zur-YespQgeWmMubU;(ICY}ixjd~OMx*_@{Gxn8<8P75qW4T zW#}w*l7SYJX6ls`_RaO07D|$7qj*eLi_3Ie5={?Dg6SPeFn_~q)*rw&Y%Imzu!Es; z%*fi4MvgU5&bbnND#X76uFJOsOCDNGuEiwT7Mo;R;v^%|E$NY|k`|dGDUrqEvsQ~2 zEhdTIF>+f6#c7)_iMI6;Z@WrjZ8wR-_JBm&*qPe)tvIYatqPm5zYu$B9Zq*VF#vT? 
z0mYmvh-Hk!``P%Dg)bTQFiA&?Nwr5xirpc;Xs0AcdnGA4Q#?GU3m`Xv zaUSod<4-F7_+mpOIaV)8ab|JHvHwCG`!BF(OkzCGKg9F=Ljun~Bs7R4pNB!Ls+IUk#>5r@|x zcCT5Yymqnq63|nUCDNB67GEJ6OSPC%TEv*rCx(=1qEA^S+LTKqJmr9Be5WMD_nd_K z_#IB~PZH*3zd`H|k#4t__UNM?pqSUWz)}~qAI7`jhIrm{q%ihIi?OEb#FD}9fM!_H zQeuHUAToFcBQp<;rCfBGqePoIR>CreBs3G{J9CQ!X6_@P?f^y!8LU`MzaW7;BQ>A2 zCy_SWn@VFU^#FyC&EM&O)P&~5+?Us}Y3L6b%zv`bA9BLPm}?M2u0`~@cJ!14bQLf9 zN|tEyiY0_eV-OE71msPmQs==2*hA*HL+3ulgz6)>0L=Zd*H7AQ40~%yUui&jyhdJ< zRL&=Zlh<*aw`XBLhxvD2pco4@qAk{orr0DQ#a0R8Ii*0JQ<7pnP?8T-&;tE13)b@9 zZn%YDdjdX!AIUK5C`r4Aw40cQRDsffav(jEG5|NkbDkw>tmVOeA#-XfaaN1Jht7T*0-+;c*K5L%@e}zNsW%ouuE8$Dg1y zpe)X%l1viE1kO1MuwR7zQs#i=%)u*}ht-5oa9TPOZYcF85}*@Po!cnNViW~83TH>v zLI(`OQr_PQH*)PGeC7lA$*;rolYT3g*Fq`e@wd`|yd0hU9aDmRvSh-pp@!!m>R5x( zD0C!26rP4oO-BNhMVpx$jg7#fBT+(Y2bt)o0P3S-fcNIZ#dH9Nx%NRm^FH813o{n- zHX5p+7<;+JlmWQ-JC-DD)!4_b9=n<*^cRw3z(Bgt70eADKYH+^7eB_~$9U>wfCfLF zKpRitjcYR96#^kIwQWN0*^zYKGn4rI+g=wqK0nIFQ9 zN6SD2KbGUiN;>uxUZ6tx-5yzn)5{iegS?b$Xd8>3=lX9LI5PL=?HbCtkRy*@inEP2 zNPbP&4aaW4Aa<0^Suht!hN02Oz>{qG^^l7=u7iz142Z(eHVo(-kqpq{A)VKh#da!h z+YAQmn^-?`n8D*?XbYb)h{cCi$^g48^%GAwZII^7S1=MjjWlP2vbhXa!CD{*rcfig z#CDEk%dZn$$&q6AYZUv5k%Rbg#42*w1-VQdsMv#4p4yva|Fv+IYu`gV`AMoN>%3m- zfIPmKb@N_;@)M{UeL@8dUo8RPM9unONSyb1mo5GCq`S%G-nO zsMWN8@*JS@tIG5M90g_b4mbn%!9(x}P@Vn>($gHDg%{vu_&vM{Z^OIr0p;tnt z8RUHQkEP7rFG8Q#iqfF=CEbtyahy^7Io$ajZheD$KjZ$-C=w%kaNfjGJ&Wf9LkxOL5;K7;Y?Po&)G$YHVxHVi`aP`u7$E&YRF>&{qd!+(qrtp{AI~$V zd4^f|Q_Nr=rw|{Zz#d|r`K$h+v=6oRVjt`Q<-_;zCHxaUhmV+Bz0dsaFU+g{%zWn0 z%&A^ye)$qJm1pteY0BdXI?KnHbv?|y?jdG?4=|rOhadM)9%pDkr|Vi9X%IW$J8XUm?}MsBA{84-3(%6j89gGJ^j)N%M%sC#T}Ik!);5Z? 
z+eo{MwEJip6G?v<-Cz!(w3M1yPcyxYW^@4kKs_1r6g8xNMQ23!*vN4mC>`SO*n0<5 zI6Vw1%vJNcnX)*_{O$;I-NVc=4>7kpKy2*8k3E#f)yx>LVup7GGrwKjJT4UAC=U8J?S>lg|%a~hZ*h!N?{mHVgc!|AzF7Z`@W9Sxq~k6 zIl9}g>EhMOrjh4AhB-ciJ=N?~H=~*)y$3c%x~El?#R}%!Y7YulD3f*~{wL#q1{TzF zAjPCzLE3es-9!uF_dqF+k>@~W$PiagrX(g&7V4LD2bk}UC!)t=SD{u~Db8WZFf4=_59J ziP3J>VT{EOzFjB%V$!c9-Fl+5g@0qw1}6ARW)!8@NDVg7!PnEl)}g%B(E{tRtyEV9 zJ*L0>R|TPe1an)50bch8Fg)SCP6;IaaMD-$gZUTzAr2dEZ1`xUnb^p~275r#kJa&4 zEmv3LZ51U}Nv)L=m1RV5DUn`6j)mSDT|7xrLpqlVjKI71LELmeGM6^?CT%)jYo z)sSrz&jvNnZ`GrZ)lnz4#6~UsMm24v3LS)R`>|7ioifs`CH-bfW-J=VB+kx3sab=U zmr-IzP#5n*HTe^K2RwrTYHRjYdWHww;wEx?84GH7*vQ|toUg{8s<1$GjxebRFks7s zB^$P4u;s)K&p*&+3rH;=%XxUk$`I0L|3zvtlUAUVyfjKXl~$KR>-W(U_~;FN*jH@B zjvD$5n;7PCYZ(KLP{ZF95sY(rk8LN>ID(}pjNhFM<=KZ2i{u5{B{w8aazfl_D=FwI zS&|uAAnBnMk`~$^shW22X~v6JGhLE2%f+L)MBJJK;tahDo|VLqze{51Pm+iZvk@C= zN$_L~We_={ztnOphhokZ@LnGNpvE|;R3JGYq2pKdjmFx z-V8Syp%O~?J0EhajP-dx9e>hHL6U|RlZqCTVl<1_7$wOO@sbqb5l=)aI!i9PN+~)^ zoy42kB-Yd~4$};=n^uU8P4ca#8^mHdEoRdT@DE=91oWGvKOI}+?8HGdQw|4XJ1F3F z4(Bo;6`y=urt(kF3|SrQXjguYTEc5917S^LCl zohBCRGBH~(5fl5w7_FzoU`3I)ek{hwpMdY&NqY+Rx?`vZhC9{3)KT*II}=jz$IJUk z;KmPj7=J^M-7%N{)Ti7qg?C24bW2zV(OT^&V zDtgC0(K>FI@aQMuefVCqcJ3LZJsDe_anwURaR`Nw4QZV7;*SShT$2E7)hjV*G0|u- zQE?`*#o5Ff7b}r*Zn4DkJDl;k=qu%7g5n>8 zze#Z1w-OS^9!jJ+0efQ-sE0)A0rDUdQaG0cF5XW7WFLveF1zAH%dW_TD;;a&KSOfvT;-5%_Y_Rt=ZMl_%-UiEPv@2YZPS3v5$_=EVFX75yQdXFxJTMU$)+vX_|U-jD5PHvMUaENG{VWlF^<2Z3bBt}rT>Igp#julyBcP)mf>;S%Jr&SOCpa0FaZ|dI>+T>3|A)`~z-M`$wV2`nwt!~O zAz~4`3H)s*tj*+Ck6lf}|FSuT*PYM}y)X{?1Le8_5;_v7h?2}*pXx0Fb1qN#)HmOzyqkJ9+ zb%seJ+Qf8@GXofig>VxlJ{IAJx(O_C0+qU$%q$|2gE+})0D zki=`sVk-%6CZSC$&^@l@x_i(w-e;W(K2Xm2jkE>oH@=NNXe{CT2G>RN1ag@cWA>Jbsw;kp>>f@Gw_&!_khFh|#re0pPa!f`CA@H8>VGRgIo~FQ_;De99QdnKW= zg(|#?nbi?W*(uwTBkE;pnk^MmTQX;odJHH{s zKlR&Iy2lDw3QC7~2YV`%lx=zqfA}5Yc#yK#kN&`uzz{{+2{aEcIm@J}6p(Hi%<7+FM&EF?zeIIIm?ce=E`ZTe{n?iJp{i%GkJbnA!+eh(BsCQ>g0SR2o$$I*eSU$5;&<>)3l#}b=kvD*n9 zzi>OY_5rQcZ(DVbGhrBpU=n39k@%cIolIb^I|2QHC)l}-cu6}0ts|GTi%7SEsHh{| 
zX0(DXX8#k=31(1BYR8a^@oqml!6_o>b+|wca?k#SL)cOISGSybK$pd*ng2pJbV(rT zhJk^!%@9TUaYUkv^u45?LHfC*Urd^nXao(^OdHlmIv9aQpnX*gp%-eXz06a7Dw1bv4`>d4`J)XrnyS2n&T1~`mk6+-i7ah@7zg$5PMx_hRYVp0N7SRa)8Gh zBn6Ur-ve%AFxrYnTzZ{2bw)|lStOp{a*kuamRP-8V)Uuv(C4D9luDGLUTo}}8)=v* z76ZH77}iUKf#)9#JpZ6S5AVQNz<2JXJrO(Yk;EZGjWWoCOi1BeGPv=_$u$XPes{tY zB5`Oju@U_KgpvIhBCKMM;Q5CLo_{d${DUb6ZKV{Qr9n)lPBEG$iGk;n^rp3iDJ)St3608(fkIe1N|XZC;He3(ZxoJmW#p=DKv3j zw3Tc$mJ$h!Yajz%Fa%2&G;W8(B!~~Oe?{$+#h?R`B;UEdVq9D=5Hsy#PePZ zenfF3MlxhU8F0tR=!4m?k?XGG z`m+=^KBWG@=XvhH&Geh%lGi5A8+l*nWe!d@gVTaZlbcHx zeyGkgmkuqTvdE__^7%l18c-kk^)MEulgW!Lofb-T&%U3}(EI>QTmo^MT4d7HG^>W003s0Vye8c-}p8+Of{H)2ZubYxHm=vWzon78%m)GCh-1Zg8y=^y_L@B6+ZVpH)dunnIuQm zWphj=oRhH0vNQi0Bs1rg{u7Q}mPe8gsDNsyg$5V}O+nmj@S}~`P&YpIt@Vqpon)kg z4|h->9lg9a2O)GD*WJkV&oD@!T{Q6xGT~c@9kzh>KMUr>u7kfV`Q#tF;n)qV#7+$; zo1>u_+Molvpc~W|KmDQHfbnAzehgA4lbt|WOv3Sr?F`U{d4E0E9Yi~MgwK4AM#Bt_ za`o@WgD*+gRnLL(IH(x08>aHw2*29wwAC?U9=NT;Uw!-evf|gBfrN* z6XR_+dA8$fL@V}H`Hq8$plr?nWp5!YgH>=5P^_{Mw!k(ZY5$!4Dvs9@BM0Fb#>f1+zz&X= zgR*lqTnqbwDwe}=Bb|12|dIh zRi#yVo&#Ls&*x^2%H}TMBmbd;+|1v%!0kYDm(y@BJOGaXS@eI2{WA6PJ7V-z>f|MY z?s*FO84C8vOVBS!;8AvWz;+b2^^|qMGCsEsc-#NQ?_Q2KfXdsga0>2$`{6-&1bzcg z!?W-ryasQ;JMcI782%Z`LmiQ5AwF~v0`4P>zP| z3^nRhBi{^AYe6cZky%n3?~g^7=$Eeu)Grvn|C0jz2Q$x4n2mnOyy`tF>(5lqTdewc zorNl|km{p+`gXVhl=g8YTn69c1Fh8m4915X6`$b8=lH=dj$tPjJIQD)S!Ai0tW}|Z zG?I2JX?LO*^f7Or$l8yo=o51&k|k8bS~QC7%%!iT$WBm252GJ^z^H!YH&~Tz_58;c zP#VN%*nAKE1gZ$%gg?UTqCxjCf{i>RP}Dxs&ZhW_nI~70b^~cQGh^u>r@e&41Z+$p z6lW7AOVKDcP=%MHQQW}4yU{0pN9bPg=X*1DF9M}OEQ7yb?^SpPo&r_Ps><(@0Aw&7 zX__g&XmFCF6w=M6go|hf6{K6|pULi}BiTdebu}H=$g?POu(h0~x`q5+L$f{3tH+q- zd`1b4Xdm-A&VgB=!st;r2P(u5!#+^W;3~A3E1BP2Nu69lZ0uq#b18mo$B%8q$5w72 zo27>GY^1AbCH=8P!+5%nspN4Ud0b7+>_81UNY`?P?(A*Oj;y~>8pLFn2Gp zh8>`qFY^RQSVo(OFvI=cBc5gW6J+Lc7i4m5*9)Y$#Bs1Lbfe+UH4KJJ;Y}>b<$02^q@bC#g8uh=wO!F&P=zB8E`9p@XHfK zX#?rDaZB&x)fBv1MAU5JR=<~8yOZ1g|G>zKFV)i3$}d&#D*x(Mz@61^yNlx(Xn`ge zO<9a)&OM4cVfQ*-M-c(+L5UxA_)&`=HTY45AC>&Sf-A}>fl^Aogjy=5R*Q(JLSnNJ 
z^{s%`UVv?d>UFkbLzTbEe?QQ5`|Z|qtOjmrq!|brlm$IRfCU?NY{Zd1zdwl|h182u zANVCqys1S6XeDy{=vCAXs!MryI~>N_2jC+ZSp}zhKUD_vU{V0Xk-(85kg9 zbd)_5LJp*JE+w3MJMSlH0>u-|o{NFu=o@XexVhgSiYw&1^4C)a}(6E?;mWwHH zrmH{nY~AGTd|VTVMkxmUEIZ;CeL zYtaVtojdlsu`$X-9GZzeP}8<#{&qn;#Nbaf>tUkMVQlQtVl{-LujnLF&;E~ko`2Bu z{DYp~pwK5ugx-hNk}U>9spt)jqBC?!xM4`b42vbyuuVb?2PIg4MuPOONTB|o5~$-l zckFj!ryjc{7Umg{0v<@Pw>Klg#T20xlTj~5qfv|z7BNKF(OTjm zNwlU6^p+y@mRbolwMz(EOc2i}1+ZtYuuqIcoMGVn8hioVGq67fdo>JmimbE)@PdoK zq0bS9w3R=i4eniJ6IzFBoYnCY|)6x z5-vtCfG$#t&SMabEkZ)0SOXCiEkRL<5*Wpp&7K40&;;~;cJ4q?8-W3^4IgZOAmMND zi@85GMq#(iPJ4`|9>C3OrT;h}3ST0vltE-DT9Za1z+h$lg^fKZqQWIKS}#E{MhS?a z!eaP<5-ik$iP%gifl(A_FUOg{U^R9x*PSMDMoZD(;T!i(K30$2Vh8OZhI#-euYc8l zY^(!{M1L})Lq&igDp+*(5D9mLN@z?NdJmmQ0)g$MQk_(u`mobQWu`$P)WKNJO@lQQ z@E$nDXYe8Zd;DV!BOg)gkqU5MJ%g153H%*{FLr#hqCr`}6dl0&q5#pwpnt>#vA!o* z0u$*-U34%W8hA3n?WHWdWXhXB=R$q>@_@UJw;hIHCD-l7$en!V4GR1RKF{uX4Erni zNNytaz|jry_~F2hDEuJI(4Uxpb56&#Y}+KkWHZ3S4JDaQ-iIHlbfy_}WU6DyqAXN_ zszo(fSx^ow1p7o-!gagI>}_ZigeNmD%w<^?Bz$Kgjb_N-vyHID|0j`uVq z8B4Wz|3nS=d{=jO+f>oin&ShIiQ&RH`QW}rT9^fA658KL!H!G zfU>CJgEhHOLxqp!ed4l`_E~Y1M(`A$`<4P=?oZk|kV@V>d?XQ@EIShmua*9z#cqhQ zOH=dT#L3o(2E(xmYM>4p0~ze#M+<(8!4I_oRvTr}md*{ijPq@D0#g_qtmV1`T>mhQ z;R`;?URoJsjmvx#n+HE)v8&bq8L_LyF1MeNfdt9;2Q)Pt>tPf$0bP)P(ACMYo4Qeh z0JTra1lq|20)BwZs2?Qn=L6#g7#uD`_qZBO;Vd2WM+}9zTNyO`kbKZ$C$5~ebl>FJFb4Z7zsfG|`tzu4@)5ti^&GdrcDM|#fZafH|I$H@@cJe= z33tK08rFiC(Lh{iAe6!L2Ov^c{JSv+i zZ_1vsbpQ^-F}N9Sf!pB}oP~369)1JQz$@^EmgiHblUGuCUWE)lgP|u#{IP>fG#~Qc zPRyLrsIpdhUI|o}Kd)DE+y_*hKX1y`X}AyW2W9Iqpj!R*Uf}pDyaDgP`|x-80=_gN zG9>Vz1(p8^75xFm-o?P%%K88NFCZv5Bs45st1}oQ%$7)Nls!5oHZDHV>2fC}ds9-= z(lfKNbMo>Fii%50%PT6YYHI5m8b^(8ZfPCU-qAU>yQgJj;iAP$mn~npYW146>(+1Fw0X(uEpXYaZ1-2D$cbpGK-AA9^aPd@eZZ=Zec`4?Y$`IXmx_xu0( z!y9kD^{2Pr`O9D5{oDH=eE9JvfB(m4|NQ*RufG1~yYGLv@YB!#U;q2h+;b25|MUNU ztNsH5RgG&@$m@xDlL~qpQSVS;pP=I2L*V;Vs9eB8~_o6-oY38i}R<3MPLn z5lU@iR98w*Ixuna(9~%&X3d$uaPhJgtJkjIv}N0lOLtwld(Yne2M!)Oa_lC$OVwN6 
zeVU-~$Haey#7Io|gF;0GVL@CF7(@o4L2SJDH=^T%4~dVDKOsW?@#&|Z5hR~~@x_^#Je(&DD`~DwCh4#ts zfe&7f^E%GYJkQVjx&q31lOcEc=b+l~J?MALg&MP!&~$L;>lPP>`>ozV7n`x{KdOTy zy(OglmV(fVI#qacMHanBuV4)oNV%t!siMpOmiG>S*6@8D-q+!OUQ6%Khxg~h`}5)d ze!jfFF5X`k|DWqZ_1||GHF}3B*)a!d(K}3w&CZwY&JOoFy$GZ2{+wftcv(+nu(nxG zs;<$6wHh?kzJeUa(d_h?lM-pd`F{(%aZsZ7sJso|K`C;mH(LR1HanknIXT|x^CXP+ zVq%Beaq_N)pu)QH;A%s0NJ~LJxjS1)J1og!jwC9$r~fT~IpiaUGUU)`w&GRift`>0 z935{Rb`L&3=*vCa6QF2r4AhlZ5Gu8LVpEPbv{RZF-Y?9JILgh88Tq$>`oDE3Kn~?Q zW*YnH)|7&)z#0`mIgw9_V2iH+RkD0vJ;{Iq!*`d z0FzeO7NE?o4=hTn!j~tO5o%-gq$Xw|xs9r&cl}!+?>}!GK7-=*)1hwrFHgGm{&r`? zYWwBUgNIICbqs1h?HN;k*iWSH49Jo<;qsGf@%p$bLV0u)(}=qYyP)@=06`o z9&#vM|IO>h?LR;0+q>-AY0ItWuN^ovblnl(aMqJmG~}C<)#I-ax8aoWjram~9if;} zO)R5SgjJHts5So<$VU#E-#&x#bzeVg-8S>qkv&T;jPBcd@{YBA_bmrp#d!}#-Z9^J z*?@m)QWq{GwhgajH4`+{29hqcF07bPLoNNcKml?nKn~UGrakD|_T$A9dlrsdHQ(C* zV831CokIbI7u_kDr+nEdM*|Xh1A(d0y+Lw%7crCE9+E?BC949PX_|iv=#WG4Z=bwq zT=&KG!EN82xL~@l_qO@AmSv3HZ*aSsq&^&`bx z#8DaN2$A7uNKqlD$n4+~RBqr%1TWwiEB@aC`nexGZ&*5c^yqIN^mf2v;a!ftTpv0T&~wn9*q3zXi(XOuErC zXX4=Kf{&W-ZJ1$ry?wC?OxMd`-%cUeTC!hz*hW0}cM5wFZvJt?(X(GoXudQ5ld_i^eo({a1rjh>6%S^cV&LFT8n~NqtA-{}pcU7irO+XJY3+#zDGD=_!EK(yJv1v2J*2#RH&Li)Pt zkhkq;(C=9S%3s$-26|@YZ~Ylc(KD;c zdLz_0Sw64!_PSMxBcCrO@`m%m<-Hm7{I*nPSwm7xeKnukR?ZRhmPCn<6frZ8sUxz7 z{wZ%A($RA>^Y?F{067?tLk)7MbF_M1@8xx~78`!9EI58h7na_gOVKsUBdTh}td@#I zPIn29f25Ej8c{_lPGqsNhW{y$iJskW9I}vucI!+i-@OFt_HTqn2dn2zp58Z`{3&Ou zf&@qOAz7Vy($|nl{t(qLl%2LD~}(}5vQDxCdp5U5;8{q zDQ_JzP#toRLm_ggL=H_>8(+5_vU<|#=5wRlmv*`Zmo!jIP_`LJWp%~jjpbTKXQ4V~ zK$#UcESD#p5J}{x1SuIK{}jkV$Ks7cE^;W|@)J}ehZZa27oGN2k9u5vt_}FmPIUzc z`&;m;rt09b@{-U-t(MxEo5$#v=f)gO$&4R~&ybvqlV^ysJod$+2KSIN?(|;N zch__VWNXpP|S z{M4O;p6!=i$%Yf&T;(7}oYslWif_T^#WWBKBWg$`q(-tfsGeFBP#am|R})?OPXQHj$XoI;RIdK)LC2YkKZkn72MT z;Htmr9F%pzi!M5e;lvFG3Rr^#5w(vb4ebe25W2!MaUJw*OdC__(;B1tr$FAU51?@I zl$Q;wKDjoq@ykEXZ~wXb)}A#DPxtMucwz0Ry622ZyXH;eU+||#oyJGgP7t|ae}wUg z!xRC2h@ON!9GT)b7%lc5;H3UjAb-{*C|fl7Zs&@RPMlu#Y0vfTv+E!4UQ+_*rW)9P 
zQ1-;xE#a=Wf6NUmfqsP$8g?<1Lc9=8$DgM$uxBHgm@`pPzNa}c-lw?ie+uY+ne?n- z-lVg~=1lCly7G(qhuh{9!_HN?uy>mr_U{wCcD9Xq=Ho(t80ZyxpXe8Sha3=in});Q zV&MI6vVwfC#{_#{=Mp`y@k##_DF12V)xK{g^j@C%LH&aj)AX=;P7ZAUO$@s?#DkSd zG&q@2!FNCDb)YT&CGimU1=$hvjOOh7l;Ps@G|JWM3ER!%G1uMwG2i2#@}Tt#qy7`0 z7}Y(P`H>!0ev<Op>Z6Ql#ymN%U>u>1EBKIaQ6M{MtG~ zNqrSw-!NX@JN#L*_jPz*hyQu)ygwiQmz@u(=suZ>?vq*QKACSi4~nf;gWh2$6noh{ z(c&;S)nU~0>GZhc;;7Wa3Eb?S*o1-(Rv#d_FCRb}a*(c`2Fgt{pb$BfS*(T%dsC=z zKlG@~AA7BY$T*!xP8`aJ$m|xgw5^G~{Lb8PZToon%R#pK zD^MYaqMh@g0y$LMnL>@*p@&r%+?8?y^OQC;c`%on(8o_SgJvVj zKp=;jeXF3s*7Q}Ai^GE^AN=J8T-5P$Vrri*EU#5bt*Fn6YAly=+Y3d4-n=ByKz2g< z5eX-ID21)=OJ?WwjF-P0(pP;2`N*LZIn}PG`60N%%ckBH^G+C>u&i$UZ8FQ}qkt@_NS0TL&pR78$F)gaYJHW-=G* z%~!o{Ik@v#hm*s-E-(D0PXCx=O?YW{6)C^Im{MMW>rT^^O;iqx0d7Lnd<2B8PJ1&|tpuWxLJJ$K6g2cltbnF82AchuUz`wpwC- zm4RGVsH4{_)vPu}c5II*BVj-w7a!rtvxnFsRX;mL-8)|XbXfE;WFrTi@pn*x9Gc8m zKI=NL^Fg13!_6aZLFWd2*!`VYX=4LHW2g)()97jS8CqsrY96~gF_+&Tmn}LRlbv;h zDOdF~rRv`C0`i|f02y+~K@LU8p~_^=s}{2rk9$!L$!|jbyCk;fXA>d5 z>H`ZCD+v{B1F0^enA{vzNNW$)Ms?u|_}yqi(iN!9=ol}Mj_ROTG!--}KZo)S-#l;L z@yp%*y}w>TIgAU}tnH3mb-*>9^N7?RMLCS_fb^7>z&tL>U_@4t%EQaUs!0ZFU0?~b z(Z4vh*|%8O;!`XhH;{oG(vgF11>ieW+BaG_j1_PO@ZyLbqMY0rnibq0 zo*URoQ)8N$8lOhC)}tX_=iVSF950YPa}uZ)d;}H0O}*c-?z8hF+rBw+*>rKoUGuH= zPpuA=JhJo5zTqAu8ug{doyJ8oM+iL1(U3&ak?`c8L7Eub&ye`_M#;Q-VioS)@fmJi zf^h@6$U!&%!x#0-CSM+0J!R;^#&0`rn=Yw&ws&*!YfDSjb33QBd+vVx8-9eS%YkI- z1tLAZ*>RKHdhGW;R19MXNcVC1kpPkfn(|bal7n6fJoduURpao zGO9TF(F6^AJ2?Xuf0hW#r$@ospTf{gC=hln^Z~1-?%?p7GkC3V1njCqKv-iBA!}_R z96|g2ATWNn0oJ+$5VPJIxQMuM1HzdfKq9(F#iKcG8hU1Bn#=~3#WGMI+6YQ7bI1yG zd?F+J-xe{$t|V|-=h*_@sYp@s2u&tAO3q9_9Fm(oK-4Jv2!(mQ__F+NT)nOZQ&%`1 z-aGtRulIF$Ux)vB-TbdOACmuje-Nzx5~S$IypP1q_!Ew)BK9v}rdBrTi@ zN$9;=j`Gd9J7$5_d>QC$H-XM=-)oJ(%R?n8@OlQFeqJ2KJDw0H9^xcs_A^E5Zkklr zLCz>?4N;ai2NzT|1{rE90?VpO@s(xc;jM!ZIV7WZ+H~ZgMh=B$%Rql{6BN5zyejl_ zy`LW(bS0OT0RdmTF2)TSa(5hnA>qSBK~ulwoDnMc0>~{>C1@Fw53HG8j|_7 zmGQ|!xKU&k3LDO{0=!MOah?2VT0Ah43-qaxt<=u0UgAQdV%l0gNlLdKI z=V^ojDv;w$H>(m;+B4&2U2u?^)gkORts 
zJa0Z^alh5w^IDr<*y(y4zt2EaG#7;DSL8+*3Kh|{sX3^}NggC03lBZp=yx#2WXWTjq-lco6tTD~F(spimz3AbNzk@);)|Qc1El^q>8*nT<=r*wPzD0! zQEGQDe%WSe{J6``;#QxF=Y<|G@{v}5erpY0URe^N(G^fjb5xNvvMhE}iXy%(L6*`P zCsA~>C3!upWNkZ3SkyEg{&Gl14*AHTZ0k=@zkAWMPKyl>`fT@IA9C_IGvGn)>+s_> z)CbB+Du{XN;_#C6f{1F7I=V3-hu0dLCG3dGkak68s=Mh@Z96@!sA)Vv8mfaB9g9rl zp#6OsRBZk6RnzW;k9#cE-x;zozkJ-mW8|oNSVxa#Pf@`VTL*b^s$* zbl`H58VOoMoN=7458&(^p52~V- z1ynFAeaku3o@I$O?q#xC&k9x5cmNr4kj?uD3YJZUsx_b8@7O&3(y-|-Cr0ccP?|M$5M_od(Yh9||r8a$B zfc%F^kcrj>6)&CgtZ_BUKy3Q*#M$jL`>*X;)q2ljcg=lk2i;vqO!^hiP~kZYlXC*k zjW|k73_BbqBJ@)w*j|R*uRAK;t1C9sy(1yprCpTk)Giwrkd7Rbb3cTN#UI}7T=~(d zlWRWhzq0+8=6kzWS3Wa0Eqb#5VD=+N578|zT-=p_(8vox^zbtwEaFKDC-8VgT)-bJ zzV9$Q!Rsh5(S0aU=yF(`;&fOh8V`^)VpYK{# z@Y-y9_G>H4)EABpyhq-ith-o$%FW;)(ls&(e}xu?y~L#Wp*0{r7h>t2=lKk`b4g5> zvtpLh8EMpbfE?sdK6Apg-tQ(1jQ%{S`R=M|WzVw0voF z7-b-u?));@@M6{1d9ZPo47M*#gxza7V7Y|>4yI)AHX{Je5(`9Y41^!_c^zTr^@?Th z@sfSW{Y9LE>+=Lhm*+w!r)R0oj!$JSaNxNG1K91}5NzTBVWw_C+lAJE>~@BjJx&n2*Ae(;4v=VW52Af` zAQ=~ZO_Po42R|~>KmL585@t`8z^~KzuzE%$7|#lY9Sd+^zSIY7mV1EPDi^@4aRU7B z4nSIW2*TIf1AT)XL>k*dwDCdUY_tL1rUQ_;$r_S3kB7Rp4~>fd_|PZ^zMLe1Ia7JC z=!*z62O+|`A26`>7Z2Do*Ae#5w*!ZT2f<^pHDH$P2mDeiATG0nuwN~Jf}sDl45m+i7!B(do3uu-d#&pZOeZ~&((afBS-X~+xcbsJOZZF&R6J+mQ z0J%1+A=`BaWccrYB_+8&N{PVUN{A(2;Ru))B1LhhsnWy~VVUA%B$f0iL7RRAUy?P5 ztI8R`wy4^CTU3phj;yf)@yH<#ImDswIO5;GhV-pJf^yFSP#;(gYUdr0>v!N~X0XS7 zDJ|%FN(_CJ$BR83Ele7TNJ|@{q-P9<8T@cW9QbgP5mp3o0sW!c|mM1)%vyU|nHxP*dJmfrK{>=zTH)IY?0!Bzwybpg|6A zGINEFCZP2?_$)ur`;Ia!^pb)Rb1Ef9c$Cjq^hXQTU8sp@C8NAth`OqYq_3$ADy`Au z%PX~n8oh?tP%u{BI`EN0BI++>$RQUw6rkLm{!Jd`kjcwp&x4N(vA(zRL&&39wAkZm zQR2gi+?;NXK-b1fH8e9aE9+@_H8qstIz6eRP7`dXQIV=jb3*HNV+91L4*dBaLDH|E zf&w|HP$ovV>%Vyvdz0q|ciRUg{(d)$h?Mgx3h$UAQrd??2JP|jMGc&kvTByRs*I6a zTSC{?7li8@azab$GRc)?>EU%nW96+wJaR}z4(TZ8owpffK~Ns0++x-13Okc06>he7 z%l$A{OM|Fq^26c>GiZvA)F^dBVq8%rFR8qQomQ=j%B<5cH4Vy$;zl|0kWwou(&#nC zV+9h?JM9~X6qLEoL=GC{pf~*wRPI~#vc}frQH`tZtr}mupWRdF1 z6i%^TkWgL_C#q3#6!qCL%EpZ7q9$=faZ?JTyfQhmrev(Vbx1%CBIJ;T911p}90qcz 
z-nZ&`{Xvrl4bHaL8@&C`Rt7{277+w(DzdyLBSKx07E@G^%qv$WB-do{r1dgxc2ioc zwmFei)GUZ9s}e+4myVS`9Z-)WMp^T0^c_Wu9LjdiMOl!QPn&FZ+--5Pz0%_8f1(Bx z(OVKEXv`0lmuJ(|IytLInaV9sPfo0n2-50P;xn6)_?l*3Y+-X;tf4wCw#qP8APF4{ z0dhz~4!LMnTZkMgkORtLJZ?L%<94T`?P$BZ|Ir4&h>r5Wgc==5u2)f1>deTZ3>mvD zEj6JgStP1Y5GtDDgz9E?LSb`syrCwBUu768e>q5ygKG5`P`vSN2I3{!5948%_4XTm zhYp_Ya`PW(_MtadVdKm7!Lot^vMO5{Q7FlbE)yzv)qH7kJts}p7@ewYjuaO)GlZqJ zkx7+hV+Dk$4vDA^vZW}0fE-FTehW1wv!At@Ex*^lf7_KK_BJQ_T`=A4-n6>90Df_K zkW^V5s+8x`bjd1~fuF;zVrM4SG1Dat^z`f|YI;#qxTLg}l3H0dRzNu8PX`6c{pX`+ zXW7Q-FB?o|J?z~3>#ZTHEu$m02ZoL~`?Ym@QY)G?`T-A0ai=dgw;7izsSVB&RFXC9vWQ|vNlaOIF~5>jB&rE2%BT%2 z($?Y%%Blj@Rfe$wDab*H9I_Wrf#Q{)zOLOc?S9AhpRNw=S#0TMBk&i z++4Iz&7zOMu;SyVO&h+v(ZBusbEkLB8@^;_+;e5WRo&87L)G#rcRSx? zAhv#ebadzZ?i*&tO*btqORw6wsV=$Vr5Ai?38%2^=wX6@G8iHv_EBWmZbpW0M|8Gl zdt9z-Ym&;TRibui$yGbF7LOT7`)(qnqjexf^FMsr_$$glteP@>e&aV?*LN;#xWCu9 z46R4e+_803+;a63UiT$(Msc)=Gr_FT6Xe*SKj{2`qpU=qBOIaUVZO+9Kqz+Vm!uu) z&z9Kt6-dVlNRfl`=Sfg8chW7CfjD+%(d4dc#^2RHFj-XkbkByoC;N71JU)0Z<&mo! 
z_nx0$jV7 zf&4h}ansBRCr^Amq3i0RsdW#GXOz6!u~7AT*IM~2^Bu{r4(^M4&vT}e=To+e z`xBn4>*GXsm&YOxr$6403{vZc_|2`Eq z&Edh0rA)LJ1qG}(5W!_jAo%UT0DhMbgzWJIs+l`5&0QhJ!WrT$oj_pa0Ll9gL7KH4 zq#v+_9GkIny!u0R)=*g7u+b}zw!)pB2OSnUbk zzqWuc@$R1_3txjDekVJz(E_N3dOF53WmX!Ec!j1pamah|8^kykbAl zR#-vgN=t}dWdX6P_5pvjIS5yqfq2bWsc4;Kq&Yg#NdD}TiOKNehcPhs<8U+w34|5j zc*FYdonhOLcCh#717JPN3J%TL2X1rCz~?`EAzL>VnAdRDO4eFX_yXF&3vIUqW?43gc~Ln3Y`@W__Hjz08~mEit_ zF7|s!Rs`N7Uhfw*Iy$M^#tBXMH^qEWBMMAt>+icf*J z_A4~r{2nB`=78LK8AzQsKpJKjB$E!jPKa=Q63fNhVT-ACg}veNHl`0kPHVj%8y{W z(!0F7(wlwz(&{mXlg9$EkwX-6i23akNJRaGWYc$$fgExymqCt$F=Tq}0R`Uvd0M#l zJz*5#I*(7g7$sz%rArb{kTXTcNNVX&P+`VlTzSqwK%=t5r&-^aW}2dT?dhj(~byonM2tGN46OitEfM#`j9b0&q}&5smr_?lQD)aMc%}K>bB7 z%EaWOJW9Tu@#{P{v!_abhX+|C|6B5i@QcZ8_9|T+N)Aa&;;Q5?T#;}Ag=uL{2 zw8inW>!XwNsu)t8ftFKTOerYUg_j%9491WhQdg7_)}m5`cVv!*zZ??K+5;(a$k{L* z3bv!YtoAN>Rd&Gmaha3Z?NTr2ONChcNhOilub>FpL`+G2LTol#3!~9Trxt1>GxT{8 zc?KoDq%4C{SuP8&(MzaJd18knXKG01+%bR!l`-1FQ{Iu3;!`-?=W>;$6T~3wx68g1x zW=n2JLX{jXU=_2n^ONEM|)AAS#8yxMUNXT*WYTk+k3IW)%jSJPf%AeF0ww4m|)1HND8Ep*|{Q) zMv;_In3^Ci5%Mz11aX=Qew?8)mR(sE%cw^^j6h$>;#ZWj`1QIdev4`>ymg304q`Mn&p{5lb!Z=mtv^1m-@V{ohsEz#dJpbC z)#2pS+u(_BsK77`#X)?HCL}d8hnks|!OBaLadmMLVM&ZwTE-ORRz!%3DyYJWa(Yr- zAw98KJr;n6>cB+~X~;o|?$^cWeXeTjjHk`J=HKqwx8~xYjp=ZYqhm*lM__f0UqqoH zke5?P6iYRf452bo6_>@)MrR205sEYeMV?homTAjFq!s01X?2BUakF|X03Y?daj5T> zqGzZ2w~wLp_b*=7ZTbE|yXoBPgXSyG4O{OV7&zq6)aiybH2To;s%PU9h5AJr>?N@aIeh1#-y$bt;sv{rpMO z=5KHHn$Eg7V)pCtQ&uKjNA2xv`dk7E+r25-jsCHzH9^AoiV#_JDJ`3>kID-ziqjHw z$wfGwLhrBBlwfofdVfuWHbB*)8VeBr{X`JVLi>`;pA3b|ro5_I^Xa{gOm;-iJ`Sbn=s?Ra39_Z~WranXNMhFYW%diI+ql`@ZNglSR)xy7RjqbzHK<)$ zT610NIy1)tBqE12l;hN(y)dg5eQ>*T#pKf`R)0KraodmWH+KD6d)<6n@#O>7Iin8l zsps4S`KSHDStkQol;a`X;9;5|;0P<(dmuK|txq6z>P?k9^kgdRdkPeGJ=Kasy}gpL z07BIJD^Q#A^Jwoz=|ijCi4PnOupf9jM%>4Ggxw?h z5pGj(*c*{SzSr19&nx^;x64AZ^TjlZ!)Q9yepE@fy-*ZkbG~v+K-$+6K=%#GKzw0z zWaJa0)|)?nQ2BVpSGrf5f6jijV@c}E-M=TiwA>c+^3WdoOHZq?mpI#?7bJ&(XA~E| zCro#*M;uT02l3vn_mh2{?@9a|?xthx?#i+00BcsN?DMp6dpHI}l>?s+r;wvHi 
z{sRZC!Jxy=MIo?nc_18Ig8{eo-r&Cx?M1QK6~eYULBw_kh~9AsxF&WW*l7zXrUyZ~ z>i}f#wg&Z{{h-@B76vLmFsg6)(5U3-WFytwzVL5Z*voM)PV=gb}8`|~!y z{<0NFE&G@}h(^$A*D^94k2nGUhLX9DlQ z0^qu>05;ATSm7oR!8QkK(g6sQ+5;)u`6VIG{dr)a*V6!l?-NWl=8<1hzyqI7+#Sz8 z{7v_vpewGY2^X9%5l=f_ekbo8{;bjejyj-cMc9JLKw9|;FxGwn>`l{ww`(RO>|X$U zmlePb*a$IUrofCs5hMXxmX+r8G9=UeS+L6c30{YJge}EB@UOz(_iH5F_30$u^y&+_ z>V7nI)b&i*N$1h6;U!(<{*Zy-Ne*WuKRY-uXy(}E_fV^Jnnj$Ip}&W zqStMd`YwQm9LVVXg@WGW-ef`IjlTh*$xk4*SO6l2l_2!q3<-q25Jx}wIwscTS)|bS zAypc9H#CcQD_BjwjxVHN#Tp_nV``%=`nI#rqX+qMk3ZrD+>gh0dYoppdY)sv3!tO< zLpXAv{rd6SEJz~ivqU?70;%}|kl3#RvF8>L2AV?x#qK3H#^X`6AmDC9D)DA`I^`-! z8F?wFAm#$LlzYyn)NQ;I#7@U1NGfJ)Z-*? z`1)-oMqxG|6t=5C=DrnDvHM;KLmi&PGkxyI@`G-&Qo^rLWz5l#?AWseO~NUhUU)p9 zM*Ih+Q_}0zBWv^SPi^!)l2q+C5?78n$$l3Qf$Bg-4ovi2FCICFP@g5+{v%}WoexuX>g1yzNPr zuhl&{!SzNe6?aJxMLEOav4lm>KW6yf^P z3h+mgG(pGs?*bUefrcE|i>81O<-BFcA#3XoC=)Xe^7pTNrg7Z%K;^anW_Fn`(yehO@`B-;o+noXr>no}Nk2C4P#3LdKvpb%}YvIHt z*D;f%6?92PX?RvnQCPl88){JJhc@Nq5L=X)Xafz*f0Zdef2-I)B$;70g>oXfUXwaOr$aL=0 zvVF_%R@iR6Qs!cPy3ogKP#H*USA;~?iD|sD#Hf_QIG!wzEzHbeNtGFloIF{CEHD#LBYc1etP9sxmb?GcSdu$xmXH6~r^q{Daw+ z%V+h-_^biZy8sq)U?7J#ISe_-8qkx@@=1J(BNMAY?@>YKa`i;|{SDO5Cx4~@LmA3txPN98b`zqZ%nu`4h7k-N$-97P=Y#-OQ88zm< zv$bu5Hf9xFj!p$F9@va}KXOV{U=**6$d4+aBvXr+k`Ns?9bX{K4#=11`sJ&YKKTZf zPeEI*pQ>Aq$?Z#h7r;ine>}=qr=z_vwF^FcRsGxKI~{939zDAG%aOA?e(5>0cXj)pkJ4i359ZkV)opV0UwY-DtOFqc|GP9#+^M1d6?DW*J8;aw)n^f0Kh+{#L`T+7=s z-SxdvkHP`by8sSyc$1OJM*C2f%$xM6amfdl2UbiTIkWDI-b*`XwOrVA{8~6Ep3RA=u1zwDQR43UEmI)6Y=}5BeJDOnI zd5rfifQRM}iti>s(ToWXo4%iLdgLpk-b?d8Y`XpX*X4J%&n>vMYh~6A^R41*2lw%= zINP%>d%K5U#A1RklJMA38p-!UG}+@qJk{lbD8lhvdZhhXHS6FRebj+7^-%dpW4)gk^8y)xMPdIxd; zwc&RHX>=6A7G&lJA$#8e&{*sTy`>e@SXn~L{&#VrXrfVj z?T1EHT^}0hPEJAVFuwah3QImofEClC&>DXePC2DILRpE z#?%R7_~Any%>S5y<{%-c7FgKygEyG`>;mRFV zEZYk)zwQD4Z@WRbd>2|2yAv{3pkVaM9Z>jADyt_N<#!qxDUPGRriV`_a^Z&$=x81k zjPf9Uu>4CmSoe(sZ2is#cK=`rmOq(;-ON4U{PQmGoV61$vrGU#dpnTkYy--itq?hP z3;H0l8TkL%1j2b6K{C%6(&xX6f+qC)gXr(-&l?%B;q!?Um_8{G%|X0i?o=mO^65cX 
z^@SxEf3*j;e{BkTzS$0z-)@D2-);tn?>2(#_r~D${d(}5u?}!Eeuv;6)B{ zYGD4j3Sxg;i9+P>0)kLJ3yYo;m{n6D`1j8seDgG*@0x)|ZnGi6brH~TD*!y>Rb!eBwIf^uXn`^IfM=mm7}PT(2Cu z<96xLJ=eeEy~CgN`Lhma9u=@+D%$qtGoWsq1`N{~5M?zRm@bPUB48Cz$;LotZ-)@! zZU~m`gP`2~fYaJQK#3h-svN+#$?2tchs#rsUe`x%L+*E7PJ7&Ry6Ab;@tW5KhdW;9 z9PWDj9Y8=cF&uITTs{@Zt3L&X@id4snE@P&*}!&O0#Saeff0hnZLFOTo?s545^D&~ zvIV@x;We(<<)wdx`wPE%&u2dE-VZ%{eeb#tVQ#pd3ApTh346}*8t%B$ZNFnqcYOa2 z2u2Qp$bo>q;ZRW)Bx=J~z}fLV#F@{AxI;^Tg^&!VSHUW` z=Ycw}r&xpEV@z$pL*F*sU9aAt8}38Fmt4;h&p2Ho9do)$7;w3T?Q^*u@OJgQb=_B9r)NCzzN&;A}Y%6DI>w{F(uXaL1<>+-QYapt-vDk zjettpRljD&WuHFQS&xzE!>*@dy4}t(+T2Ge4W3s?^`6&)|BgQ$(EK4BWqM;!zZw7g zmmu8o9f)_&f>i6JAaYsvItgR)oKLcP#9=tzXK}r6(~|?Qhsi>&1n1B$1{N^S`FlJx@i~dY_A^@VP`T_qiPUcR&c5fe?@bZQ(}{i+a68G@BD| z{uYhjXMx;m2}m8+Jxlf3`A`^SeTPqVy&eoKbdD^dpA1&S9L44G2K{vjeSQ^* zJ$@}ooxX>K4crhE|T699UD`_5b#i_K2aZ*k)k8zrREb$BX>M?jGz-mO z(n-hovXE2J(vUOEzXQnVy*?EE&mS9oZx*8SQikpkIcPSQxBaK5y1fhU7Fqv(MeDTd zjMB^gumb1PDGDJrCeY|r+~}CnC_Z1uNKVpFWg=yGt|Tk0P=-Fb$`xVVatWHjq>_&& zh{z||Ddf{pf5%$~GV1eUkwXe{NMC{0Kx~-yx?uZ{kBav!xLIbkcC^^RbVTE6*P9jK z(<}`Ru1==VO5>R^+E^}M6`h!r6`3Yh(6gixYQ9`dEmx#a+tY>AzO*FTP@;f_R&&u# z#rz!*j_N=`eRmu}jPmZ8$RTh2S1*dT{czu~d;Ya5%hhKMhjt#(x!ZOqG2V6QLBWR9 zFq$qYB1Xk$^D|@PlcX^sv6z)E6-KHR0!C?id}MPvFS1w4V;&L4MUHT}ktf-I2XdGQ zRMhA3kV6`}K6B9Ax?tUx&q}s_f2VTSe=gTqtU6g~XVO>fX4|as^{UJcBoxU)sA@3{ z{RK%BPa4llN{LGr3pg?EU8-&j<#$KPdLU8k00Uw9Y95OV4`pCLR5=PbgwE{ z_R)*7HJ{wA-!$!VhsjSTy7v6q({f;2eXXN)X_<$6UJ)iRBOgx|D?^wGSu`#uJxaio z@lvRgl(ewabQw{sQ4qxC=|QQTieT}8h?p|WCnb+?{|=zNnZuyxmgtvBkTVahKS23| z>Qz&3wifgPzy~BEIRindxUA4Pwj=>izDaM5*7LX!i^C%pqk{M6Q<_beH zh2o%enFO1zmiea}$88&?|1t&em$4N%Ge+SUt%wf>Ak%r!FRdXjn>7oxFG_LsQ zO3%7aPLFK(w(p4P;>KR9&86-3mg+`VXL*gczpx^J#4RPznMLGiijEOS%I63IHG*V} zS}OKYE7LsHB@&OkmNakm;bfnjV?5uiI_TK>v~^NN4ta`chBffKPzeC;w}H;PHF#x!_>t z+KXrJ#OJ>Ez4l%t04+-druvo=Ls#b$cb+T}xb%9v*a^IswD+aqrq*W`%2kg@h6VTO zq|`szPSJO`+~7ZaeEBzngSj`NBV4Yh#4xYr#?!A=BspAbN+w_HO|idz4tbPhf8l8? 
z`Rucp-vMcuf2dl3`hPz0;EV)u>$U`O@y#-UWAC?1^t{tt+WOXD)803xy9!^~>ZZS> zTf{zRlS7~Jn0`3LDa{UA>_v`VWcMq!)>3O3%7atINavm zV2I6~*TKI7^5zOaqZIM54>JhoXA$TBlpqE^tq|(|ykmCrCyiy5BYGPPKAOmRRZ$f8X%7n2@^n{gS z$XBYm?^m{t=T}byw}}7~mx%~-r-=kh#zckI8o33<%YvL9Dg|r0Up1uAUtp++Yi}2G+38$O;eRwty4H=5Wiz4F35o ze{>SW$$lZC=e!Wn@}~$_G_ZUd9LgRqkN6o&|QB7dygU^vgcr1j-nLZ-#JE%@EH2meaii(Q`tOXu2jq zR6G(Q@?n8sDy$KSf-MpOAScBGm4z;#y_5mQD=1*K$_^-Ntifrm1-Pv@1Mdwc5U|l0 z!Z#U0+-3tv-J%D%TXivnsDlR(YeCC4P3V@-_m>v$G<@k8IK7f4#Wh4VV00+V^W!D_x5kmjoZb%7Ev7b=3w!kxffBoE$;>?uqF1y6RQ$-SdwdO*A@nQ(FlZGJY4dCyy4Sb^&z&k@3JWJKV zqfQIBZMxvrqYs>ZLvT4`4D3rL;C$T_n0L*=@v+5s`b*2Nw6~U{)Q?sp4qvR^Qzm}P z|8Ht=#pl4;DGuJslHjj77lMoyLx_zuggR}2AifOvNA3ilG!^hJ)&S2M9pJU<1Gn1< z-1^WOJ7oqg7tMin%@UmMSp8r=ww`3Xv>Btnwf#i-iY>se8ZFXhGI({;rg%pje{fGZeRI3a9&@?nI^z5f=Ph&4 zebDhe_c3FH^S9$D`wyoH$KUa94enb-z*klb!f~HBPDKimbQVF1sWhb7Z-QjDJjC&{=?F0sC{eg}BrbKqhP0jO`HQ1d1!&w(`UMR*MJO31d|1Q|~9km9*}Dj~%1 zTXekD*YGULXiy3Bliyy~kNhT&Vb2clcih8%Z#bs{U%6ZhdBJ`Xc8~co{D$*T;APgZ z&pGx8@1*M}=XZcN`cWSEP9Kb3OdNVKX{dR#H5XzGa3vI2Z-P9A!jCMj#zbm>@t1^X zyN}UnjQ64Wu5SX$JqP)<{?B-=A&=dbaTOY2>sm&IUa5bbedwA-K2 zDZV#Db3(2JmPB6g-5YzB-;{XLt1J1qXMgGd?_$~!-h;FQyvGS0o-ZQzdA|v6_I~fv z%pabSe{1l?OmPhA&2-e81^C%0M_;T;ciHzEvkjwlB>CaJOs&CEuH~a#KgON3aQABo z3BDJiGD1&>6+|5is)+CNuS-7U+m_bja}cvHr!zbFw=-M$|D-heJd3UO9Sp1Sd*}Z< zz!#rG0Pe5Gq2A2GHBpRuvl8=zwK_{D8co)XwAjeKsb^?CspMGQE#@<>W{0?+OO5e6 z5uXy;7o8K`8(xyo6|y&Fe_&HaYd}|4lmF4|2LG!$wE_3j_Xa#ks0@4=SrIf8_&dN4 zeV8EBd5K6au8A@^am<29O*LvS9&I&V`@Y?3$BP!4=KWe1%j@M{^z#LQ*wP;9dpIRA zv^zc{W`9gUVoO9tYC~9kW=+WcoT}g>xqE^y(BA?6SVJg& zN2H?mF2Wl2ZWn_lCCRV*H5Yt1U?4qs&|=$zF3RrftJnJ*xa<|uB{nCD>*@;Fs>cMJnPJ4NvPh(L;NOf*P zba_^KVqsc-T24wuRz^~NZfas@K~mzeg2bd71xZQw(-RY)BqS!jj6lL?0s_#7iNtl8 ziFK6W=c9g`$dC4&vp)5z&wVqXyY$g9(@i%Ilhn?3Ihyvjx;k{#d%3n$`SWYa!a~Z6 zVxtT5Qxmgua?{c>%d(QvYjfk$_7_B_^%q2?T`h=C|2rcl?QvpE>dQzZVkZ8rAsTh} zuk(}ktjF^Yw+c^o$%}pHQ=R?tq|TzhPZ@8#GGMEEvX^dhu-%2y*35IMtMm1)s0s-# zERT-LE=fvAEzC+y$S=-{&aKW3&)JtBntiw+IOkG9Xzt&cVcCxo!!ln){tny*6NEVQ 
zLGo~2?8W!GeVc{Gdv}PuJ+3_K=>^Ssx6c`FIDgzmc>wQl>N>z8H|^)LtD1ehitB@d zva2H_Q!5kVT7)4%XhQb7KTsNVrWIDR5( z??SAlb`73Cuu<@HpN#OUvpYo}UQ?cX?UMewQ)jG{4jywb+;`ZSRCB=1xwPGzm%A?@ zAhjtxJgz=2Hlj8yDR^&Qx?fdAmUm@i4zIE&*Q4@GuIJvrGrcMvC3=^>i25B6j5Va- z-hb&b)Sc1--+R^y44>R2`0SdD(A_^2=Ulv|w`Sm?<<8F2Wc`L?OxubhuFU*iPi|Vb zpKp9eXh=j`OjJ;7a=cG-P71H7EZx1aA=9<7JB!_TI?J{BembZAae{mGi^$&rq3C~P zp!VLgL;&_J6BzGXDe(H@3W10Br3J3vQ;*n&Umq_5}&z+Fz2y*%zw?`=9TW=z6NPtnrcYmWunevX&30;!iuL&%rv!$_C9!tE}f3b(!TFwE}4n;_DucYdUk z?`H(kqy(UH4)LT@jJS17oVa*doH+eZk~s2atx(77opTyrX)UjKX}CG}x#h0pr)2G@ z$4rx;M;sgPf4m&r9|SVl4pYR)4?u zvc5Uu@jD<-k{D_fCjK}mN}N9_N*w#uhcP52(Dq@yNd1RhQf0#$%X8lAZ%le;CKvVA zUL|OVsm&j9GxQkZo4LLTwsL+GZO3?%LUwqQ>tH`rPPZFsU|0`zGp*j9U|PPr?PT$G z$jSW07v}GntPmpZwhI#Hj|da}*oV=5TbyVapD$27wo$ZXOi?OxZ1>WHF}=0nW2RgD z$86=iMj1-Dk*UTW_11J63)Z2JMe9??k`2jYImUM5Wu`Xcb>>#%?G_ea`z_7CUa~Tq zcxq+(^$RjOBM%w~;^G0!KpYh!IxY(n4fn-}JyVMW3#Yb-rTvtj6Z2DbQ84yk@WEs) z7pyn2fx3+e>>YIQRB%8+VvifMb`Y;(gFPTtP^fN!hvb_&7ob%oz?^Z-G$ zA0vpG%LGw`IgE@Sk^=FtR4^3Qitu6UEH_Y?;{gUa=)(70X&IyNXnAJRV~9fxuJbC?0aUX*0a zVZ^{3fk5;@c(78K1sgo?l~stKNx`Nd|j|!patZG8o*ej4y;A0;J#QH zyqD~Pz$H5&a;ZFi`OBgCyB&&_$wJj~8EE+}btiHBYdHQt1QGs~AbdeWfP+2=1APz@ ztP-}sIWdOq;(D-amL}|$PzPN}WiXnp21}>c)A-XuQY)CUK8v;Y5pLMX-$wO zwZ`nGw7%HEjQp>L?IOTX6ayFKS>UcY2Rw`x0MBMA@R+NB>$4s>QCq7fR#vDe>M=0aw!<294Lk>SpUORx%O#H6~SrKsEAqrmT z#rUbrhCto<5Ny5-LMW>t&}}pL1?~WT+%E9UQUh+OCb-w?0;kmgT)NQ@>oW!C)8^oG z)dH9gtiCZ`SfdYQGeZ4j`_5t9?ltwh^-CI9JfnlzOaKet-xyefJHAi*puP!Go(*Bz z^C8@HDMXOgKp1-q1pCT?f0QzMQX1e@s0SXEM&RCP_LI|YIpuQD`aAoW-8bh8_LEL` zDPxW=96mDM(uQcC>4WsI4o?~1?H@XV&0mgSH50(b8k~N4L70&W_~nEA>J`(R2QkJ= zA=Y*c#5iq*2rmT)2~~rDM4cZ#IY!^S$}A>4YHY^cTkJ<&yBxl-`_T0{%^Y#M?mWzV z%6`Lm@A8cCnf<_V-0=?c8}+8sl>Ielu$u{RK|hMUMHs(!MIZ!qcl1sPNZ36Ob2&>P z*=h|W(YHbzSMf(=pvFWU6xUs2@xEFkkr5{ELy~NV{Ih9;-X+e@xO?3ma~eG#vO9eKVjc0j>wMP# z59iwf*PUJmTx5OlKI8J4d%|_xWq>ouJi_@wn+f1x4P4YVfvCM>Ph&VBi9Zxr&mZR3`h4bA`;NKJ1h}sf1Rty;3iW0h&Pl!udJu{d zQ&k%CMr#a~zpt~{^rFgP*Mm~F-pyQ3>x=1uv{Q+ZE(0-%yh9P0z6U}IgW3c4gzfWh 
zh-~(4k7@Afi>>py7+2%_G`7nBb;ut7cYYNCBfPSJFK#pOZw-Dpe(W#Zi)*3?Kc|&D zXHC|s&mC#dTlTuyY{TO^^3L0PoOLf1d03sw_M`Qug}L-5#_>91(tP(t<_0x}m4?>_ z*F^0NYK`3!a4^0+;7mek;NJ-)!GqyN!EXZ!Lq2*JgnZ`C1aR>=1fUlak7VQbb}4$5 z)$-!wjjD6rx9Ti@vEOv<-_0b2Yt>BMvt{m9M+q%+R&!f|#Ux%bdzxPdz9`T%se`^T&r8$v8)Vx)j zguXXz7a3_+n)RwjW8TAF!&TQiY~;>1)3y6+Tr9dPcn+<_{;ayZFptWtSl`n0)S z-0ds=E zmzoVi6Z>R@-yhg1_WX$YoIj7~OJDA`-f^Ohs&%k|Wxl`KgVIp$%c?34;g;q{`{d^& z1!ZMsg{P($M<=9K$HyeMCPpP6N{&drm>ik-Y!bfqw1%u&!d2a*!bBFme0lqi~ zzjSXto_$!oRN(vm6$0;%tQL5BcAdcO3p>OwoYq=4aLjCL*AbFhQxC(as>8*uXdjP} z)#T@zTp!{YTNCXUzBf5Is4^$Qx1u!KtGqtWqpUODz5HZ?d&S>zyz;?t&(gO6Ud6-w znScPChot$az0rfHUqp;`FCt!@SVY{vwwSnfMON(81&t+#PMd6QJ7%X+(?>HX?PXi% zbaQDb?LO?-wjhtNmPmeJQ(_>$AuE(uUmW39R~yZ)?TBI39glUczZ>IH_bSwN?>m1^ zoTTdJ z9d)+A+h57ChrF3#Jpr76?r=}<&UhbgM@FE_{-O{kj0HKiwMWqRAB&*1-;QAHe-p%P zdGG7g_|a=7AOfF3wiE%?bBKRB#fjTTXA##f%_1(|lOm4)xkIvdXmk@EhUzj*}L4??ky&#Qm7YS4i zZ51sVQj*GeqqQvVwc+}(SJpCquc(TigDf@9pobRgrJp|IWtfq}%Xl;Umzfr}FN>|M zUe?%J47S^u4IU+#zPgFLA(_6Murq!3!)7L)RS?AWeFSl$SCHsEAw;xZ6(MRK%p%G^ zFB8oFEF+fsd6!i5Cygb+pA1&3SH`%iJZZ9k`L zT753mw)nhP*X+waJ(Dko^^Hd_8W@c}F)$n*F);l6O@Ai-z-Mp@a~M6CL1;OLXJOtJ zCJKjV6WQOTg%ZDS6^r=3bGH8v^#z_k^p?8*Fq3Bfuw6_2LEk{0a@}k<<+asjDoDoi zXViA{pGk72KXc@bp&WZZ>VNfp?9zjyO1f}WNe5maAC+g~TpL03Vh*DP_4l5u*oW~z z5c@FZ3dDl6PzY=i<--mMH&Bss0__DfFj`Ckt7SGoSz!rGX)}BYGXd{4hPVl%57F2M zl7c-TdD#E42YWvnH}A%SchzBFiyB-({@FSchq`e50URH57&&(dBH@()5ef4I{9&1( z2dooe!8S1(?35sZ`W!3tAI#8yFan!}`aoHv3(Un@;JO4i;+Lv}|1wpGSgwo>p}Qb^ zg(6-KrT}#-<)KYl9u7&%!O0ofcL>KnLlD_FF@u3Qj8G6E_#h?V3X9N#SSe%=8%3-@ zR?Gzb2LtpUbU;s1159SCfz=#ku$S5ew7EOcdyogWc{{*+zAOaI$HtKbTOf78Cdgm7 z5h@mLfQCiuVgHO&oxr~z=OFe0LHNHS2oE3#C-gzc_$;hov7ia864HZ>!kQo}q6&(l zN}wjD09xWZz(8CEOlNHct67_YB(V{w66?W9axHKqSA(}C1~_J~fauxFAbIvu$epto zN@fH|n}onhUKs2YMS-d;1`JIJa5RzvhRs5tJ1qs8&q{EJS_71f^o4+PHK71jKNP?c6n-F+mLNYd17N@Le>IR5 zM1Za&22Q&rfMp;B&ejXTiLneEy`+H=u?}dd8^NJ)3sClM19G!0*mv##(qTCuosb8+ ziwa8GDHeJ$92E)R>Pi9E 
zd?C0~mIKFqHMj(A0O$Cvz|7tbj%D&dM^B2jPYI~q%HYta3Y61oK)I?8M~a#;~E)^r4Px1NEW~INZ_uL3yb^N%>$fMjkgD zvH$VQ1JWC!fX*ulXbw_9V>-YAeL2cDVPK)I@IW7gkA9FJ`eOdZ3&G!ZIruuQ1#j;y zz>C-c?#YVanx_iv3XPx6b=p(ReR|&=4;Xx-_Zv;p&X|0q-Y^|=cxwL1;l0HN%DCl_ z!w>U8Di}Yffx%-s=su!@&U643YoMe5z}bu*1Zw{P^n-%c=Rk(u3&W)y@ne7&z91mK3q#w5(rd=kzqdu^I?Jz`nNgX3U zqfgm9WPrt=j$nF=35GXk0BBeP6F(cj^uHhaAff07MXJt*D7^)EMD=orqOAKF#*vu} z@>3l5joSU$D@E@kH_zlfr_Ay#yT}u?w?5eT-++vrbQ_x1Aq2ymG!r|H8P< z{6V?y47QiC;qW{Q%+9gEbUJ{(P7qjFhZpMqVAS1FsJr7ZCzXJnRHEtfsYKGcudyuI z&k^3r!$Be1L%#9GgPs{y&)f^hk6kM04_NigdrocGA=m42%khNk4aaMotBmKIOUzH~ z3#@O9)9jy=<1Sz~fCo_byMo1Z{9A)7YCK=`Lc;M`#NnDq-X(!oa?SghZnX4ky7jt| zWJlSzv0Rmx;r@D00wXQ%`y`R>cxKaXa!Z)k-1f3AyEeIAV0XHmW%Y4SJD>A7>HIhE z80#H(fb*4g*zG&L*Zn7@+Z}AX+`(o#fU#B(+)?`npze;w=aG!MI|F;xvUf{O<{B*d zlxw;6Z6;0jMY5~v!&tt-o$yf0>p^k$m;BP`=e+ZsPkENR9_QA&54gATj&ORs4!NH2 z?sdJv?{*#Z?&6NRwR=oB?c@ERwRnPkvnPq=wk$EBogxAU2*S2DQzXOsLakHvYE)=6Oa+!7MvP(I3zjjVrWX_%fQsAk6tNJqi!it6HX~n zlg#M=4z7hD%(^F_=FMF$02QkR#_Ki+zC{n>X_und?RM?O7n?0M9t4KFD8E0EF(U&NeevhT_^A--iLX)S7YIc zPSf=VTS*H08yH&k)vhL$6`r=mCH}PB!Z7FbyjZuSoK(-)tURBHjPiicw1%L-)UFW! 
z)RUopX@7+TWeocSrGMrIr;W3N)4sB%13YmKB2jl|&L^O30WsOIh$G1@wp7(6tG z`19mE;>wB5LMM-^&O3P6aCO@O>m79+RQ2+G&W8C-?p7J~d`eR3`6PH&s&gK<<#fv=tFW`sskbv61K#KTW zJDYgWF^jm zm~@&B2%keh)-2*ll@M`zpD1zpAo>p{FoSSSoH%q{T44WG`B`aRz3~ozpRhH*3fAUVz-l^P=MlsoH3V^XzW{OMpdit463?Q%DoRw|mmrD< zR|w<`%8DkxRGt&{LT5?PbJNxQXQa*CXN>Lar>=^QPrX#APXl+`KZ(+^eUhSU^(4>0 z{As0;>C;wY%5c&m(`tyQB`5&T0 z-n%(O#)mb6@gHPG!-th-`wwd^A0b-j-t)pUoa)O6p&v|Ozvh=KhC(Q%L<>aY)@ z6njCk?u!t~6Y~Y4C)NlCPizzAPbf;bO=!$@o-|lUo3vPJKWV?hcG7W`)i<{_=HK}1 zOuq$hF#Z<1$?$vnX1(u4TXnxzZ`1yk_eTTCKo!k*6{uaU4DD-nK|gW{dqAGARfN~m(%nH2^+#}g%weSAc^FZT z2qJK57U2c+1zcb`dJyY{$RHzva#hR>cF!_G|3ME-XKRDClm<}ds)6G?6>y!e1fKI1 zA#i~_L@nF_sf%Qx5PLwX7jJ`oOSVGqk}Yrsxi~G2I0vOT2WgnYh(r&<=N&<~p%21D zAA}5x(1VZ`G)4bGAGV8V!!FU?pf08Yy5hUQNL&FdX32q_ge-aw+t7R10&bEUfj@ga zgw9zD@pD$A`ML_qq@Mo1nMgk?cRWD97BYyw@8^VJ-wK?s;|s{GjbQu~dAkXWzaZZb_F zZUnQVn^3`R2Gi@vqb*?i2KlrVOeYcem!J6ayD4m$G=0cW^gFj1{Cyz?Ca5sYC|9Dz)rC1-vzd3m44dXQ2uW7L}kMIo$8qNnA&IS zDYX$BQ2t;GyWZKs&LKOHpAN9xBnWm}FblF7a}el*xXX(HPhA49shSHO){DWNu>xFq zYk?KI37isSfRQctlUBO(2en%1JEd7=lH8#-VSi}%SJDZMF}ur{KYE}&V*5ttz3ms> zA-gHhn2u{ zTldpFVC#3+m>mB% z?lyVE`WE%O`3(jbU1x&+WhQ8!XM*N*fXxO$a75kvYyaODYY9Q^AA$X1ks7lh(s1Fo z2o{q1#u>~{vG7}j4f+FY(47t-p%3GX<9q(nTd2Du(U*#q$4rWv^O+3_^MN9}=6nC>0Vc;lPy85UPL1vZ!16{Pd5I?5TRHtH#6FZ~4b zB;%Ol4aYvmLB}J`W0WJVU~`BA<~?p;-0cPi(}C@ng4hCy83@!{VOUEnu8rjFB0tlV zXN{$6&l^rNT{f6RUjI0jC3iR6Q|(4jknUyQXp?i^$(E;h*>=a>i^&6=D(Vr}CdNT_ zC$op$@7&G0=-k12!fJQ@L~nQhY2W4n)~z03-s}O!(}71)3F1G$g5Zz3Hwtxk(t6b2 zGQyMDieevgHRim|HC*yI({A0}Waf@*v0SzD;eL9jg2POY`o~)x@kuB3dgVKG^UCS% z+*)Rv`#x5S+d*~{=Zs4O=TFyq_hF}c&+pVaFR-ie2Fq%1Fq;mb~&MF7exH|h2eNCEUL#^wg zV(hEJQ)uO(In3hV5>`QAH77S@7A&(6I^B(E`Pq~@Fqt02OKWLeuK*|UO>*)X% z>V03-dr`RWp0)@z-!g&Gid6!yYc~i!YLXScS*J1ge3j{{W2N>o2lJWA?OAR*&1rm- z+N2<>%J@k8(wIbQVPqyVH@uLY8Cu0j3u)#i2lwz2gU|5dgCFve!auWQbj`j8`7nW4EoVgrU@u@2Xvw zi><12jx`vr=&iQd(uQYA zHx#qAs`7b8rP+R#`5B?4tkf84YI2HWVqz{UHolA#8CS;*i|ycr#2)tyioNX_n()Cj zJn@@jc;Ziohy<{k4sgeH5Q4fp3CWit#ww+VH_cMS{jRyhwXThVXZ9=4?rSkv-dS(G 
zxp}X{&gu$hjj|H1VSa&+MOJRGU21leLqbN9V{}>;D?GKtH8{D(Js@d6&o^nn(KtOcXg2Y7&%1dB`=43brW*55pX6|+O&e-R{ z%jn~IWL)KWW({%pIg?DE>?x{GCXl8Bym2i=N#a_NAfA_t689R!iCgWnh^vPriPMMI z2=pD?Ijgfd%5C9N44x)SKZWVPt*8Hf9uGKF#C|wSek!v8k1j?k3L5w z$E~2n-8KI(mtAm~>st8Q)xCIv=}|aE^(X++bbv3;LEJ3jQ?3Yczfy#_*^K#vE-~U< zzc_JpKw6-uPhPC`kk-PQ1Ey?dS8p++7LUx>S&5rRSKP3 znd{`br-JQV(d^2sILKjCT;w?J8Dz66zdEw_{B&?B2hwyvAU=a65#mXa0BT=h;&L0F z!O$Z@96gR1fRoFJ{U^4I)F0hFzoO4*RsJEHt!X_}#rQ7g-4Pw``a%2oroQ`vtUOyH z?cAD^99WGxbY??2lh)Aa?9kB5qBLA!Q5s)5(Hh5Tj0PY()`9JGK$sXYl0gvnQ2$?T z6e3P{U=PG$JO|+vp2c{65z%mdt8nERRjGng2Fo*!TW*XWpvXn^F;xN&bF_R8dKq~1 z2AH}Yh_H0-POxQkWsx1a${g%F8)&wjJ#?GS3&=~FUFSI2z7uT8?O-(>5JM18Q2*c9 zi~7G+km&CgB)ah|#@2HpMBTNyMCG*&f`wOhNn~8oS)6deY)!;D($>H;3{6AL-WZab#jlZoqFZ`DA@_?H*>%4Bzwz*wr z$+NC`C^4@3sykc>(;{7o*R{EvWng)^%*gC=qlwAoUQ^>MmylQZ`&T2AOJHDn5%j0y zaSqNw6+s+tA&3L0znd`wP=)t`7TpvhG9S$*Ql6|5h<&nMB=oVWq~BwMg}g^rD_sAf ztaWG-?<;JgBMv&m~R$hl3jW4@P(E z-~YMW;BQc$j$5S!aT=dN5B7qzU*3zBwEjEEnaPecr@6$~8OF5*3;BH=cq zGuL^@bP;{XZaL)*L)vc0b*=S~_Xdlh;7z7OF(q#;Wie&ZP*6z@K+a;&{?v$L? 
z`-gH`Z$HXuz4;-h^%{0e$9eRD4q+Ch6?;LdQ2*v%#6F1In1OgAN(7HCBz#8K3UWtf zgju6X;*2pZNsQ3XwHdQqU^zxvWH#=!)Og%|nc=wa3cc|#X`QbLtF*r6tkL*du~vPe zd7awCq4lbhm)5IHzF4m^@nyZr*B{ez1bv`=m_@1T$MMmFNWvUM*h7Nw8<)Tw!a@Ny zYHkK>K@UPf1h=lm%|Tbf82twWu#wUQ2RxI)d7cKiFHi&Dg?I+%A|*&%tO$8a6rd7& zLH1!U$dP3`;1c$OJVQQC%YO8Ms&NkT@l4P}^dLg8AI9?~LAbyy!V%^ZWLPd>4eJHX zKvvig6h(Alx2PuQiK&B$xC&U$+680@Y{-zrri9ts!F%>L2${15;-xl0j?_ljGj{_t zV=qYWymfFIxiu{{JvjfE!$`V<#D0&c5gem$U`sjmb!A3!KkQGt} zMd6*GCM*ZqBC=p0vK7okH-U}l2J{}*fs@#3a2J;bKk?-dIcq7T%~}G*vlc^*#3E=z zdZ(r6D9%5wfsi{mKKA3#-k|^SoxrgPLJxfq4fH`&V6nhXkQUg1&tn^G72E_n1lNP2 z&>Bz^k_IiI6`(J)6ikH|gSGHNpa{6WH!WzNI-@t9^fZB3#z6CbYuiU zM?nY-5z}45V5Tk#ruyPwY$XXsj#6OYIUn@G7J_cdV$dmA0@{_!K)Z1nXzgDPTD>bk zYXCXB0yM9!1kH!YYh(nOSP5G2|APM=;QuoXx>$proDi5G=IF&(s)~Yzt~i)mNP;PS zE*N{v2g8s>V34>3^z)X1Ud3|It6u@S`&NSPft8@sCk;BMkjtw;=N>YMd_=yk2A!Yy z6aI%TNdMG@mD2&gYB1O?1ZK#uT#SvfDA;O?gSFXguymLU=57nXG;lGP#4Q8ktmR-- zx)Ka)q`|Od6&Q4`27^OuK>s*$VJ+zYfjnOa`oqW=GPQ0>AJ%<0fVJNYV9lfftey_g z+a?IcGD2X99)#U?VX#L(h^#3NWRuxoM?z1Ejh>X>Qm~9(4(937U{<&qOe@!bNdx*( z`_Yf;K`&|mIlB?n?ItjKjJ)0a!)SE#q|uMf<3_M~%osL~q8IhW1lCOl=x-JL*ORhE z|ADeq7>`B99wqdo7>2WfYBLWg%tc`DwG8aSR)S5^8n8w`$f|4uSk`O;i*w1yqeZ9w*!Eo_?(Fh(E7 z7PU7GH69DKznhFOaIs&Et0f8Urt`qnelf6JR{n7ESv$dq*fd5<-uA^IXU7P+RN(`u zde^XBlkz*;4%H#+L+Y=sPinlfx~BQU@~PGns}Gw0*i5S5w*%#SBv8C-4+?k4u;UKp z{{rUdKag?Hn~A#94K=tY?)&r6pYy?-6yI>}l$Xuo2@j_97Y=XzM|SYmcTTZ#Zy4#j z2B`(AFDMnepW4@IKPI*6{$qDQ@4oF({d=~T4DQ(eV|df{y}>oogw9pW;asMI>LnT| zU8IBJ1v>2fFJOg#KNa_z*|_iSi5lDw=OYkvO2Mk)Q$c!C;{g_nMto_~Z@t_$4!Zlw zK6Q!M^^bF++TYA9%{z=@om;dj{TtKg2bJ8%;*GW zagIBJ<}oIyA9VudfoT9+d(8G zk683Fr;K`7pxeU+&2AS^N7VickX8zSGmg*urMIw_2%L}DEkcv=3L+y3>JqQx4dy?M zwq0>A%yHw*K(5?nA3x=DUf~+2JQ8$|xn&skyB3%pW>=W^vg)i3IJMbyG7s9dGf$J+ zn0HC7PVa16+271sIbhi82D;7epowVw7ogy{E;v3PHE$^D-xz5DNZKGcnz~*1ZMw49 zvou|)dx@6IugB0fUI=sDaVp4L`KVv8<`I6hUax1eahFH7S-X3&As&*?$$>wb-P3>c6&xE;f>msdV_fxAHQLJKo8OVFF;4#%|)#hh?+MBb$7}V zVmy1Lz}tfL0#Ax%h3*vWmb{#8vh-A%{rdhy=JuW#cjb-1L$zPdX<1{OOqL_mPqwIAxs?3}%R7dN3IL7jVLP 
z@J78C{!4q$C8o0H5yQm`h^KoN6L+dM3SO#EnRT+*VDaHRo3)*oQEEwb*;SXw)2NE` z*DH$-GcJmVwa5!gwaE&}Bc%tGJ0u6z(-HzY8F2x}m@xsjm~lbx9TGyn+a!g9IbxKE zht>TTVBtFOnN2_>YVVXe#OJ&@#Ov}o#KRgX;zrGSfpdG6#0Sdt7xffbt!c}1kZH(d zDORO&cb6sk=oZF@80Ex9n`cBPTc<>3lM=&A9Ad+2=~1Ek9V0>pm|>yUnc-n?X;G2i z>|&z90x?F6{tLL^TJS^786!!IrAwgh6es?!lpyZZ&n7N6tPwa_t0;E3N_SyLnZ@d+ zBJ$SiJg1%IS#Iiu>E1dyDS<}mNs;Es@d?)Pahas(m?BDe^j=y>)IP_+$UbI3q2F})K8>Ap!{Ndd`V6_|`~BFSL#Ux15q5F$p5qxSv>v-h{rgSb{FMqFwYCr<8L zK^$t`A=2KcIj^zSWM$P}yG_Lvboty;wn}=DhgM>~uR%<1sA)uYjAd|UimiWo9+{t3 zLG?^+qH|Mw9oN2k5Mvvkd2)r!?2RF_K$8SaI{gl$!@8{{{Hs#LsAgc$`TPw@L+x zOXxwIYC-+qiTb~15z%&Ft598+>g z{{kTd@idhnZWa;5xf%gtphbWtQK)b#qnRyL>d*9U(f*_E>}ekFd9luj=aBe&^bI$KBoC-QC^Y-QC@t2qYm+ z2qX|R!Gi{O_ZC{TxVytRBlrFGebV$f=Y078b}22_T64@r_I1rM=L+4>;u5sJ*WGXZ zn5WPBX)mw!2a!L$yw|^T^IiYT*?%24AodG+hjsX^gOHQB4`X)Df1~$Xhu&+&UK$xV z%0_yQNs#uVnv90S=A0FW9EA!GcuQr^1}moSi&9V6o2VPJC(|%|ccEGEu1d>*oh>## zJA3RsXT}`eXQrH8XAU~M?!4vXHuKKGeFxZi>;P+z?O?T#`wiFw(u4jV{r7gvAl7d} z{atfw;B?Y)nuRo<6C`!#lxgK>jaUoL*zshZb{9!L6(ALNGD0EpM7(O~@pR3=CnHq)=tQ=VM2DU zB4h(*5G&@^;L(4!pJ0%>i>##d1~>X(St{pu9Y)&kmTU>voq40L`wE3!3l$H#8Y|;> zHC5jGYM!#kl?pZ2D@__sS9-MVua4>1UEQQ>bM3I6&9!^FHdo&1+FbsrV{-|#Z7%+s z(@P22yApdaFoPJ!45At1 z_aKnp>p_&T`-3D=mj~GrP7g|?>>t$2*goizvwk?DVEJ&pqQ#?qisp~5D4IQdrD*oo zHwCl%p!mOWU^yY1FoT%Dc{qSIScH2qsxbp7xK1N!57|iKYe5q8MwSYDqd^aRW5ne1 z#+t?BtuwpJTW?Osw;|m2Z)13E-lgzcy~`Cee^()7`o2Zj`2A8*qYvX^1|PPF>3=*X zuJ`f2xZa1iVtQ|XA@DD@VhvW~JRHR5pdE9dYRmxgt`U-Sn~>;_>?G`m015aZO?m%N zp}GImr91sJW3u~c%WVD2mBr$h51T3WJ{UtZhXJH;>On4^Nl}SsQnc}CKtHbsGp zW{#fU1kC6LV8hS_XC_?e#vCG$SrsB!lp%pd0s9{0po~ohvj=JDW0!&nc1hU6E&&HQ z#Nj$}=UR%!x9`yqsqV{t_Y<>}9hB=5Kj)e|hhX!7U3W!q*AV@!+7M+4oWaYmAV=Wq(Y16|mP83C6|TVAQ|^hKqT@V2BS4RwC>8L4Px{6FGpK6af7z$Y01y z;qa%BaFAP%sL=K`BtUa+d-1Iso6u;>*8i)BJ!zDgL( zHy~R@zovWBDX}r>?!hI6wJSi{xAoTZ^&135dQLCgz)npfSCqnLD-MtjJRVC z;(=L=CuUL}PORYS#{tgK+~AnW2lizGV27EM?P3wI9TWwd704Pfu$~qN>ltLf1X!QM z{OP*nFRMqA->u(DezE>0`Oz9A-&=$DI~x#t`(H%y^B;f}<_}KTe=z62z0vQ^WikH7 
z%;0Co3O-(#2ZduEl!AFsA?87~BLB^UmSP^XOcEScN&RxzApO&RtIQAkJ+j~IkIH_v zzbN;~?!MeR`&aU>?Z3*uvrAo;`*Bo+d;m_N8;{@{cDI{-gJa4f$r1VGRU!V}qgm>!`(l~TZi8~4T*u@;xJ)X(cbQgt>pY|U z#`%ECYo{}+FPv_xJ#l)W_R#r@>U|ebzK2a-cice!wj0Raas!!#;On_*;3-W(0Ok*2 z=)t2f6N=GgK#V!=)pFv1a9`e^q0vHLg3`r41{6!Z^Q)D6?XyVnrB|==bI)P5ryk=P zkKNa4K6Kln{lImv&VAPtdUsuK=>6&XOz(HMPuf>KK;yC}s9p2|y0Cbh%8dQ z9#*4uIiywhLeLU}vw?%grvp})p738|e#~#P#S!1VmWO;#Ssn1bW3|ukjm4gTA11r8 z7iU)p=;59b}=s9E2^QF$MeM_PLRib?_(5AmDFlBj~=fHJ8+gI?9v`EP- zNy&=m<8#$d#gywDkE}O160ykmU|5gYZ0NAX-r#ZTT|pacW`cIuZ4W$bzcuix{pO%& zcGDqWEvK>ZaVi1~rXoRiDhjk00=}plj{1q{yR&f3y#TMNM2`AUrbc^KZoqWE)Q0^= zp{Kxw+)&9=8S#ooQ!_OVBo*oIjjuA=729OGBf8UaYh<7Grif9ysqjgM4Pl!c*M;qK zS`&KKc~$5`=T#A(Y$u|@Vj>1iCSt*0A`bKx0s&ZqDAb?x-nr=gON7byN=fQfjUx4+ zR*QbK#)9=ixf}lp%!3Z)#VGF0O4XQ2&(+(SQf9m(qN&w1}cyp{@n^07{Y zJZY3Bw_8-GON}NhCu^Mf4p#U{?8g3}?fFR>n{u-B)@K$OuSu^opGa-AUXk2kH=4NA zaX4YrWgvcyTVLE(kKVXLp51Z3dv+(hcIip_Y1fkimc40U+M5nW3xN>y-3jRTveA2& z;C0mSkT*?y(!QY$-6D(h5Mqk?+)!5VBL`g<{jDiQ)dAX zhWDO?elHIx=OAzEImweYE^@1jn_O5dOC9gjXP#}dV%BCl$$-XPA%eg(X->o%$g-28RMz4mnJwA15XMO6@pLjN8eRXQg0h^{= zuxQQ$(}h3;&Vv+IoQtgFeHAOY-ONI6bg+?Y-5lg}k2E>7M3-q-rw#Y!R(H{Lje&CG zwNYxLRY|&o<(WpkB?ab-iz=);3hM1!^V^-9a+kW*=PdWE$zJDOnYGiWBI}fIdG;f( z%DgYm)dgTzQwUbIcwqiQAPUDnotZ!}GkH);CpX%d$>qhY|3hM*w$^u zwV~5hWL2xb?DD1v)xo+1o!;tnqpr$4vqj~lR?Vfg_6^0YPBlf{Zj}Wio@M!Kyo>X9 z_!j0LNB;6DF8btBS_1avrC?oA2IdQa7@P;04DzvzMs7FI$dz^`^gOusKfp>34vCSS zgPQbBeHNT+x?P0FI(=n^+QL+No8z>*@EvDubvb5DHN{qSRn>M?70pg%<%`{lONTrQ zN>+R4mTdLSE;;I(RdUxSx9p>9eg!xbR)Td=6<90;;@An~(8v??d^egYa-j>)9_V8x zM~0cn-etmM+cI_f`XMup@jfTPk#29PrClM)UF|ViZLP@$jm=r6wT*>V74;Q%CAE!C zg*Bb7xm5!mS(Ovs>6M#(QY#Pnrc~bYO|O3MmQ@Q5*>zx(QwNp{!4_c5VFQ*1<7bzV2o2;){j?lW8G<`Lbt(JQKL);qH8x>xj~H?FbmzZ~M* z!6u;tEEfXlggh!IAX`J)>@6-5cCPy7zhncVF=c z>3QuE-t*HwvKOqQdck5Lkd666HI4y#-=lrle}H`uTQCDyhu&x9GzS^kBt#ZBX$ZNtAAK}uL3ilRbaA^2Uv#-ZG;@gF_^&&Vk6dI95aAH z^nYELgS5`_lZII(s&bzJbMamquADt?g6X^b#FKW0$;8gYDn{-|Q48IkqZzoZRM&4? 
zy@A)(PGk42!=|oV)|xwS-EHo)^^&>smX~HOn|~O)Yz9NuO<=H)KX5Ldz%kgn4EI1_ z4OXp3|B3!@G4=s8Vh&PqoQsqmmnH?rwdq;M%vn>8I&sGz@fL_W5-b{aI9e*`P_mrg zp=?F(gC(jS2kSIk4|Hfb9T?VeIIvdF?!X>ByMveY?DoIXwVVB^W48~q5v_%s$2{%; z_Ty|>fjuC&7h?o_L6%?!&~yOzAz%)YgL{KAE(wv8OUhKjB|}E^MO)Uei|(937Xo3of>{rL`g+w((;*5}tMTb|#eYs0Os_|&;#e3XOP6}Y$WsHmcV37a z-To?Wcnc&9|NJ+*u?FiggIJCkM9&V?-~V6zEbPUIzfLEi_gG2LGXdiFT$Xq}*Pz^< z8_}FzSTpQjxG>qg@L{of8Omnuz42sj}wxK`mw)b25=ku5Z<#B_s;^v`HK{B z_@YADe9@&@d^M$+ezm0=eRE+LeDh(_{T{-k{XK?R<3}2c+K)n3l^^wNN3;xhkZB)TQG#0a|mb5A-ph$2*w;D7ITP9%pof19MDc@hhbzboeg%-S>f=% zScK0&#Szp$kNSTQ;&Y!6`^SVBzaT^p2+>4TfQ2Yv4k8QuL=r@aC`b_@ycT{?CcL0Y zaf2Sk1tt_bSX0=%K{0_3ML`%vAQ>q{>S@@Vi7Z2KL)-#DTZaY)=>JVHhcMN}Ph&b5 z+Au)RgBf%}SwJ(H71RsaK&_4)R6999bpRPdCOJW63fYeAMUEopIYIR{{@zpMJ@Op^ zc2Jwg+^-j+i`vGRJ(wXDn8%nK(!tb{0mg33U=Yj#dI@ZxlgAEP)f}L;h!ZrIBFnfy za}}}y*@Eom0*yoXc?P+OJVxFk-w@#Zr3IWn|3(uyel7s?H7GDc|38-j%=N}tV_%Gw zIRh-6nZYc86-;8;z$l9Y49Yn{znKg4mf+A2AuD-6Z!Hh#P9r;zSzgdRfm}r%@PgiJ z-d}oOdA{ob&sXG&9&msDKYYT^`GAos1(r$_*kT6ZfH}xqUy7p%=0Oga2l=pqRU|uD zq;Y~-F&CKD^WZmg%!K;*z<3l{g}KlMWUBzKrXxp?3j#lk?g@M~ekt(D__M%!%zxfu z{_|!I6X1LOUwHBJ{{m)n`f?OF$x`5gIe;7Xrnnnoe~=9mID4>yV<P^3`HSMab`uh>ZP!b@vfV2A(q^yJbDQJRPi?MAKeTxw zbKmxZ^c~w@Qn&0t;tzWezhMuezuSYzLckXD2UqmD-k3kk^#BB8{t$}w2*G*;*|UJZ z7sprMaPE&@Dg19e^MzlzSBXA%ZI*cIvRLY|^MK4lr{%JLIZn#obDUPV>#$Stw!>ki zKOHVA|K{*e`HJIv#fwhA{%C(KJ5aMr(8i|A>fF*p4fZfkN!UtuOkYtDF)|f zv_Ac3loj)*a5wfhAwfLPg5m`p`DcmV_brjU<6S57r{^O18y-E1*W8DduehyLz2vq| z?Sku8^>eQKG|sr5(md&UNAsBLYmLM1KU5E5FU|o^kl*hGva?*kh{u{FDN?_Zv}hj^O_^TCIj}v7@#DT586|i#G*$d+aK7}#zzX@Z{*B6~eLGZ7 z_$*aF<~^c$#Ct;fkk>|?1D-Rwvz~|b_Ih5?-{tvKZ^rwJ)^=Y|-{uD@+x$Th-z+4* z5O7EDAAtIEo-ZD~f2s)jE*a`mhAQ%s9Dq>1isHDR~;>Nu@etiM0mH;#-Y};+B~9#|&94jUKn^ zj-Ikv9JSZBGwPgONAwe$&bY6po$+AMg@>afnq5i%1cK)3fWAA8nS97+CihF3$sd($ z_K{ zwm7lRvLj*4x-EXaZA;uv`=+>4_Koom?3xlkTQnzwQA-NwwWNY}OB!g+2STt0aSZY+ zlTIEKQRI&bCUT_~`~Dm7y^BraWPhU;eWuQmZBvyi|JpKtvGL*v+0lXo<)PdRjlS%B zo$jnMgRYD^}r#@J5(nhbF56h=TM#c(Xu8TOlmX0pe_@1 
z76M_Iza&uPLk=N#O0nO&hC$9ZFq6|QtmJU3DB0bjLEF+~&bq$NnQx-nS9G*IOlGho zPN}yjReflcCP&`o z_1>a`H6hZyl`)E4Wy$J`O0u+@iwgDY3oDIl@*B-6^E$1{atCaSbH?opvNt*AWgm3P z&A#E3pZnIPARo+%3c#qi5cC!TQFC*Fko)NQu2x~sIqv^Ix`;szbmG~Eiv`H$#mdy$ zP9v5Ti|o0Fn>~el8-k>|>Y^0esuR_kDl@d|%JcQBO3RGOOX|&vi`y*=iu!DG3s=}@ z6-+s16wErM7hH47EPQR7UHr>Drv!}iNmaDJbU1)J@fMi>9gnVN|ylQP-npR~^u3l+%iBVx?t!Z9G zn`L%+uT4hTa{JWM4UWmBdz}(XFF7Tby|PWM_-USA0md1XpuZ4^Bjg#*g&XKQ&o<)T z1N7f}yD75058r_>z)e;U$WhDt^_T{FtvQ!0aTQ$DI;ssH7{)AYk!z0)`D?T9T+6nf&P3T zk&uTt7p|cHKhc8Tdog-n^uN>C2QWFpNmeYACPO3IjGiG2_V#{f{-&iqVl~|%GUbb- zm5REO)blzrwX+r#=%=++7$>(jn#DISwu)&Qwux+7YZu|=}tbKUXGn>ff@8;3X zU>wr|`V0A)iunWPFz3+w9`3+*K%n>AjQVRahZw{CK|?FWN$&~`ddHY4YtyI$Z_Ti$ zNZDYZRN+9Rd~RQYN=9$GW=c< zOYipB zZRK`j*OeztTvt3ca$E6L-+c^pJ;p#A(VEY#GR&cx3E9_!{(l(f0A>KoP`?{@IloN>O5W2|svc9V z>aJ6LT250F+KyA(bR4Em>ez35tmClZtCr&i&~RK2Y74o9V{i!bxE)x7bz|tiF^B0z z{nk1CUDyMFXQX5w5F+UZl&Rzc2K2c7HY`#5T{*&L{dj_A!vy^H#fo_EOBMInng@v4G#d(O&!N$2cfEhqP zW&n#&zh*b~Kpdb*DxQ%Vdzza>pOzvKr!}dN)28&m(+*6&r#)G{P6ctepNiskIhD-k zcq&KG?o^qu&B-QF%agt0<|oG`%ua2SG(B}n()8pLNs|*_B~6Zjr18;zvmf)o%_Eor zVGcBiIZy}c*P?#mK|)eb;8~RCm`DJgk>c|kKk@oqo_PGOOWc09q&ffYOn1ED!?3#% z%4~BZj@9x;2D|xLw@4$P@331QYY4rj3|r8)|A;} z7n;dqU%KIwFoynt|DYwuW8x*&cS4=hxX)p1xsMdHj=2 z`4O-$WbND>T!R@H=1}!>_kS4mPvbt2OX$IWBgE+zA=a)6#J7V?;@@sF ziGO{^B>wp)lh`Nt2g~Q4flas%1@((iKjjP|;a3Rpx``P8<{+jI2{HQ4f;j*W(FS46 zAY_OddVbsof_Z=;<^dL%LpWd#;ek0s0Ok-;m_ww~_@NZDh*r!ZhLH_)ZrG1pLtY|p z{y{J1V2wEci|6#uqW(2PJpaTVjC-j4h!Cw0gs20JC<6y-@}Z^(k;42z3>1hks1ZTX z!5qQ}a|kQK4Nep%cvI{UO0hr^<`9Jx9hxY7ITz*;lbA#7K#n7)|3NdJiBd9G|2cge zdk@T@tRE1f{}i*2mjrbQkwGM|FGU16QInVOVNVDTW+7a7EgT?ESV5IAfi|JxwGg}( z0xpO@5{+ab)u`Epj3Da}Jd|f1psq!MHu`_vIWT|FHll%=6&;k^7@!cs0J%gakjZBT zsTyPvvJ_dy0+JKRdSnZ-iy0&jAg7S)`1=o$*T`2U+=cT0kLH{<`hSBtuops4j|N)i zbkJ~NfNCHUD915@Vh%INSF(V73)0O3azn^UR*+kROd~sxeaLbA_m`0S_}{%mJ|n>V z6ZtWZ@A%L20qr?$^#8_)i3$Zqn9CTN(m~&m0Xlw6pc%~!>KQDcR>A_R4XmKjiS)C9 z$|$l5*???Cb|Z(8bI5JvIol7FPpn^6f3bd61=df<$Nv{Ve@+|yzd2?Q777UF4;F@) 
z4cTHIm^;|uTKilMV6Ti|J!~!MVC~8T7D1Q^ z#bYLv%?`$89AMPQ`O~nI>xW?<*Ehpu++PhQxIY`L=lNu?mFJ_u9-j9G$9UcuT;_RY z@Q~-3;X9tkhCg{88UoJ)L*Tw|^dGqH8NojQvpIdt9~{tsJL9Jtj)fc6!xih{Qs9}{w7^rd8NtV9hXfy) zofo=q_LtCYv$sNjnEw#`-5dn2SpfeP*C;BdG}Zpyv-p-x-P55z9}0#*30S33B97ye4%!)|`Gd+Lh&8L?G9R z&=`Ru!D%7~0`n#I`Ik%Y_N$kh@m-{_-KSe=tM`z~X0LJ8X|E0H8$D+<)_We+Tq*^aXh?MWkpzv z`e^79&5@8n?V;dt-Tt7B`b&d$8}tU9Hs}s|XwV(vfoM}4GnR89HAOCnp zxajhx0=*PA2z9t{?oKJ z=AA)Z9B9?YgIaw8DAy-~(tIFbt`6w^QwaG3edonOJoB#<`~J&V$o_I6GE=TfZ7MZk zT3hVMF<#)!zbrRIbTBJMYH3D_+>-QcrOwnM)kP^)8ZAjp+Kq`_dUXi{hBfiy#+C7# zOv~fAYZCIZ4OQ#|kG%8a-6;YlK1krIW67p9%u6gI9?=7Xsu?jjlSj9wk zSM!rC)k@U*DkH{3xgGmxi6`GcVUS2qew1WaZldg>>=D5Gw`Qh2B^#jLJ0YkfEf&a)wooI zo)3NBp&A<5i#;LR@qCVr^$OJLI(^3SY8$q}3U}U~vH;=E;s}Yh!g$%H{4~Y7yj+#) zoDz+S>>BOTtX93E%pSvnjO8Y|>Fdq1(|4O^rJpy?%6M*)ll4O{HygC_vOzsB2UO<+ zVT3$S!aC%l?=8i3cl6);>M1hQNRjC#ZnCCXmaJ&fr4QFzvG&%w@^n@C3AI&(i8q$T z%G8#mC{z|@tCSTMsTUViY2!hpdbxQ^46|~VnWX2eHA~IeX`YgE#ylnWsYzPiH~ox! z(8??TbwqVO5Q%dEz4x^O^n5r52ho4;LhrW)z0W%A4;o)2MV7T`)B0L0SQa-qaktg` z2sYM+h}Bd_OIK7T$(L4SDi@X&sOObdXl0i)>Sh#o8Kf2s8z&V`nkE$PFpDcZWgb`f z*fgQ|tA0{3XeXC|MoI~&%?Dy|F5vTU5&i$M8XSWr^xlimcXgom?P4dRUE*Y*OM|wg z!<2c^A_vaK7Ek_~rXbPs#z?8+`UJUx+H|Fynmo14>N3sLs(PKI$`1Ybib128@>M2L z<=adn%TJg^mOnI!uK1!KQvuqsm7oz<32O8C7>_v+J`ZQhaSR%;_rDE$-@DNFp#K`} zVI~8;BBZ-lm1^%XW@=hu$5GSe&WGEego_r1OXRi2$z-*pDx^2(s3bR*Xe2b$YRA?u z(u=C=Hw>?vFb=KVY7$&~4Ef7AwC=NBSRH6Z)Pp*rHlG*Cg#3_ z+An(0{y~Z?84@6ih7_r$L48I|zcpLgQdjQ6USGkS?hr9NzFI1^D@iV~GfOG9qewNX zy;>uDQLA=H+fu#2wv`6{t(y(~T8|j{w%#}NZ~dej&CXP=5k*h~Z((AC}|#?-(y>7?UN{V>-0bQ45xWWlo&gBVK%ILqWnxgOTEK0}0a6 z{poTMefdfuODk0adz&@wr zh>bp6p&Ntw0yjhp`mIkE@m`-T?y;^^%5`0Xtkc@X@(yc9746neDcP<)sARMDj*`vf z2L;>JAdkq|t^(QloW^;uw;ksp)_)b={}6icF7%-F8_|EE4^N-LeE_?-N!%_e61_{4 z3g2Z)3)yAQ6tK&K)putAhu6*sZugl4UYD5+0mmIh!uC7rL~V9-iCgVhCTX!_qonzc zgHmQY?ns(#`ygqy6(r5JfP~rRe{u-_F4*3SHNg5W!~5@EgFO&a*aNT)&!R*h8b3>s z;KM8=;D{jcKcYZ5&dT z<0B*dMn^UX7#ul>+!Zi5^ie?nAPDLo071R||76z^tijM+4v2FA^;^&bS8T`kk?zI& 
zKZyGWz)2h~iV*vYO2qb}KC!uEMOj^Pp)4->(#$T0(Ty)BFbpqeGwEM0 zXVJaf%Bp>NkWK5#IyTKK``IZB^T4Tg4!GtsJ&1ikIRE-_{M%5! z630Gg7WI!15^@qf_&Lk~F4KwG4K`wQhnE=Mks$haRfyg_eWH8MifG?=AzJr+DUJK# zl=@#uG}XWIXvz<2=t>V3(-j`BWXM0<&X9X}jv@Qt1w;1!cZSS8V3N7}PgddcH;D7E z9rdeFKX*UsAIGyeF^BNDNQmt<>_fmTKnLHMtM!tXXuJ|7YOmyp%4-dx^xBXpys;#5 zZ=8tCTW=!uE|ijZmq>}d&!a@&*HI!LdT2r)R?`GO?x6{MyiODN@S4W|?gx$k4g7;) zeEvGI{xztdcL4QIp#FJ6+^!H}b(0W-JD5NGMTqhTCZhPAi^$>r9O<7DMDnLR5&NY^ zM1JXEFN7Jv4FQ-5xML0xggHbU!54{Q7Ey~?L@&jL*#j%=!A#;R@|6Mv{?I z1b+|$KK!s2!H0expoIB@su~4qh^i(93i=dCThKtGTDLU|NNA@B|k@LtMolM9LT-(ui~+{R|KuMOGo}kuAtB1)v zKZ~3|t}=c}Jz{*9ddK)I{hj_%8WC98`;k#(0$I=eQ*jIP55?Wg-xZHAe^tD|{7LaI z=68y3m|iJ;V|u9s%+C=#xt96q{{rZvwlR8bQ!xZ{0COA@Gt9|M%`s+k#!Sec0lF~^ z(9U22tzzb%nzbz7G}~CdYIL)F))->_s4>R+L1U8jy~Y&lTa6v8Z!`|DzS20u`ds4< z>toGVtPeE5vEJ7Nw!6q}O<XGoBA)ft{Hscx{OU;7Y%^(JaWzuILg0(ew!adDAQWr%j*mpD_E(f7A^44x0n-A#>n4Xb#*5764YLYmeUB8NKgZ4&ja0 zC1#r)-7< zPgt)II%d6A_=xoukwez|L=IY?5uLTZFS^J2oyblb5Z+-6g4=CDV4EHAA-wYeThw*N z?7;*5zaL)5Tt7yjD0v?!PoDZ~QTKc;sGB}+^vhmBEN4ApIZnEzb02dp;5+P6A#lLC zL1@;gU1YCQujp>aVX>VKHV1@OIBC(JdZL;+xz%C8pf^q&B#XO0UB+o!7W-mRs#QE5FM1g8WL?C-N)Y zzDSRGfW&eS5F7Ob5kz=C;DX-Y3%!3JuJ?vB$nz)`@;D0L`4@@#17<>}qjaewkv5Fk za8K5qp&^{xf@AqM1*QpZ49F8%=T|De#F*^{bDZA&qwPbE3AtWEIcoQMnK8;gk(8i`639gNJC=nF5A?hUJv zTN2u$&=s;+sUu`ar7d_uttEJqdQ;E=jmF?>nhhbZ)fz&7DK>)?;L^Oymgz#1Qz+dLT_M*N)u7lM*`eGR(WhD$zCyhwe4}Pn_+G8b@C(`%5ic|u|NA%wJ5(v4B`lqt-yM8W3X7L<=8SKxQ&XXr=bM(EI1(nsXs~E^PnYT*yt<70HnCLLJ(&d@H7b zTo?B4Y(JjP%rL>W^jMLmv=oW@)NJXR$MNaG;`QF^ExxoUBIng4uS&8CRnHf^$83nQ>=@klvX${Kxshw&$DMOl> z$*Z-~leg)lB_G#KO@5%0p88oWBMp=?(?C8m9c1S-Hy0k@T)dn~$ngRk1JvG8fxfd6 z_kCBgld&psGE}Kf^;VcNI?Ei`T1z~+8;Sz?YYHNSEAr#TN^{esigI#g^Rr78a5bhUSTBk~N)f&-TtL<1DD&4uN%l-MvOT&bUOJc-&xbzE=L*`>C&p(ESSn#oY)JSy}5H6 zgZMKWB8AiH6U3A2(xu{S^JQadDik8Co0P+^PhEk&x9Xi}w%O&JAU z_N>{R9-QeN0lX=T!UYrB;zVLwQzfEWa;3wY%jANa8WaPY7AyNTE>rbsT(9ocxDWYF z-K*iPicbS5`Zj<(A~T=cScg;iEbPM?Y{mOu+ll!@H+s)r^uGO=KMZh?vH>wtG@wf5 
z4H(fg`)!z0`&`)*m-=$W^@i|8^+XGWcPER6EXkG#TwEgU*HtI$-PNh!(K)Q-*11mE zwR5kEOXpP;myS1zt{oun)(*1n?I1m$YxsB45qu7IV*NLw{y63!1E}4BKDcHXu0Jnl zBDrID4&|5vNgLCpl9pRA;+8wHM2&i}hc64{3K@yy3mi@m^c&6)@g6D^^BAm=bRBG$ zb{ZIxbr@JHXFsq<-frNEyj}lmS^K_U()N8I<**ba=W}*$4C-(m;P_9X{s`tU-DpLc z#?b$xHA+V>p0tLWB&?Mrv1>J`sI|tlur;=f;5BZ{0h4}gzLQ~`UaMod-B+jaxva_; zbX-*>Y(LQ^YBMn)ZZ)w+!g6A_q{YN#35)U95*90diCc_;nB^FV&gUpTi!(R})}#Iy z>i6Njw_y%aHHrRnJ^HU{T!Y5<_JnK^BtcshNWfNI%6F>;&1DAoY)y%z0Q;S{ccZ^a&n8Qh1khaztKxQOdMG2%R{LY!s|h{LQEWjE_W z+06RVtY$;$=Cg53rn4E$#=q`m??4y0fb|bY^#QYVW_up}p@Fr`DcdT$;Oq zOLG@+&u1IX{YlgxLH)(3-+=dCycuhNJ~Vm{_F!NZ;BuHERwtN==_y`fateDOPOA{3 z(+0%wj1@6B<3jY#_))rNA}H;%Ni@x~c{KI2HFUMJOBgC=S2C2(ZD%N*JI_!$^NOK( z>IbvJNnlYp@lV#_{9BItJ*eNbfsj(X_l#ZmJ`mLQKaBdvF#|Y_=RjOw5Dk2Hu<{Ka zqI5%yDBV;diZ^wM!XFlh6OsSZm&o0UAhNeoiS+GaB6YiklDsoSiQm~siQPF$iQaxp ziT?S868!^cA~*lZ3atN9)NeukxeO{3?>*)a>Z7(ZYFnJgeHd2=QM*Bi+-)Wz{e+uH zJrN=jPo;_2QxzimOqU2hGb2LJ9f;ryZ^Hi~obbI&A-u0jkVS<1^>V`fW*gypbB=Jn zenB{2{ve#s;U5g3ejDn~Wl&lBQ2!|EqqY-jo1wPO?}RAbB1GyQA)+s6MEElY5&XPV+`l>}Zz8Mj=?>2=X zgJ~bM;^}WRv*<51O6botY8g+|TN#hkmoOfx4>0~xU(R$-eKpfv^(m&?>N8A#svl;$ zseYOHn#N=1iy9wU&S?V68RQgl5;>s>%<};w)Hg%#ZH~U*8gl?!ye1nQ3oCW<)xv~& zXX;43F!rN8Hj1SGWtc+0Ymm>lWl+I%Q@?@vx_&$JRsCL;%X-7C7xh-Mp4VH)c2;jI z+Znxm?5Ffjvme*L$8lKyEyn?aUmUXrz>aTKX5V7~YzqN1)U`tIZHw#Pj(8o;I37-7 zDKoa_=*p;n`z)glD(uZ@fE9U+``<`_8i&vzJZgz%^|SocMNQj`@Hk>e^!V z;Dp}S4X?w4i+pwGBQM>=$zRUOa{8^@aBiYw^Cvr`CW%8`@EaY41Q6VtqUN1E2zDQ)mtygr&ZJGFh>!d`V>sHC7 zu7{+0Tz{8Z;`Ul%vHLI4#U3Es_j zP$9A>T!m~4H=(9O9U1FFd{|Zmg|e>5_*SJ)>*R;$cuYIzuUKeFsyq?Q6`}~k-_61Qy7!hpt`yb}o48PzQJP1Lr6NT%& z=y{IC;yVYUnaG|fKC&%FfozO1peCbj=_?~WSVqGG*@wa+x%)#C`Fevh1eOHl3wH&U zi?#>Wi?; zgIE4I26+D$;?Vb^=Q)sso-2Vyw#D<1jR~@3GC`MG5pP8wiF0M{kMU#gi4Nmh92v*g z5s@m;7M>&A99Asa7+Nh}AJQUO6S736DtJV$B6y8_S?~^plHe1H#la60i$gxkl!Ssr zNf?MBB8c$cfG5@=82jC0aPI|bA4*02G+gV$qHGMOq#R-|fC!zt$Uz9c8+B?;c_ z9q}PtZE?}O%`r&=4bhpxwNVA4)sYqA6_E{6Wf7e+#o>c;h2g6d^24_(=7k?s$_c-x zlpFC$HZKw+^P)f;5k*A)=I7j8MDKkCziymB-C6XWGg2oh@8qH1K*n=e 
z$w-a}>CaUmOLB~8?b)_W&6#ej_38ec)oEco6{)fOCCSNxMakJB`ANm%xrx%v-<Laktmp(o+*-* zULc;4Rw<}!uIjP*+48GDt(GOs9yXTDa5$OM_lERc-K z0&zs_Z=Qr>4wQ^_z%kfWjPn5f?_@QO0n%NGz4w(oq^U}p)KzIwm6c}nl5z*;f-(=b z+|mHf%#v{4wBk5{QJcrN00j$CHa?HVMa30j7?`yzy*E;OIuHzy#_2Q(WUX3cQ zGp6O&*)nCWZUM)Sp0Bnw8DXNg3V7mJ0J)k+4Jw#x*R4$AqJOv?L~ z>{Re6Igh-M^DX%)?N1pcu4458qk2605pvl>MI=2{BY0monl$9*H}kGJ6%v||R-iEH0T z5uO*5vzVV`Es-N>OSGw^#b&hlE_+6Fmj_Ekr$2jWM;K>tdmK+ddn&*0qC7#bwh9sV zwiYqh);fZjIxs7%&y^00IMiB*6dhfk=F1`0AO{FOah$5&U z0*Yb-8+K#wC2EX``KPKeMvaLvMiUcL=6f#c8{d5Y^{sDZ{W4dSdCuAA%*@%(?3ufN zyxY<%$^Aoq37Pj>3B48+8#^(;Tp~bGCyz>@!#5t`h(?&K?gjbF64fzk^6NM zW#Ad9)Shva+%touJ#(1YZbx2Jx0fJdO^_&bO{9F#YN?X{stk$mszMd7t}1o+t|gkT zon6|_of~x=JNM~2bY9kVSoxifL&qy^#}%9w(0rHkh`}L@!4C9)a0TxFL%R)maP3;$ z|LueSupaq8o==L|pg~cCh7>VqPN9P~Oz@x^8?eET@4F#P;JqPMmNLN)87nkln!4(n?rXPABn0DZI(bRpMXzE^0 z?p=0c-fe__H&Vei^nKA5ct9iYp!N{O>_`6(p$6k9p2Il7kog&TGCQkAre`OR$=MlX ze9n?)oO7V*=e=m!`4F0VKAxsr$fC&?<}-#DnwUu!dYFk9Mwkf~PcsuP+-Jt0`;FB< z!}0V_kHN=wjLpy=tA!dQsz=;Qci}kBJ869!Oqy7PXxz#r^-mU%+W)K|)lY{=<+G#UHmTftPAVU9 zq;ivER6hKNKJin;A!ZehX-&8&w*SeGP;2ph+C*b_*_6L_Y_Ix z8r*8`JD{H+-&eLIg7AGngjgCLTBm_Tw5^GW!ijsy=@6aT>ou@BA>^8ktb!`}gi zmlXX&7xb5Ge_e-^jlRdB-+>n}{#WoE%5``EH{rj)577MrH4t}*#NQApK9eK)=MoY< zS110@dc^*0Nc6KYumVoNAH)M3_&?*I{}~hg`A&cxzxWpX3jX*H&CsvHoXdF+$KV<0 zUq%fKv@Jg(n*J%#gs;%|uZfiJ6Dd3*5Z{WsFB~Bl?wjR)&23y69 zIEA^y3EYTd;6IQQpd*a)_P-DvYcZ~|NgUqJOS z{Yk%$wc;hq(Mujjzw+N8!1fKCA^ZnJ_zwnP0zT`=;$e=^+9M`DXip^kpcIBvEC5xY z0knZmctmT#dcd7*NMo~xXpXDT=7iORS1NcCrWp!zp+UyWnF0r%eUHMVczW+2x$hVL*# z4fYTDQ_nopXf)9=8| zg1!a(2TR<0pNn?PMVnBc@|VRFdNIp_o|rk&LsNgcXA;ePWs=H#W}MI5GM>-gG_GZD z7&o!kjhFMT8n5PEG4A7CHr~X)XuN}e-uNK@tnpdF8RJ`m6DHpa-ZS}Kco^QxK~qkE zZ#5R|{}-GU>M!OZ_qO2?XODKkkNC?@ot{~br$=*U&|Rx}^qHk6eP|WRT(eAIE?Z`@ z7c7f-=PWAur!DIECoP%embnTwE~If?0ky&8RCJCSbK&892%&UDT` zfH`F!%^tH$<{h!i5gfEF5$?CG7VWjEm)m2rRDQQjhr&+l9)%I>LB;LX!%Ew%4~n-~ zpBE2V-r^TNrQDHJ-oAHC;I3k|)~kTqeK8xkh2gsZnv0Q>%E;u}iYvu}`_* zakI)g$5GW@$CIi(jvuS7aeAV<%IQz#F8D8<&YV)G3y$n}7%Q)HL*9$i`|UvFok3XZ 
z2lD6&YBSFGXwXsLsdT_^F75VnV@G@gd0Ty=1Vi3QqCu}r`2o*Dg+9;uO1&NnB|RQX zl-IZ~S6SuWt=8!_sNUhWLt};8dz$TT*EHMQA8EFFyi#j{_W~qKJvs5aaL%}_q5t1N z`{OX=d=a?!AC7zP!T8RDAXVBMY)CsoENE+p6T30kmp2d;CRi63FV_>0s<7HWN2$xN zSiI7=N_n|&o$4~57PVHNPK~ABeVR?)+q9N=AJA^}zNl00eP5^E_oZf?FQ-=L$Eg5i zAbAUi82sReoD=%jad|n1OWkpBI11l65H6wN$O*JLawe^hv}4vqc=Ec#g9Tk-(Q+N3 zi3;r@8A@%z`I4nUWhzZU3)Jv!AnFZ)%Qfo))@m;b7}8k~uvfP_;H+L%z#YBHz!%z; zL7WCqs|@B;fbv`Z!lmsYV(>{6avfaqPbOlW4My>K81G9Niq)fmIAdBHXU(jNb>nr! z_zRXrN658A#Va&LrYbc? zKdDG9H?c}1JF!taL%LiyP1-vyMY>HtS$cSUqVziWVO&z;UpmQ&oF-6DN#xYt@)$nM z#~Ax_h`|x)?kPYFijZ$$y9&#n_8b*z&YeJwxu&!z*P5-)apld=_7#?9g~}CW#wg@x zB#Cp=GnKQ_3RE-FDm2nk>$Q?om+2&?^ynp|Y|)QPIXFHxyq;`1w=kUUN^G>_8&>TmfH<8Tr4K<0(*hW_Rnv&Y3({Zo9uj5hmo0ex4r{P)2sd-g$s&BalPw+VA;Aj=*B9`BM zi{U>s!G8c1AQxmbiz&5Pmy(uFBk590CT^)c8{O=|i);?yhc$%@gPY>y0+*yJ_$|&; z@>x7z*|V`;)xB|rx@%*Do}aL1;pSG#$X5f zzi|m-(1IAWVIH(22FsEAu0ZawLY`tfG$+(`X=k{8$V|%-@eS4pZZTk*YoA%SH*6m-bShxMEV%^Fq+q8@!!FG(3 zn1_28A_nMhUn}MTbemRU9J+A-8!1U*H@*u3@2m}4t4x8tdgR|bjeL78$h+5`@$7YH z+uKbZlUSWcA?48aiQ_hZK3hT zUqmwoInj&_oZO7{{~+_BHbcJ`eeYO<{JR%580(RLLRY#49?W*sAdKK$lsl0F?^Ytq zJ(^^($AD(yQsLf@PFFb2@h--;Rl=tk_ua~Pv|7sGz!z=x27 zzsDmZ)T9_5*Cd1ElYlWzLS4$l6Am=tgcprJ5lZ?erKEQ~T$ zfaE??BH?FhB=~$B@jo{r-WM~8`N9UpcOF;}g<@ft0&t?ey%ZN_OsqTD@tx}^5d0C_ zSAhEuD=_x;7s3~FG`qXy+NJb@iFQzK0^(}SMVRcCM-Hc zK#ZGEYPbQSiv_VE7DFajn9jump>9~5p&dV<9ZzuJ|F{ec5 zV_%NgXX3A8PQrsjpY73Sb2QoLCZ0q26duHv==)uG5Z^%iA;2!icp|{Q6|pZBqGy`$ zSth`vn1)5VCDHHBNESlikEFsEsemui4mQCPIS9}>?#_Rx*aHs=v5!5C=b)f%{{fzZ zL7xpjN9=E-2IL<4{s5Zafgiw609_d4CAPnTSBQzslle;(`$bH+$-o>qf*^o+aA*gI zc5nzhhoEzqXm8-O;5ld_zf=PfI3i}2>t`@ z0tdixa1ne09@5|R9Q-}zf&7V|{PDlQ=}kh@ClIFvRPb403UT~d_?HgA4@84BPz0($ zBWMGipa%?qAutR^p>+g0XXzE)K#0Dfm-GxF`x7=2H^y^p{|z@DxxPN0|Ih^*@FFCL zhmt8g7#sLOp72+~@LP!>7nB28ESkV_unIj~2ZcdsZiVJ9`USCgPA8%KK8EcM{fPg5 zpx-flWUdT=qx9Xs;3gs0pNQu_^wr^cpbkR|wIJ%#5fdwv{a`$lFdp);sHw>SMfA6* zivASUV+yp;Z=w$RRk((p3;XGra1%Wh4$~9iUV1D%N{@t>=z;J{x-a~J?#aEPyYOQ0 
z=34qn2K<;W-+`NqTz@ir2LlQG7i=fMi_ym%(SaYNVTXC*Lw~A7&~M5~^g=m@o++2n zQ^^8)B3Vq|OWNo=Nhdv&tfdE%4Rl|!mF`J)(_P77x-B_RUrIiwPgH)Oo2swq13ch$ z4c^OD@IGo|uE@Y=8Qe7Z57UtUPr?2Fsqi1BARd!3K9i=?ANq6X7d=;csuM(ywPWdl zRyuv7RX}&OD(JRWEq$rgOrL2jr%$w2)5ltUbW?j1-OwJP>pBPMs?KS8U*{%W)O*C7 z9rqh^Y8>1$eU3S<&#}j3z>9ecX99f__zx!VAI#7WQ^aG2CcT_GiGG}HMh^{a>FbH! z^u@$*x-}`0KAe+bxYn;NIGR|d=8<#RijjP!s#trOY;}-UyaRrce1>O#c=PnR5J1W*mQ`419hIH>Qo#nl(;CHn`^tZqE_XZTM3k znvbK)vrXx&r5zo&^rjglN6RhoU~q$D&@#KSVuNoT%H16RvrU0NXLfUUdZh$2g_kaYg+B z_`nI@yJ#&(=dCsAxb0LrWH*=g+Pl#%`#@&cE{ff1m&Ds_o5|Z`TgV@@oiA8#vryP? zvqZGcrd_nwdW~F<^#=Jh)*}k5tPd-6*<4XvY4fe(3fq?o?eJfg*>Q3}^e!CqpF#f{ zT<$*hL9XG8`UfxMJT82C&k6MpE(WyIbvA8tbEF|RA7+DV7~Ag>$6M!;!tZg;5v*}8 z7Orxt6m>e)%5^v`RaoJ;QnB4}ol=|Q7IBN?KJik=bCM>fyOPCDFT{<`oFY(YaOUJ2 zTsXP6aQ3e+b6-OHDo*WZgOPUzVx8*)|G^XfgSS2n`IyoMA6r`Ir zgA-SKa*99!$iL+m#NYw)f0+k#F&yiBL0(hygD3+rWB!UW1>l zh*hE1xCyi*&Xnrm2Q7?qVXI?(c@;4s{Ick1VR3Y#s4yx+K0hj7F*kC)I6JadIWwYF zH9ewRJvCxTBROKfW>Ul@t;C3jnn{tb)RH1O<>V+%0>nT`#zTz5hlu-W^!*@~JUcTG z1F#N$LB9+kM#Bo%YID;0z52{RbV#}mnyyAo)eqns1ATK^%loOXKpB0y@m=Rkl zPK#ZroD#cKH8EzDx-@2^Mtsa(&A6BgTCp+TYR1L>t`--|sl>-}5+Hty%oDtZ@jsCa z4+h%XvyuDfAm@W0(S~?5rYO;(R2`~Lol5glEvPiro+(W6VDpmw`8mnqg3P2?QF>Cc zTxw#rLULlUQlhj*k|1qTjg@w(MJH^~h)UR_87V!d6(Riw+m~vQQcfjG%1MCuEw?ca zml9BWfPU|R_Lh9)8wGgw0ox^+xaXRQ+Jj6@D$ATq#aZT5kZHr@X1cOj8NR&qj8H*p zdbBVpJyA}YmZ=b*R;U!4S|y22U91w3vQjN9WxaZ6$}Y{2lrx&aDPLpzt6E43rxKdN zNr3n*pJ5!%CEwuWEkFKYgo zoHCFEWOCxS+{C<-`EmR5Fb*XchxxBPm{x47(2in|TdYi3#rl*{GM!ROtSG6(fsvMY zuyMr!yqMx}K~zzkFuW*LF0?RLA-J$iDWITE;#bhF;#06r)hmBQ&9mTyx<|p6YM%Ko zR6X-KWv_fr;+4mV-*Of6{}|@L9%yaG(zmbXwFlFLZFwd1@w4;_B}%T;ro>7kN~oGe zu~oKAbd@U`S?SA$Rfh0GDxw8J6-grh`B`$l^Gg)G%NHtnl($OU%GWBpmTy;aE?LM!Ez-Xg0_JA^S9LBBUHz+9|-?ZMR7BloU@KK`75?@^AaRi~)B zNfc3MLSglDDWuMk399pC1L^`=$ko z+b(=hV!P-QvF(CqV%r){2`JiD|HCQ7U~d`x2k7@hw{tOa-zL-_G^737GK7_C6sk-iRC#jP31c*%@^1$trXfcH_6RwUM)YT zd5eNo^AQEh=8qICn|_kFT*Ap)E(VSNaHI(5glfc~0WpAHdnMG}1bT;y5 zb#@76b`A;6Iu8j=J3kbfu6!yqS-}ZSmV;W&DwLX|;u8$?N^=XXB`eJ6rKrK6M 
zpo5({FvuDW9AGC8TxTcu|HzxX?r)x9FUL1r`wu%1{|(SzjefUv;&@zxdC-e_fK)kV z13Va{h^|AZLD+&EaGQW;Y*(h~+jVH#upvzyHl-=UbI54ei6#$wli^4h8H`A2(nt`rnP?Ms2NSSmFX_NLL1JXKVOq!@m(Kzf#>W6(u?Ql4$ z9Z4b8BgLe0q>+@5bd%)B4iXb;W?1^-~qrF&^(1a_^gl=FGxt?ye7$?pFncwr<3SB-a&D}fdm(P z0Gu>|1?{-4jK#199>&$jqTC3c$V@EEY~fR&9glD^e3Xj?J0{(CJzyt*x$@mT@H6;j zjJnM@ALFl55%Y*+vge?ne*w?Ipudx#s|8)%G#g}-?hP6PR^njU-yAcxN+YX|Q6lP&ND5g-}lf%%{o$h?^4U=`?v z&Iaggfe*0*TKnNP9ETTi8UDwY2-6P;*KcFK$ItlJ&%jUrg40H>uZbE6Rd^3d@FE28 z9|-;f%>`~C7{r52Pz!)U?RPL*@rXp|2^>&BNZx>1SF9oyE{; zgU(8LEZxuQn4V=2G2)4>Ja8WNv{m-$Nh1 zgSH~#fxKS>@lc(Jv4J(JjF!-4q<98^X(UU38bO$~~jY@L?{(gSi0z<@~?k4B$UZLjJE0 z|6#lW>P`?39gL60WcpQg7CaY6dMxp!Z^e=HwK$2s5@*wA;$pfbuA+~`_4J{5DSaSb zLDwW}=zU2)U6E|2OOjo5LHP)sQ@KQERPN9T)WjTx4|7DFqeJjt4*m;n3iK!A5RqYo zb{HtoU*lEjncg^hq&1Dc)|f+|tGm%h8bNejGlt&ROr=X&xpZEul+J3`&}r=kI;GP> zCv-aKm`)EJ)!jfxbhpzX-Tibx?+oqN`-Jw4d&2D0|C1RR&oSHIunpU{j2*_)V4WJ{ zum=yPG4#+-p6-uVq0h#Rrw_)P(d7yDbZ(LlotzX##|#qah(S6XGR&s~hGn$RZ~=`P zHZpq*TbW&koy?BO>zI+rL(KNcyP2(%k1<<}t}~mA9_*5`C8(%Ka)o{rzp^6)GM8yHjR$Xm`ewZ-Ds~#Ani7Zq8%nlG;ET|Y%?ij zwwTOkhD;VRn@kq7gC_0ldXv@cfXRBc&t#a_YjTjc*7PE;+w>l9mDw-+PBV_T5_HT2 zvh5qVS*U-2?tNQaJ{*w$+av$Bfd?>KK*wjQ(t%k9w8vr=jaWL+R!bimvJ9m`%UEW; zWeU@0na%cE7O`tBE7)#}THb1lW?q*?2fx#zm)~KrS+LwY82n6r)VjeC*&bajAw0_wG`l>%=8*w;S>eNB9qTcTT_aRO)fDqE#+V zw9>_gS?&_Tv^mGHEzU{2X6H=a5~qBAqf@z{-f5w**0EW%&~c?)jbopDwc~dAO2@+r z6;9U_%AFo7lsW$?SLV!#fUxu(p4s7Y=z%ftNBu<*^4?(79$>o_?b+ZiqFxUTTJ32_ z9iFpjnU_7a;5n%#&p@WpGlH%6i03WxNaZhZ&*4|Q7YnM~tArKq4RYmf?ee8=YZZ#! 
zwkQ_49Z)K8zpRw+{!l63x8F#GlvtqV~WC zwFkZvsm0HfmiSv!gP$u?>*vcX@C{|Fe4}|4zKQ&DpA12%Prk6&XTGS=yH+mWyH!5V zdyQg__a>z*@4ezo@AKjepZnl9#S9-#KEoHzvkxbF%Ok}6W&rLthauO&(q|`@9Yfgm zp)Fm3$aexI)EqR98iHm}ZSWkb33g&CgS?peK|yS3U<9u?FrHr+kSfRz$PwlSl!&qe z7RY7#H!Gz3cPXa&ZxE;W@0KJ7oRK62d<}k8N($r@fIN_U%Qu(@*U;w^QOG&tPy>^I z7=RvJ^4rm_rcfoS3)7(mVN<9od^VMb+f!+nJ5vJ5 zlp0bcmmIQKAt`vJk~DZg93Q+>5*Knx5*u<`IX2`MrPxqTAug1Yj|<`C-f{=yei>Rv zp}8j!xqmY9EwD<8+5@z!E>eN2qcmxLlp&Qy&!nO#8_JJzVRE8;*sRE4HX|~Mmli4I zr$nR+k|Od&(ufMV_=pCD*ofsyF%f;@sE83sWW)*Oh=?!1b8$ojrx+Q*$-hAkTe&YV z4(FkD7@9j%ko%`2_Xi!RSnJ1Qts5t#@;Egrj+;mY@g|fTKbNxNoS5`DFD5lEkWG$_ z;3dY!^AloI1#z*t!kCzHxu}>rg@~ACis3Q6;?S63Nl45wNpQ@k;Ae4245tVbfIN_s z@o@;|0%EWqeIL%iv5uItTXQfD`4|V#k_Z0-KPyZl%1)C|dYUe!q)nlu^jRcLx25=W zS0*;ymyJ#fVI$L`dEse^{Lr*aL2zoZC@{4~-amDzf^TZKqIc>RC9l-OV$akM!BZvA z6i&e_g_D1S9JX>-a16YMG1!H^Z^E*(w;1D4f@cq~Ez5;Iex91GNQv2+6rW>2u{mZG zojaEzbDfxoTrVapCy))viQon0B=7@r(gnWR1w!xaDv@V)le~NOY6Z8f&5ADBhm@SN zKTvYc`ccs(i&JpPz>l@DF7&wSA7)IYWl){6VkG!)2a}nDjv?U#Fi7ygSbdf4W z7LBLy;^`DxY)Qez_7qg&&IA1^WF}$op!LdoRFT1o_p-yDG8n z$1_sF74RP_b;uuHj9=AE@~N6f-c`=zS>?^RR|PSyRZ*-_+Df+|1XhMIVYM^&I#v~z2*q!;7;^)BlLR~;ha#1_kh-; z{-6QlfNdNI0p4{A{%p? z!&1zH7UW*7xc>}XT3NDdlaNiDHd!w-q+S z7%WE&Ixr79k^grg|6hgtcQx+8uaP6OZdEev)+3W{BQowbqZ!?EX?l+nP3!TdsXd`I zr6-;y_hgY_PX$ftX{L!iYnkyqI~e_*vy6WCw~YSkSIoFo95b$qW8bnB{qIBHSGHju ztbhjtU0EGWat~s#4jxQDY5)dM1F;@85QC_J*rY%cH>uKuO?otb$cXfZ%xK)uT+$nI zBHbZh(iw^%thpCpx`d!)MYH&PwsNOc3pyk!IQSEKLC zR>FgUc5yFiAfOoyJ^vv*hp+`TC|jYw9Xa?8R4oOB0feDF6=KyC? 
z8ucT^(P&Z_%_OkzjNK@%J16H^5U8?EagCySV?*gSf9iyc^K(a`ZWKBYZGu z29973?3C5OAO{{r|M%lP7zg1296=s@j7OpqO2otWC^N@(ags9xCSWdb!9pYu;6#5C z2mDD4^a*tC#4tF6McRGvE8zY^C&qdS`d+yKbvWpAJhX%Mz=PTceP|jVh6ivIbqFWm zznsQ1DCday7YP?S5fB44ymSfecpvR}AMLm{2RCHUj_bG>e~<)llKG$+po1S^rypDg z-{bFp1AmXP6k{%{!^ztQ4+wn@J^+36b2`v&+3+yb8n zu>exS!Cn}OuKG3jFCU`sGR@DyZSXa?2M|{JmWQN79^Qlmi(|y( zi5?OaBP`Zuqb3CHco7DVA_L&0_iGox#CnMizC=7;-Wj6^^DG%-9fo*&oW;995a(&= z?>O{V6}k#vKnMSHO!Hf8zXLyjC*UXW44^yo3m^MafDfXA+7rZtGXOJzGeA2yoD8`- zum&LL90r<0paCa_=b(VP2Nn=~5JJvPGl4zu1(6^XIi@3s2%ELi7-s|E~xco<#v~x3bSazJXKIfbW1B2t`0L z^uQEg37kOykoiGr(8@=D=0j&8ypP53BHEy{5<1-&l78%e6ZSidq1p%U<2XG+EyzPm zo^SED-{7~u!CvlxuitPNTNz{ChbnvrF+2znY9Uzo4>Spw0bAe&%`oUm(Vr~n6d}5m z@I7iVHI_hU8GMgU==4BiJ@&r^(`OfakHeV4=izyLMjxY9H?fxwv9}Mv^?$)>!++3_ zLo5XFJ>Y{V!HW?g7R*${*9!e~fo1^oqM?-nogB2i1cyfzY|1()G(o2whiex!dZ950 z_hlFn*oPT$k}lw2K9823#dw^C_9^%-Cwc!3HxB-THlZCr9qmv>JR}G&2(P4oX(;)<}_+!P71fsap4|1 zDmq3-UJA;!D~mM&%ILBSXT`?l+9RYIq#7dF=+ON_}dsQ~k9@U++ zTlGEKsd|Nm)xM>z8ZT+HCP$mV#(#mYeS#N+LwzzVUYuI);;{ZwS3qB=DA5g7UAly4 z#?ENYqvKlcbVNIl4roWwKAj}mqmxOybP8yPZaIzUE}-qYi)owQGTNfIiiY$CXw$fD zG&pWQt=B(K1NwKUcl>jvX9CA`gEbR5X7xMZ>$t{B+#ZSl6m?Hd+hE8_)Dr}v?I(*gOG z6LLPV-yZ(M9OU2TinIyOYYohvN^31FX^n*=b)qh&!!nrKEu*Q;Qp&VgrZdf!c}$aK zDYMve0o!1?gsrn&!K=0Gvyx z`tJe%!xL*Jm9+tN*vzD5Ha65^<3`Ok{;juYw%VqGt+J`(RoJxg%Wc;1%WO6YN^C|2#Wv@Jg|_#E1-35*1$G=i|24dPTW$=z z3T&(n=bSg{-Tlyiu+;~(2kuyBJ2JG=UWJx9jHjiJ#8%w9wsxs@?6W!rh(9-Tj#o z_b{f&Eruy{OJeigGTB_WLSD986+g?ZQIO%bLYU^(FG_VA5hc4FmrHX03_KSlxpTrq zcTNEK_&mm4T;i`l?I6w%?Lw(nuPEMYyIh?2QMp*}kHIrh ztPdvyf>`e{_}KElKwmEeAoqvv&M2(&V~}%3BL?B{KLU|!_{mX)pBk0=PoyG$6Uz6W zL%IHrl;!V98U6uGntvFR;vdT<`=_vp{y99UeF@uT>cB*CUGZ-zpdBe?&gQ z|3mq3|EF@{0h}68_0LFvJElp5?t$-%x%VsHqP5FE|M2Pd+z!I`}1;9`DMaE%}$c&RWfc#SADc(Yt^ z@FDr&;Op{1!B6m+69FL*yyXVw;7Rm(Z!E?^ihL^xxj$%0z`GaFt`huRUYLZk!gVP< z+=x=b%_%9|nxx^*6d&%*#D)hkG2xL+RCod#5uVNq4=><{hF1xK!<&SG;j2Ue;X`tM z;RnD~dEfBwvHe@*7tRTR;4N1X@1xM#1^uBEj6)j60W{%r1>%yAamk2QqLgSYN{lw7 
zgcwtbieUD^fkKxmixN?@15jbWe6@nxJq4ECg=1_b)+3Na2P7za>OjCp{zB;_LSL|lTh z#1xRNO@7%1rbw^;fzaeJmZv;&N}22v35BNc{VxA`15l5 z_;Yi13FhRS1K$efWdF{elg;txWc|Yt#Cr!c28%EbHV6rWVVXRA1nYktTtW`-hYgy95n_be& zGcVb}n^|%ie8Zbr{2Om(5yzWZ_z$w0sIBOK|9tqe)$kt{;NJ5>%mt8!&rvnVeerYG zDjwNaNyxTZn{28L$hz8?=G9oz+!{Nws&OaF8h^5=iJ)0EiDX`r!Sx=gQnFq zsc9=Sq3Ia7O%oRX!c1u3UQ>@P*oJrypwC^a9#7-IWETvvLt>;Uz1Y zokOIt@(8J~{FKyJydaI`9BH(JWq1|@Y=V9d`n??5i=kNoo$QsU1?ocnyBhglH|~M= zpax+r^m|bQu@3LS>{lYqes$95A4lr_Mx@qnMydnzfD5S%_>=NLG)V?BNjy+NN&~H= z*grrD{d-Bi|0>D%{Xh!q{wBplF<;j4j|Y=F055POo&(te4`>@aDE#Ww5$Nwk9l#!V0DJK) z!~urbg9CkTa(;0zxq;s%%s7NKazIejb! zjc^g3i6TH-cq3jQ3gE!Mi~+yA3O`4OE}aF4#ibYEuQ8S&?iGl67Wy5F{sutT=>YVh zDXW3eI}Q)v6#N(bD(@n+uYhadI{1JGPedNIC=&Q0YFLoz;-%zfrxnA_5rc* z4lpKnFyZfDfbSrXx8Da3@EPp@RT%4B#5^AT4urNd`Z^nZmDRxLp2yf$$ zAHa{`C-5_PA%xGO zgxV9tt^D|AA!4@_qwg0&IX62nUHE2b2Pt z|Ii3pK?hg^)`3B=4eSPo!8v#nw~#kKhF|d8nCJ03e)`)Re#Q1Vcm_B{0AbTc9<_ASUD+xN;&?$yS6*OwG=O$<@ zgTK-VjUMcOJ@&r^L$wRu$6np)`;NjT|f)_F7 z#mMSWX5hExLDL=nM=*vv7CNcW$%RHKG^(Ld2aP6ZEQ1E>Fla42i9t-;?HGl<@I6kz zeYlSQ9v~vW;*jFTxQy+kci>dve~4Mk3rr6IJ`+3{#Ns7l@f7tz-y;@}$cOG@UfiVw z=%hm<9~!05sD?%zG@7B&j{UE~zSqI`7{YAWiRz%kbc!y*6}m&m5V4~ey(8GWY_`11 znD?O!eKEk4=0x)J8>0eUyoVFDIbWeR=Tpp!oA6?;qb}wO=I2GqK$P?0t(4((RSmbL z9y&{*u>u<1bO>W|0P}Mn{u@P0c4H5_*eA4u_Xmv#@DvqZIK2HG@Ri)CO@g%ueo)1s zhttisLOJ@1r$!(1Ceby%8C~Mr(piBgofHJqF<~?v5hl?gQ6?P_70^CWIgQHI&~CW~ z+9lsgJLEfQSbiOC!#iWPDD0uliYIB4(#JF?enRUde^Woamp)}2N$((g)$vQ5-X2fD zDF@ubpKqwMbRBc#qM{a^5>KJ`Bo=f~;zWCueQ38zDD6;*rD2t1+NPRCTT}~aNOeAK zR9#4es!M3S+H&ew>!v=njnu2YlX}#TQMbkoTBZ2|b!z=Z9oo1)faUM-YCKLelc7Hy z+Q#rcrr~m6081TS%n>ye+N(K%c50c@Hf?Jf(s3g^u}$lB!>La%o_h6CXsup0b&o5e z)#EB?)wo*f)L%*+`kk~wzn|L2Z>P5Lhp2VJ6bQ{Up&QW z{VQl+!6oK|9oG3cweFk?|G^yV-067!VwyUwnLdeD&M>3p@M79bTxqF^FEyEj(qfZn zYA{WtI@1iQHO;4mrsY&)x`?SZUCLCNu43k!ZD7jHcClqTw;nMDcd<$%G#QM$}dFO2S4>RFEnCsKhS<`9pY)h)QaG+WXPg-aZ zKs6TORAmuI6&A@}T^VN7x*zV{Eq7Ene1~XKdyi zZj20U$FSnqd+763=pT1S&gq3E4%mRtxJ?G{1?wv-F*VN7rrNniv|ye&Rn4=Z3Tsy? 
zv-Y79>tHIjj-oTTtz##VzW>?To}y$O}r&!IvG zN6L5bq+Evp%615&EQeUia7bp-9I}~IhY}{)VIiC3(85X`*6

    H}m2g5AtIjukvG^ ze&EMA{mqMU;$*O6ICAVK&^e2~AAq)O8Mpy-2Vsqi_AEv_s-01LfM=wNo%Jc-c{=5~ zSWvc$9c8+>Qo4%|rMd)DvP%>txkwqQO9qqRTFAz^RCZ%}Hqht>!O7!rg1djlU^9ZL{&p0N=GnI+*%wr=xE7)+)MqZfbN?wTPAV1h^ zlpo}Ekss*w5c~l+o{XS3T=him9|$iD+Cy0Wjd?KGHsQ}T_<0f9k?k#}bZ;$6@iCx8 z9}`OOv7$I1dy4UKqbMI=iu4Jk2;Uec+&7sC_03^Ie9PG&-+Er4?+Tv3-+I2E-yXiN z-+8`|-vjUpa6I34xP*CdI0WYc=nlqVF2-XVKw}K<_aYv-Xh&Lr0wwyZQ+&V#iVc`f z(E+n5GQgI?16(L9(3?U7gD5yKiU|r#WC8-S82`Xh);Fk@^$BX{c?I?JJ%e`fJ%Y~i z-GlG*-GhF|XO1^UAhzHf=D~poEd8OsUW(j530@ee!{-vTD--RIhKML8M3tgK#!-04 z6bcPBr;yNj6cp-2fuWuh5EelGVG-mTmcaOgWiVc0MXX2ILe@R3jprJ+j^`4-gYO)E zn(q{T5B$b+4&(kIl;fSkxbKa|IRTn|Sn_qI<9GxM@wou;NJTtiB6$=ZDWQ-^9SVvx zq<|<>@{h72-zajG5-vzBrsmOaXaZbp_Jy%eL&$(zv5@HgOfHilV68XhxkWc(X z@`|5Mo(Z$bBf*B;5?shN!JAy9!Q?EBCP!&9V=v8P?4*^fjkKAaC+%kEO1JaoNRRWZ z62IbENq^zZN#NKq;;{vX(a+&z9Fx#olZTwE0OtfSAD=VPmiP?h-f77DQsl`!MU`CP z#kiyzkyEM}Ii}7bhcpMWPs1}9X#r%D7D3i&i8MDYo0*d~pRr6^!pu%z&CW{S%9^Ji zWoM>;&dyAG!Ol$OSaTUEui1w&*b1G#eB|E67>81f11KrMTr5J~h4uuXJ+3+MAF|;; ztbf)ZDywD z9bu;Bf5J@5{h67T!;LXLn`33{LLWClV=Z)-&&PQQDL_>fa$jtdDsb-=Kl3g}?uWm( zDp8=>rK&WmRFBL{C)3PQ6EZ8aBGWQ^GAVPX8D#-9y*!Ghm8a5_@_ zg&K@CLXvb#NZhSSO5NkZR8s6VCxvbslJ9mWx$a;Rc1uanT|m4wb;PW}FRk7NPC)D~ zc!9~s5!3ma9`s%2!7N$!+Jnh~R>ErJpFOC9>BShVg9p@y8jybI4?uqdJb;ZXi8d(` zvq=S$NgGTA(*PDDL-xQML;{>>Heph2LdQ1Y7dIZn$@nJt2?ydIfEz>R!L%%YdieKJy0X)L*y##0jScZ7Nt%FHjkMj{Uy`X0cJv01@A@p?izysKa zI)nrG9+Jb*Kl+AaXvZe0MX#5o;(j)Jz&PJAEDDDFh(FuC(znsVRBDe%D1AK=u z;XNsIO&Pw31{UY~SR778jR@NDsS5~$XORJL(0zt^@!1GC3%&*zlh0m^y^khy3+5pD z?6()kz#({W??M0g>t~R}=Ma0D<`r-q+yuA4C*U*i1^5cwK}^0@Lfr{s@<10Jg%Oe_ zwBv~zl7~pthU9|90F&z}gq~gi_{FF9DF=Oz+l^xYeRe#CbHFJaf6&!HKb5W`_BXKq zkAY0@Hn<1A1rNX@@E!O8{0M$x;f2WKhBadHn>Kuq$w=ZXfhULs#h?XX&^ZK~I|VKv z`;^rh$b1JGG9N^ib594d8Vq+30%XsCWPn0Y0bctLuiweB5`XRn{a^@;fc?mBPr@&_ zj@TzQ6MXs}t$K{#lxfP4J(nV@$+!zR zku2AT2SM;4#{37FA2Sh{0$B})Cw@B={fUQ8IyCa&eUu@SuZA~J4~=H@rycw6!oGX4 z??Lo@JNCa1zQjpn{WmaOzJn9;JBE{^{{gq&;bVN}6)R@j%m6IQ)eA&F*d=*+X+wNFsAZF971;y 
zk>~IWIQRwsH=H7a@xau;lzQC`Mf__uXikKt8Tw=kJr8IFp&!xENP-65g@JZlMLVvb z9hcCK3uwnVS`N2i4V}WXIVbR*l%w!sj^Iy+@LLBEt$pa(KB(+{!zi{g#=H+9^wEx& zXvYi0;~~c8E^356M{UkWm>1XatkxBHF&8i<|A(#j0JEw(`}TilfElLu-h1!8_ul)! zFa>6qp~KL7??{o}n+PHxB1Kf}Eq0UF`1vLpTVgbtL=$7I;eT%iKFxRiFR$Bu&Y8n{ zp0(Dq*Iv(WueHy!XhKgB8&BZJ4g7c%Kd$4)HT<}O4)PFM-GgXC53o<^JZ*75-?)#J zDW|E;y;v}!_8vI-3yhy!`hnB>?{=JS40-Q!YW+^;^r9m}kS}$MqF2+YQxWy4#!d@% zI?+}pU||wx!3=tT9(I;sXAL&CQ2uTmr!9_}?$cqjt2#8|1sxdmk@lN^t-btq(jH58 zcKw1M8B&bL4Sme8@;1E6>lg9sISVU2KEg#0j||j9=5cypbQbzbiOyKo>XcQhPFjuA zajOYBYCTzpt!L_x^?V($S+0FH>$TTrhj!Z>)K1&ey4&`$w%b0ht#%)2i^DhC;+~3m*kFT}N z^QM;GW(h}wzXvkZMluw~a@mO|*N9=B9Zvp@U-!6=)=@WS9dHlSZqHcV?UkWzUWMA? zU8zmp4cg${u5~`$TIF6IE?@eTW7fsF;%>D~M2W<`T)~3L4tq)4ln&2F* z3NF#|;A$-mY0{FAFBW$umhSAc1F{~uWMTP>!v9Y ze>F{xylI+r2lSXp@L?R6y)-Vt*<9XpnPbDTOmc6A`R%mL#&8?033tck9iD^vg4{*~oCgo>-lfkqDo0m(8feQKn_LPxx^4V(IW>M-$%}I99%oHz8O9|EF z)OZc1rfXtazWUP2G$E~4y=g7#N$XTsdY{InPgQ67eAAfp)uxV&?WVSjqo&r3i)JmE z&zd!7{?V*C>pN3(=1o%*7)OKGXoDLiOsOiF{?#zYhP$g-=ZqiA@M8{sOv|vvOCnC-K)0jNoviRV`|P>VQR|RYHG+mY^u*a zZ&sK0v{`N5hwv@jG~FVXBfNy2>(u>xEmO?~;-HZ^ZaujUek`JGX7Kj`ephNju8Vqd z{WLBwLSyq1)sdH>_PheM=9j4@zgErpt!gUhQbWOj>I-I>>I#;cY6>@-stXUARTZ8y zt1NoLtfJ@-@D1EFFjW)|^E_>E6?E)YkMTvLHLlQ6V~V`h zUKFa>PTCdMP|9_ZwJ=t2h?$6S95`4;Ul0YBPnOlqvMRb7>< zYO4KJRUM(q>I9WnXQ-?uU!|6 zA2G|Ue+&L$%B;INOzllm7Cc%bS4JxZ?jxw58o6?(ino^rjnNpe` zHYKTHfh9{@>UBKwK}Mv z)kFDhfy!-*QchcvvfDD1)n2H~_DW^6H!8hjtWrBBDy3scNgXRqi5)vk@ncS!;>KJx z#g2Ic{%(rvxM_-OzfIds(?i6-z105@cJH3dIXQ*(Krl239f;35CXjpc_u?)y<#gIA zYn+QR#`!9JT$ob3;*`>rrsS?XC3Tf4vAa$Q-R+9+o}k$78H(vyrl_86itIVAh@Q)) z@Sax{-t$*ecsKe(*G*G|!MK~I3)J%@_1}-(Z8PYHnVgd_gV)A7lotG|9OT|F{-pL< zDya|sq0dwC{eg<>k5p`bqGI|p6+N*?Q4^~aF|kGA6MGakFjXM~ixoVuML`2c6fkf} z{sS+;=L(qk9|iQYa=7oNDG<(5&s%G8W|4c(p&#aPPQtBqD6?4yGequ-Pg(esFnxp~ zr`alenv24w`6zUHh(e~vD0q5`f~Myxa7LK|W;DoeMyGsd49aK5e0j~-AkUcxF^$#SQ-?W4J#X!Yn$P*Lh<;c?KR`dUFXsL~pXD#44Ip{}`R9BK z`OdeO&jL4jFYuGsf-re5h?B>Hbh$4qkn6%Kxh!mx^TIwkEu1Y!7L_KOIhL-|{la(!4q{=bTGX*KyjG(ZVtKpcdC54fzhl;axq 
zfUI!?ev5QXI3&n+O(vAcW=(^v*L2Bp%`}Z(vs~tD?$)T)_iDuI$24-)@8KUBwURv` zD~35pJ$Edp4c2gdSjU>E4U7q!$iFue2U}PJaToXCAq;%Cl7nvJSq$4p$ci;7<~tlU za=V8{Y!8rWdn6b8Bnso=zoU}ZoYdP1qU|*Bb_&~e46bnUzk&GoH~4{y-@L`{l^n3W z8oP6`Gq9cf_io06U917vg9ft~4G8@9VShjN5267aA`d-m$-rs{&fo<>2=3943S4B4 z)Byo+JOJr1jeD2^4xi@)e1XpYlmYQOz=!?ZU)x9oaCRhE0e+mtkNfe1XQk*Iew|bIH3SDUPFl#^_!7f!G*nxrsNnxR2JgUM;hS3;O#eNs!=b+A*v-6*{cFS@Hl46(W!N-q(T%rs zm)AHFRwO}`2XDdK@V{p0A*0bKXp;|}Sr_7qJ`n{uXcYC(&yC!r?3ba^pA#3KQ;{!z z3xCo8ZQj90mDtY4ZZvj%pF;nlewM$YY?|%A)}g$|`w!ql_!#~Ke+HVCloBmuBuSGs zItYF9tq(~Y?eS9{4-{yIDI}F^fQG;IT+}Bx8mj^B=nsbWVC4Ot5C+B?j6$e|4wwK_ zVGb;YRj>iJ!Y()f$KecG!9}!*N6{u;BIExM&ETK6v=3wL$G_k!Ff@oi!iO-t_QKE~ z3=A#8&|?B24valH70?V_FhKd!DSsXr^b*QnP5B!se;fM8Zt~?rSUZW0`_U$@pnW`z z_VHVq=nG_{A8D@PXK{SO@dGgQh+F!@E$zct_h1fp^au6{u?A!KnIJ{jtHVw^HhR$_ zCQ<%O%AZfkODTUfQzZ6M zk^c$_{c7sbf{k(1jAvos#|!xJEdBB{{qiJ!Jb@oK&>tQ}o4QV)Tt$JnOj}&StBbVM z1N7W^tQo4AalG^TCSh#&@Qo4DV1)Sl=u&T^*S*1dl$VK%7tvy#MWcI)_`JdToJa8E zVf?s?AD8jt5`J97j|=#5jx|eX(a6rwC#=Fi@3@RY^(<=HA84I_+*0ojW6{7^(PVJb z0{aYgpK&_9Z-xAcPp@kfb#%a{7urWS_L3+m7k#A+JM~0d8!bH!3w`wbBx*bpJM*x! z6dP+PcPnSfKK?sNggn4m^EfM*{)ZO&E4}q2k;B_e3bn(gQrm1BbeCn_J|ZE{M|dgpAd#Ufbslb7qUNKtYou)T2Y<=d+nzJr?XH%n9f z7Hf*%I!*H5sX_nani%k)`U9U=U*I1#A?SOJM~C?({|9@|rf>=7QhR?Uxemwuso0Mv z=c7$l29DN}0Q84IU(F2)N5@Ij%wV3s5L~FK!4;YuT&GDPtr`gF)WncJ^@UE=gwS~! 
zj~3GtwoTn(N7NO5L7fp#!v`80`EQMhWC!3KBG^BdL=0lv&|psIGu44zIn+Omd#wq~ z^P@&+c9^|pgnMdgM6f1D#AqNgRTCp~)ECMAho~y`Mm4HCs$E@CJsKB1Nn@jDYYbXU zN6Z$r#U4~^?EPwqdtA+N@2V;OU%+n2xSx3$TaRM@LJ@gJ32gw|3%S3AA4}4?*Gk_^ zjj_>StgHHC{WU%=LOpSb>Wa(MxcCB%jW1J2e68B!Thx{?POS<3YEGP?ro_c+Ox&o3 zqtN3X$w@HwoX;)dsM*# z^2#%=o60l($5fW_72Gt?Ez)o5X)f&#mC_GY^g}JVKWwZf_s5Sp_%RtjdeY4`HqAlp z=^koH4^mTllp4~LRG*Ql+Kd9#WR$BqqfS+sZK}-dQAO5dm1oUUY4#eGWbaaO&M8w- z&NWkE?wjx>+%y&D{9p70b_^Zpo;s$v4fI1J{lJms^O{N9kiuzfu9;f0S$mM}s`_ky z)nS8!6f1y@Zug|EZk z;HD|3V3^!|a(~+3JoZjtdtVFF|5kENjthA|jrJH%d$i$4eSwW?@|{&#;G^<_5S10g zsH7lS#RXX^DlAfAVWkR+8kJu(R(Zt}m0LWd?BW&5D%q~glHvEG^X=)%9|3Ua}!A6!kuU-Rr^TFLB{ zN@~bbVndM<8mko7*sNHzn3$%?if&q{$fk{oXg;X$<_ijIejfe|KPtS5C)qU)bDr{# zQ~$l#-a0@m3=#_*r%yzGz?ar;`T<|^@g=owv=Y%`658Aq*XFO-ws6I?#Vfi!O;PRa z!Dwd>Mtg(8&|*S61{KmVPr+l>D{#zy1&lc-|1rdx7RnBk$vN*Cg^Td@1W=-rq~^+dV>2UABr~k5X8dw?eyv71AB0;O-;^b!REC zyI27|HS+IimtRkxe0qoE-MdQoY2OJzHvINqLO>y93B1~1Mw#a;@<*3fT6)` z!1i*=9@>N+xP@_XEBWVk)&Sg%4hA9MwG;chu)iA(fa0{*9BjDAIdF6ZUkHIXU^1|` z3}}nJ1od7zch5FB#Xxw26X1RL7XuPLz!vJeW;0qGb_aLR2iU0D$GLcbbvOsnfJjfh zX-20bXaL7}7R7PwpM-l%U;+4X4}P4&k5fK~ouOO+;~@vCffM^Q0e|mWps-Vy`Tm>m zcgpz@_$3Q$FQlGR_MpLFw+>r{*h$7t*ge?4m;OJ425?p~(UeT?xyZqTcNoP4$eN3< zJvU1jvU5;N*Xpws?3469o#o_6+2jo6YLIH6T|qE+C>%SqfUD~47ygegF$ zzj~PWS2?~8pTRe`G??)xcpfdbOR$?p{lc;7#n+gU%i^)ycAn<=9Q+Dixy@@FDeC{~ zA-^5L+7m0Z5C^mn57uko$H%GYArU?;~unvbL8$<(I=ioGx$BZ?3d^U!+T%;!SM_D2;POaVR+qzp*t8F1ZyE~ z=`wyCqag!IpdQ9xV?5d7WXhjK2EBmtmr?#2%HKpbyB+OgFB$$3vdVk0bCD*wLH_<4 zcl$)@24J9(LIJJe?H|erTjIN zy@|5#CTBl@<$KU2E}%<1#wqj$|Nk7#;73j^=5BBeuE4D|7`N*GOATTaHth*DPkJ&G zTZ!1p!A==A>IuQ%$F7WOgYRp+m zJcA;38oT$xsb65!{YUr;jIjOy-l8pDr7d1$J<2n;)?lR4v-#MlAkrErwVffV8(p9u zEoCw`W@2L=HkMNUT3X;P^oYIm$O)q30xNZHpv?S+Ui#au$lZOQtW z|KRv4K0HrcJV9JsXPwez&XJ2~G3V%$vuHx6*(-Gq+SCdBI7Yu5AwCY_$3gtqPkZbo zK6bNaX(zFXvanN39ja*UMtZ)Tv!M$+eb|{yFU-Qu0?J=WYi!}n-OHaYpL(VtDms81}(7!z^1vb&6s4*eDyKi)UccCwH4g zYU_w(Z5f%PjiXAm-n?3C%^S7aqFt*jy0pTgPs>J6(UQ@#wRrSWEwo&(`RosxYjs?6 
ztS)NE`f1Iwd0#VaztIf#1x^12A7b~FU@ie+*oXTFEpXmW3c`bEAYG{?hE zGd(>t%_~Tgy`wegovevI+3NQx)CAuO_4?MS$G25ozT-5`uV0;hGc?A3kvjZ0s4ZZx zS_4k2CGfhM1An8Yps&;vjP_uF9%Jw;>V6&D=dx&n9HuPUa5-c9V(AG zp)wYxmd3uK(zq{Ff(Bzy8avDr)cHZ|pDHE}OUd_O9j}+*#}IxD;$wH*NOeTpsWsY7 zO)>szhzVDHOuTAi(^M0iqpH|qRmN7SBCbj0abr~$->=g6St?FguA+pkDoi}8f}~5z zPx_Velm7z$;WaBGZ}S-Ke*v2(%ISwn=Gd^Ng85wuc?N#;;m26~XiBhAeS))U5`0vZ z5TeS&Xq6`>sVp&5C5Z(pPAXSXQoRa!c1=O@1m&kpS6<2z<)&^?PTFB*r(IN5`U}cT z{}g`YH7g_2hIyE_zrPF}sG2EnE%{y@xql7)fFD!vtrtJIwU54Ismc^bm8W{DG&NAg zsSzqnSRR43{y^K ztg>03la-aN%U~0cNzL1K~$I?NduX72|7Gv6a$_9hFk-q2%HKC6$CLu_RszC25K;%~fn^ znPN)o6->jL%OjomHX zj7vRef*hxgW4_8qH^5Jgu-E2=745mng=uP#j!Wujk+7O@+o?8>#5U-%dbOkmRD4?-Y zeof8tZR(Lv({ytFrIJ^?Z3a5SeVGTFo|(t zGO+*ygTw;<)Z$AK{$ zbxfyR#!Qy;*u`=hyH$>3Psm~H!?GLuTRDvRC!jr`#TXdB54C52abYUgr|FCfGl+$m zXi>awox=HmUwQmJ86P8Qr-1P$G!<*P_BzO=*ImxNesby!gE%>kPnW~^0@;tRmfiRc z*^VEO&4l@~p0G()E9me*=UPpPl;xBpjh>Pti>c)rHMLnI zr;eA|R0^K57WTshgx?q7&+skc&^@ zzVlgw0QU3EWI5kTBUzK8`L5stAygm+(tr!kJWi;2bf6(_&09{8?SlIlc%FifxfuQ% z@L`Bwu$;#=3R}irp!q9tZxx#4YVvO=gw!?k0Y`s6cLSy-vI4U;7IdC1ID$L)K^VkC zHgGbpCXiRriL0o@s;w9~$;I$dAW>ZYH49Jh0k$rs4Jg~#3p8UrWAjGNh0TmfThL(c zVjm0yf;%`+jO8}$Zx@rC5d^X&*nt!8J-{C#APMrJ7P|O59lCuTYf%ovWg7lX0`N-) zgZ}{QjIq5EyR)&`kBzn+#39X=y@&H)FYAE7eIM<=pEV!{uzd)Q+yOt1;KvbLE;zKu zQQG4eejJNrGLj0#&;kUVvH9pI1s^@f_lb!kpD`$W2Rz9E+w-W~;7-m(>{VkY7k3km zqCp+Q{&Ciz(1aGJuzMQlQf@Z>%sCSiAo}IJ4HsYsZpyhaDWhL5;m4&6sDQCRU|*uL zmyR=8pg%6s9~b`tKMiXz*dBL~u>hO-_Yi~F3OLjYVUmg;&qoh0)_tLliTeat47=e1{EEN*3I3^R)VrHH*Hh0z>X>w% zbMPYZcM1LL3Tsdddxnk2;VF0yXv$kq%&oN{uW)=7UWYf$xmiiyyyJ{cfggW}MW-kr zfonmlm>Gn9I|Q#aci=ub~l=Ckl3ybS!4ThAqV zo8$k1-@&`^K70rt!zb_={qu!237iwp9Pwu(K_WM_%h*&f4oze(Y)2D04K(DpFOhK> zYcH&CW90qDS`dGTflMfc1{edzT97F)1Pfp}tVNgDj83qFyw!Ln$5FDmGw21E$lsqJ z%Qc=!`UUx{@e47-&m;YWqoIHN23~`kvH(MOFavWiv>0y)hh)fyDoSWU7wDq=e#)Op z*|RBoA=<}s%3h21u^Fx4Zmb2A{Fbo!JKhZ2eU77{eHhQexC%epXW0(^{G0VCUy570z%ZtYS*F_+@+_;yq};?9^*uQgtmB?Z(POm z%k-3?c|3TV^Bm8@9Us2O{#Wp+Mo>3f?76Y-BZzv$VkaFNg_K{VC)tbgII;04eq6_o 
zhl!7?ZH5gdF3%0_y*!;{4S(72=RZc16^naZAyhd9*$a<7> z3ZRrI?4(eu9Bh9a19`ueAw9G|RBKCpM z!Yhg971;eHf3xBgXJ~twVb&Ppu5wym#GkXAFZXbc9A$6N0koLCoU6N73$laQ*hY-r zg&&)VkB!9Xdi+?2A8Y6rc3x2aVtQpQD`d8_o9r-N-H%fA7`unwB8u4uf)5798E)_~ zmzvkyS&HXL-U;`+4$JUI@NJ*Tin=*bFZO_-kF8TYcG9U!9xYyqooehfVyB(6w2PkT z*Id(7%{I-|EVC7wVYWrnM(o$r5vMhIU9lTe4&BSKWV^{r~ll>&|sbmK>rEm z(gP=WeZZT!l?#_wI}>YJY_)Qvi-^T(y$3dN59vb5vsCMs2wR$J0#V1`&KDlc2DN(&I`yYJS z|KQ912S4^dpv6@BFIBn!7L^4YR%ze^DhYf}#X+CIkHD{U-R1^$zkux%ImBQd)2v+D zAd7sDY0%6B@{B01?IC7r53p5BfQuRfd{rM9s=7d)eGr(e>YyxD1{J6xs9fbibt(&P zS4nWMibJNVC}fceLpLfv?11vZ&M7ziY4}*V5#IxM1;U3hbfEjOd$fRjuZY|qR^~Im zWm-CozfHi8G5FCKYOUH(M^%Sy;V3PZ`l?l^(;cyO5|{sPdy?loy?>+~`c@L>DSMrb1aUjmnJeR7Tu@(&FYQ zHGYj!;`b;y;f#_J9#>-GAK>3gN@VRp0#|0ZMEjq@=79>PzE$LVu%wbaqm;D=_|b_U zP54ofI8sG%w#tulR&JcPvg3l386T;P_;{tqrztHVPpJuIN=d9&a^e^zCG{&YX-Eml zs}!HIOK~Zu6_ffH{2uWGz6 zVl_9}TA9g?N>BDsTC%@VlEakD`kbVc6eXtQC?T~(@u@Y6OKVeX+62X<&s22!3Poky zt%!_M3eS8*VVUo6{8|y2>>AEsr6`=k&arA@p`NLJ6RAiuxj)Bgyziks+UT1aVx)+e z$;cS3lni?%@w?uM8NQ0o2vJ-H&tJ$)QcNcMAF>J+#a^k%>=s32_b5DPxZKOc$%=I}=;e8jMH`6{rvLW|uLQtYST5}pB35~IM96a|##$gi|izNPi@DH|*AvPtqRTPTn6&2le4D!1~> za;e}M5am1r0{z9kY?zbS*oVEXUF3YdjE&=oM~)M_IVZ<5=jU%F_>$Sk{&Rc@t2S3a z75YP!vwW+({#jPd z+&M#wF*t;M(oVrZZgDoe8q)%#vkisVq7hW!~AXk)6{uqI0E8oxADW``{@=*N@;EnZ`1kM~~Tu zjcwRoJA-jy7O^m!SQw%ocwNJ11^AQ7-=ijzf8&z}ZR9v$lFdXbSxmIo$cZi*(eEv@ ze&fOt0}S&01yBQYWO(*T772Jgd~Pu?d?i=b-~F_<0>l(|qnf z^I7Ix@^1*nH!r^D1h!y4G@8J%1t)Ol;=;v#X4vpWW@a`=0(fQ%P|2ATG-ES`ABSt4 z_;1tL{{Vb|ZF3kK7jiz}>YQc7;&R3%sD?a9f-pY!S&9Y(wqVYCEgOZHY7O?_3{1jU zlfq_KNTE?_3**5{OS)ku!M+v_G5}tpvab_(f9Im~AK-5FVq$RxAFsxxHH-=CI2YD4 zCT*Y(Hlo3B^aU5N<-PeP>{F#%Y%-@2Z2&(u;m0N~J`V!=WOD|T@fm@P&Hsj(u8irk2J#| zuU9frpy2y2!<&4M{y6)C=27QC>c?_2Rbej&Td^n6A+hC*E$cIA0B5mz4jzC@@X&22 zisWZl7rDmq`babp+T<|@k`xan>mk5-@>Dr=!fc?SpFYj;Y5w*x3B^A(g?8=4b}e~T4|#9A`yEocQh$z2bi7o22W#(6Y@ zN64dJA+!3FyX`-qPYkaC`H-Wbe;5jkp#$*59=M}H7+Q#-#khm99wi>K(OAl`(Lnhf zl-)!2JV3eAP!wiU?m~2l<>(&k$Y}3EFWALik;AASr^%r&p-nuEZtxz>^z|+6!&rOr 
z1{k)Dy&#Xk&-U;5VNKnfvFFFXio#whbts_xDink!${s^=_tNYG=sDAnq-G;cEhLj% zPTkg_O>9M**n>84jC$Trm|Q1^e~s?^Tu;$*Ph!!~Ka4dPh7NH7e(A$^w8uZ_m(N*) z@d;}%K2!*{;<1y7_EC)gHI&^<*<&fYm$C;adnz>@qU;6KYdImbp1N&AFW65g8#_QA zq&1%5l=^_)F`gXDeH*wB?t$ZQ$A^Dm|8w{d-X<>IpenqR-HZX+Tjg76c^Z zAVh zRyjluon@8EW2h3pC35}&!^)zeE*kvV0{dKcp0#Bd;`ji=%YFE8Qlt3ScGSrgd;aur z1a=a!k%^50Y?NW6h8j0vryVV#3oW7#J5#YS2Q6Y5nup=h9$M!VE4!``!7uaIzu+nR zK(Kv>-!ZJd;DUWO@=jOs9Vc=whL*kfwu7-_Gg{0#`ec;?sZSJklBgxm!NiYw_%R1R zW}}15!jBpB%XH#nDz7HZO-^7{Gc=->Tb;m1nnko#FIxUnKDW zE<^s@8t`HL2UC(Qj$Br0m*pd^vFU_8Z)^p#@`m5QFePiMDNB>h3N$!^-@zDBqyCXi z>Ki#m<45+YXVheMkD8-#<|{PTe5=M-99H}23u+tvv|24cREzaDzzVqAynx+npc$HP3*s&Qh0kfjVug>*|#{L=AIDD*Xhwp&(501d`$uQa=nm&l< z5)Z~RK-NbxrwGBmAJZ<{WTK;mdh8s~aNIT4!CxH?;c9b;Q;S2YnjQH42gf2cI990M zu|ai??W%F=RkhPpRXQzHh4TiLIqy@c%UPAUJfULO_f_onEucT#=27e$m-*uEQr<)a_|fy9@e*i?bSCyjAZStUA{y)wm|A+AUp`Zh5M3D^a<7jY{3w z|KQ%GVt&i0&||I&Jl81SYqxT}PAkXzF=c!IUfDk1CC6OtrFz zfpq4UN#uL9$@p;Q7W7S{x20;(Vye8{RpI5YGOsX|ddI4mwJ}BBSt?|0Oo2~@@_ZVU z%RZ$X*2QGA7(3H{r81a|r3ai+TEKOs2EGknD>aC92Z8L2gomj2DeNA|qaX5_;=z&} zrpg)2ZSbQDKU#>BnxIiC^S4v6zl#d}y_N4Dq`ZJgffDZw+89I{+VA={N0dQ$PB*AyT28~6vX?%*~Tv2_yrdyCM*iqYa=VIfm-rhOS?lPvq<>VPKzm!ik(Nr2a8PQ5tCAyploS!HgosGRMt|k_0I3KEr1$^noj~4u> zCRPgRvy6CiCB@n*A=X)Ov7U;J4N!D!xT50X6d9MQ$oO1E#Fr^Np+4BR8i+@ZlkjC6ZPUcxEAca* zcuh+)Q(TgbqLUmIndGjBBtL~Eg(x&RS|Q0v3QoyV5PPKpQ)?AKlI)*0QGRLj2^@HWQC6+=AvffG=b5qXGZQX{TIbB$+me z%CuByx}Ad4T@;kzt-y>R1!P3XKO;eYnHloUER;`HwY;+0~ACI<8vN8t5Uv`g|9p!Na1KP z0j!Jh&2yAbp1ZvBcm_mXs66vx<&mE%xBNW0qQ$rrG|Rc5S5Aeqi9;^oMrNNqj5Bm$W+a9()O|VEsX< zrQA#Gce4!$Juy@)1qFMM{d8;O2mBl{Zk zhZ=YAlWlFNY-;0VU7Iee+9Fxj*2LLnbmEVsqPes%%kvonQH$5=nv>H zd$GS2n`;M1izf}QLzy(0cpN0}=qLBZm-HTTuW_^i{&>+(PVFYywpq%u)lL?z&N6TH z(#X~zjcARMSt}<{D+5j|CzsKXZ4-ID0Jgw!xW>u$7JNk|e*)%n*x!upmD5PaXK;;} z$=JlPjrWyPh(-REgn!`_!nkOc&C55e8Tm%C@f z1_s^3@DLSz1O85ehYy|X0m06C>@MSL^XAeI^Y|La`Z=@#pQX(r|K{%j_~kyGH4yk_ z1!hyt5euyO2ln6sUJ%G4jwnch0w9Pc(}0uthm%&rel7qHaMHcZ#rUuABf-xekRi^8 
z`Simgt`SR!MQDYJ#jJsVcnIcio(s`{Xi_T}0a{=NqX}Z#V!i{of;R+192Lp}P9{U_ zoJV8MT?#u1xcf=;pQF=1!{86V8e_^|v5bGV0ykGNCg5%jK=`C?B?iD79JgT~m?m?92deWkCSF^B4dBNv{MhBo1tW(w@czU|F2I&(Bz8;&h>v^m<8&0zC#P$8-wSl;y;R`Td3cHMe$K!7UUR5lKXq%v zZaKCxu@i+IU+g$i_UL;k|6Vu)XMulC^6@hldHo<7vNXK$t%{Z*>ECx34kP?9a6^njV>1R&j;G#x95O=@YYA_JVFd& zJC}OKW78j-j@Y!gN?CVo8nz4@FT%_4>TO=<_$K@Yehcrw@8Ny;2tL7&&uNb@Lf9XZ z!Gjg*&_o8&L{w$P!5e?tOe;MM;#;W+} zh?}_k-xykq3j{zkia|QIim*{l*~VU!PRbrn*@Kij9figCZIngi%qz%Sjo(JuMl}$N%8RugN0cAOm}iEb|roc!|9KMY6=_$&jBVn4iMR6TG{Dmyef(y_5kMWH@IRIE^H|MMPHAnOX0(#KMM4?RDT`b?$#A@cQw^uj8v zZ(%sui{^0;dci}ydJ(oV$wg98$E&b&^j&Fc*YTY0{uhJIA8jSO3 zG53-8-%FpIB;P)UPIm-94&lcE;$t6ix|e)=H#*2pviTkOu?+>_E^_Y846GaRWIf)l zrPS5*~#R#87+R$YELf5OJ{^U1~eJ)MoPj4fM%cG@(^!Q|w;HLLoNFIU{ON8k(@vj^@!t zRPIWGQe zXQm3CQRvZ8MM{wBKV z6f~hptRI_5jP~Ql1kTxB{OG|Cc06FKnJ67g`4ecBX`DR^_+%a4?N)>7Ue%kftImvH z92)thYDe)jq}x1+?FWOngoTp#!ggM-0@XP( zLSsh8t9?|e+D2uo#hl-uFfUi5d7T<8T2*J!t=iG-!5GaR49iujU~f>l)jcY;x~3BA zH&tTucNN>BKR^jQ!ccyHB$wnEF7>cEiYZ1I^$*0pH`6ZKq=(4!O~r}D$V7XQe7`A#q~8M zyZucmXfSuU$mRWH3b%LC$^E&stxhBN=lu*mpGcdGqn%p(xVCzXR2f=Kk&BZGTs)QU z5};hyaOJqN|ADnJnQmFia4S-}Tb0t>o0ZDCm}HMBO7dK!1kcTi_d23DuS<&cei^<{ zEILda&xkQNPu-7Udv`YdkV8Mf{7k0I_%VU@G1h?axJlNvaF4^wN;zmTS)Ojn@bp!h zSFlpOB9-Empk%LfCGp&tMDKFNqs7Geb}E+V#l-l{SCrpIMRH>*!v6t<2fV1TfWL73 zk0JuNLjh;0_Yv&w%qQ0>Bo+&p{^rpSOu>3mnO`N47Db~!;74|Vh0^@&lv&uYsSYaVx*9G%?KNz#9$l61vx4v$W2i} zzKRSARzz@=!h;hP7LuvZ&_acTRx3EHO+jIO3Jjku|L`^Pi`XaMh;#Ced=@^DPvj5s zWsj5}Yjf_Q&ik>swVWwk%`_#7GocOh}Ba0%M#M5aS{LSU>s2 zhRQcKMm}*V@{Y@q7g~%*e52eGy5*KIU9O2MvF?yRpBa znX$QTI5nHnO7745Rz7Drg^Ku22ENA8W+AkRPu6I;W!T9z!$~d~9&*m~mlOMx95dtO zkeM!f*2UOm)yOt`3`~+WNuyQvR$1nplts>U_^n3gd?`yb7%SLMeYdp`j~$Fnox~$d z8cWW_`(}LMXNZ)?cT#ApDEcgr{_!XpA*TXsIpjOYF5eY=WLpp{n}TRr7o^ClAWxQs zRkA2-lX>ApjVzq65rvy&Dm+GTUx7Dy{WthgW(Dl4#r|Dmh{tYppkCq;2B3@gjrdba znwG_P=w5}=E<7?yE>-0J6{FEs?9f!4WKr%Z^YQ?Vs)*2tibR=J5XcocTtQ=2On^DC z9uC1Jg8WtZEBpZLzo5>Wu(`62Ys5t2ae(m=jJ-HDd|uGS`G7A`_!)pd?z9cd6j?)J 
zp;3*tGBr3dh`9lSx-pnFFu*nt!2GVY8VIZgD$_6%))3(P;Q>Uhm*8`P4IgURB|n~P z1U8pVW^9^<7B!t~#8mo$_ho~$0lviJUkKmx>Oq5{NSkr2K^SL_eqqfCY!A-BMc)v% z$3#Lh5X56>G-F}U7z#8NRdnn|bUg>p6Y!rAaQM*5+TKZ=lh|E6i&&V=*aTyTXanBo z&)|IIZxIyXJB2>rySDgc0o*tNOB&4s1k!D&kmvLbM{jiWfLldt{A!9xo3V@=8|KL`ifVrBu*A2WL?oI+-7;lw{lW8dI{^bxwvw_Ft11A;B%nY4G-p|r2y zT!0*iU&%dqj_x$0Jy-yf$)7RaFUODN__3V!SdJgd@nd-aMAN}s{FYS!!L*bHUAmTw z!C@Nx8YjT-sQAAKLe^kV=7QC@L&$WlBShBI2OH5~HW35h0rp_A8GBTNNjn4kZH(8q zpkMA9&FA>B6+gD($F?AdhkV}C7F!9dyQu74dnkvv-256R!{34D>|=L2HoLLWK-opx z84K=a9nMZPs9m%_VPwufBZ=U`0teyH{|{78M`)8H)=bJAm=NH{abn|m3X}nDacmY~ z=;*!h4B!0}UFAE?q8{V1+le{-Gp#r-Z*o!>L zIe3ctpJolteYeXu%6<^|XSZ$wU*~A>2s{de7>V@HddRc9e%^w_2|r%;LJx^Svnb$3 zb{mQ0OpbKws}%k!ZSm?y>cDp8amE4a7(?BBsFOY2HS!YW7&adU!_E_6l>Gwy>NYQP zd<9;EH{drw)o$H5e~;sb^vx&u@n=6aG$gUGzk=t5^padI0V@6lhQ2UVh+pat#(EH2 zaD_mKfea{y8nRVmFG?p_(gYNZNo0;Q$kpePu`EWLSjoNn4d?~i$>#Qv!=6C%xPUhC zIJxvYWLN)0ni+l$si8v{YcOtrp+Wq&4~9RMFuWe+C-Rx^$T9!Lx`VICWB$rBC_d*| z7@v~Oe1ac;#E*~h<3lpZ_sRO+!;jyQW4=RXZTv3EZ^-uFpfO)1n|%q74DG{M{qi&z z8pK5~)V)%C1EBXh4r>LmwDN*%Oeb1}S?wT{MTf zEkZ9?rKiYHpCGroL6-gq!Fr8%S192!>dGZ78v2K^2IC|c`$6vb@Ev{fclZR}XWhZu z+~ayiQpGDb4v}_f6L<1q&M%kTc1mh`tfU>7i_G~iS#aLa%94#C}et zdpV`9=s1CMjG7(6qOteHSc_pi55v$R3?IIMzkm@&Mrggr8jPpV#vY?B9##N0q9{L^ z-pxiyD5mU6O0K7Dev^d!@;DjiQL@k@6;{X}?KJv`nC75c*p6q;p}x`Y}oBC-}@*I0XDFc1EO&0m9Y z+8L+(Mc~Y(ZsrV$c9h^sd--7}3>&eOnu`B9Xc0xUcsWs6i;X61bYP?(y|BO!X6VZJ07Lz#|j~RNF;ly|b$T@TJKI}7OP)~+veE!M9kfCZHU@}P_JQD4A0ySiH~WbE46O7t8>Nqdl1dy#K`20y^?Zouo_=nny0#)8N*z}N$`5`PxB zGWDQcCRuY$8)=122kg0F%NHxb*x?zJoU`@#QHLKj_)(3ONQBnC_!iI~;34eZ%cbo=B$s+v%j+e<)Sn@KGVL?oiMgHa z2sMx9`46KURWr(6Rpx%GFb`3=d6Y^m5>#Tr9*ohsDjZ#`g3;B=vuswbCBOe*HAz`k z^Oa$}LFv{9lxlrmDK@P5vSA0F?RS6%0T-}&oXgstc%~MxGLANgVonjtl#uorN1L?J zUp4mTDzmXwv5k`oZTRg68-L~5geu22TG_Tq%CgN+hHbvm?aGyA*Ps-%m}I*FCECwb zg8e$hIqX-g!#Tw`JgaEOKPkrXdtmiYuiRAt%#6c?4E{=0~KP8&ECH{2;G99PC z%88Q#M@zIEdu2MfDBZzZX%2x(VQoyZBl{m5+5g~}tpvwn#W~d~)~Q`FPJN1U9#W+9 
zYK6P(Q5b7tLS3Iyi0j7+b^BgnXfa`~4}?VSgB6;;>oS9(uQdLcceKoXKrLJNTq2)*~-A@mk{=)Fi6 zq&I1TpcJJkSU{8_A_9sEiVD7nhz%9>+~1reklAz4@B5y6ukXEY!t-a&nOSG8wbx#2 z&t7Zx%!ElfztWQAS5^{P8&lT5ij)bcB?;7G;#n6H7ua1&2Mv{&peYi?q%1OcyF>)P zD&ZkFBqHP&i45WD0N6sipFy_eUZ6~-nlP*)wGr#1N-!hr`MFKX!N2-L|P}gyl zq#!RT8{{Wtf#@-C%_ut5?QHc3LmmPknW zRtXM2CP5LONN~i@5)#4MgK%>HD)<6&=Vx)*uQt8_2G=4T$RIUAk7i8KbMTQG_-m!o zTyv%t6CdUwv0*+E6BZ~@VPT-OM206yM0j}#kEkwT5p^XrvYCWLb(Y|$0TL8FK?0%| ziGTDK@ryYkzA+!M{Ym^|SbG50BWoG*=hk6N$RY0rgZSK&eZ2COXTLJf#8w%p@y zfxd}kT}-KX-oY8~BSG;&5)dCD{t2<-$3fqObnz)u3p5gMo*!%~+gCh!afWB&eDO%y zAnr*A#VzSww)gnXtc|rft7!8gWX^0%3evP-9g23Dx@#lm+V!YKalAHqR;9mEut_X7 z2}^aLp5rclNj!s+X9)Wwhl+P{jF^(kidVS`;#sbSc%;+^ZNxpbm$;_piA(AnaZXz+ zj%hD~GvJOmrT#9?sjTUw{ZBO^_isV^(~6jAO-!^TCYs}q=*6p$r4}}+gr6j0lW1%b zQrSj)D!PbQ1yAv;;42;#f&uqnR7?=JifQ6nsk%5<$`PkZEyW?dyV$1>7u)n11&IUC zs*&{5;5PVGY$`FALH;~sPHju-)t;E>Kuoah)Q0*4`|CBs7tkdQUE{GyIQ5Sb3`AKjK`~sLu zA!iOUCwHb6)rFYgC%xEqJ2WHd2>`q`Hm;{!?@Hy}XwV1EKuYmh6kTVOp6MGb_LmAkcnCL}J z@Vx=L)aXd=2~9k938%08(Z`eKxioPQ+a_)}ttaqh5mgZ2gvb3s(x^PZff}K3qjsPl zjx_-+hUs>2f_lss;1^3BW_~w(0lDKL)1HUYe*pf-ws~LFW`AXLNrEN{eS&C`iN4~N zRk3SlhoUajGCT+%Y|#c=w8j>#u|?}-P!$kxtx>!+j9QHWPvK&UTp2zRn={fD;l=3_xB!hqd?e3>*y^+h%!m*+^mm zBycPYy?lpK1A^8bkk%AnkSzn6Q^A^)fu3y9gXg5kfM|d%1`tI38vr`AKaI+rPSCBP zeshq0vnPX$Hc*INy0{Irnh@)?iFz_Y~Bdidm-72dJ7cgL>#pd;s_Z z4`2tFP&1$cOlpDl`4Q~-jDO5Vk2&ZuClr(cRoKTMGaHARg<)sm6En9nSvrX$e#4}d zyBCl*0N$P9+hiU!m<5al3yH%;j03=9F>7F!AZsaD255$*9IgueXcq=nV~f?f?5B~dHb8R(`p@C@iws6?H&knmtSn?yfOlzl1w4cNRon+d_mLh@ zKLMM-vtVnnt@e2vplO!GWGCC_u*q(B>LKW{Ckj*m^#HcmgVXIn(4IZiOwK`fPkPYC zRR0kFStdywtR;1IywNg)dBAvPo~_{d3r z5KG;nDoJEBfGtkV1e?Jz@Ts(+t@UVU6?T=OJ=!~wJ$Aw8dGI38)$T=UM?Inod1^0e1pQerFqDiXkJ`ipY7-XpJim(27W&if7ybeasd;GHNpJojCm{cx5{=56#ig+1OxkJria zULy-TPPX|9dK^WM!(_xSlj|O!3a}4{+RL}+sb=iLa?c@gCn#Ejv9g#BKO+BYt@{91 zIH_l8prP zdID-KRI6~HmE_#oy(dqDr9fMQF%SHX%`4D^^rt~&*LOd?y)=i7Lw$HGN>bIeHc-4U$%7>?Xwg&4wClm9j#QZ1pV4KbU* zDr~U~zgS3&%;O%V+2s8*$hW7G=T1hCiRdu_KOINBj3wV5O%|MouZ}>E;rPc;qzr;W 
zyE`VA7W5@X`XHqj6}Fy4SPx_u;hYWf9jL#6t+oZr@|L5+Q(zW;F_pYOpS*u8c|T99 zM@AXArxFS2v^ImfKo${N4>^sI(~|l?2jp~ze}A;mR)$Q)V-~Y_6JC4(54u3_eGdwr zf~;*{V+h&DX=a-j^%u5F8QSKe&op#kB`$K^kZGb#0munMP7E^2A|n+^>C_^sBPROXAf zF~mC8FR#u+<$BF`6D!AOaqO|i5m5pUofnCOv1ztq7$cp4_Mcn1aAW}kpar6P0%;!<;8&rxZ4yT*W*rc_)jWl(1m4>!n$n!&1 z2+|^H6VHG^k4&31$*`#`)onASsx9|m*fo)KyS7rvzK2v`;+5twNm3maONzr5Np?IW zNsgB!$>}>ua;72$=x@q$v{(_XS%-6Ye= zB-I`LrK(dYsp1qR>CU`=(m6>gIP?Aom#UKHk|ilF4WyiFD@k(gDv54`q>Nj>B)Bh< zIQPvG%M0R4yT30n9^Xl6YA~_jS>!*HLL8K5S_8(Vk>`|Sx>p7pV3St#Pb2y(8$Zb~ zIZ7oDcWOD*AKU{Z#XVHYc|=RHM}j1Jl#{YNKevoW4T<;6kvM8Gr9C@KjMo5(_L?A( zriBv0nwT)t%Mxn3Bw^m)1J)jZO~`+`0=}3|9jG!>KDLAT-i!T3Ye4GLH#M0e^4JIJ zEY8$(Jf*D3M-ogy5^oBZSk}grHu3%kZ{Gjl&HEpy#YFlvm2jVq66TvLrF_Rqi0^y} z^4lnZeg`GM?>!0dzb}E*V1oR}{gJl}*>kFKDXu!xO)#(u)Aow=19~*2Z|dSFHKMrA zg})|Ki;44fml$7@MEm+nq;H5s_(n;%U%Z6*rAVlMWhv#KB_RQgBsid*1P1nzfWXn> z$Hh6{p!MPtv|qe~&x3p76U^EJu$Fc&LGG+f=J++4?gGpEXqfi2sYD8uNfp$1MD$D*l>=k5Fci z$Vf*C33HdgP@caK>Mwp_r9ia!hLsT?YBAms8Dfga5mRI<@r>*t9#O-^J!+=7MXwUq z=sn^R{TAT)3(-7(A&T4|S=#+b({u30`dpp`{rKD&eOjVh18h`_zNtdrq+pYHY!V)C zM-9hSe4;$XJIYr~(ZL{6yrOvzXH1HCl&&i7rR#`WY;$pq?J6#DL&Yg>nmER-5c{~@ zVi)%&+b_Wb_Hiz!&D#A)QyY?!G$tlMpGKr(=+YcL>Sf`N=vf(Ecq}jX`LOmJoA{P> z60Zag@rd^pkAwhmPY4q?jO5DuwOz_o7N@ec#j$Ktu}|zQc8P<;CUG(a49-M6t40!E z12@6XbS`rV+N|A=G`Sfu(Sn!&y_%B_Hep;quX^a0i7pjtG3U*oAFxRvHt|k#5cd>! 
zaVcjKr*i({m{Lj{Qewm|B~ffsaimm?m`a0qR*ad*hrLr3}7B8_$_ob%7Ad*fH zr{gr~I8fypfFD+F2l|0=U=hc*f>*!|k_mL64x?R)o`Kx_j*O3;hzZagv_pMfwWtbz1Y`srK32haDR|w1^@=u%s4<$XX4PA5NFc3%-&!$0loli zVX!+2KBWeO4%A`hq2{#C_+vNxu?H~$v|C47ccmZDr5eX6ZAmP;ltN$1z~YWRjt%W_ zSQiFy4+u@bk7PLnM1v$y8K7)V3(y^djs$Z_rZ$2@1pLP&_vlcY`a>6dp=Uu_JOq>V z%f%o2(ht3eNpz`*E@jX$0z3Jmk0-WqqG{5~hQM*8p5X?tg|;T88MbH^4&nj*(F~tx zhSE(D(sU@8Nm9EG10UoBaFxJEhlV_Zq7VJhpYd@J{x}3*7>o@D(k%8>L#JepM`Ow$ z^fb{N7wpBvr2t*9MQ3M}cE^dm0DjQ{zvzH1IwS-7qXQk-zCD0xyQyFmwVQnmfEN+; zBW+=Ri|ld37#}fJF6cM{HHXv9>?@DzvFH^7jW;Y@1|VAl`dNU@xS^Mt272^GkG|;9 zhd${O3a~}*ih%y;-3ri{UK42Ka!v%#<86SOwMqT-s6#*~qpyXch@k!CH}L*=_~f=$jq%%MN^F zM>6Y1YJs+3D4=0G=#TAhLHCVxqixNRn}xgz$ckBud}O(9fRDy|i^dP|6H6Ur58J(k zc!8~keSl`k0q_!d85{wxpvQ6aI36K#q5^0Du*C^F{sfIW0panh(u#K0;cF#i#vw0& zHn?r)_s@anfhI%aukk(#=temKj)T|0>)=gr3Y-S-fV1FT@ILqmJ+AqZP{#7Skm@Wd zXhmX8uw7rkzMVi_2cp#uzaeCi%E$R3`O8+76)bkIj+kROn}U!r(exnOWd`{)nse; zpgawV)?oZzDElT`qJ=t+_C8imWcec}92xP{j#6>GD)6p_Yv#baF}zz)BWOz)bi#Fe zAgv!^GlWckG%}_TP7COfRn!c25YE~gYp;{XKLiE$pA@aZ_yw6?gO7pcN}8J;0{gJV zZfvoG9CIsq|7QARJvE`VWNoXk2Tu{89Sz~%9R6+K-;v>?JD$*wy2McG5@X5RXYg+c z!`MbN+D9*)#bfT11sA++xo8c>SIE=cS95F4?Oz01SZu-;Yw(MeMDt}usqhT!l|ML8EtK()kj7X zWVE72&;c1e&|&~}f;=K;D&Llng>S}#Ut(3l2lU(nV8(I9u-zPhGsL4*ikMC{mkI zr#A96?DJ-L_v4hn>2@K**$lokm*GY!jCqEASZ&h@ElC^XiU5` zLXQU6qdpvS_`fb%WTTfq&>wo1COdsPgld^t1?zpVZ_r`!D-kXNFS8qR2D}4 zA(Yc9r@B$-Gl*fnH+Jdb$h1csk?W4#xCexIt%n|UC6YG9AvFm}nkiFj~XqA7vM zj^_jvMSmyW2_)Cl{$|=PBtn zK2pIZP||EdCDkTcQf%WT*)~~{>?%rGyXsQLK3fv(n@F5Pdx>@EBQcJ7674ukA|0u8 zI_;5gr&AKY zuW5$(uqMXav`0)P-pp;f4Ic8H+#h)hE8z=O@P(>OIobA0#}~^p?MA@?;~OE~e%$}y$Ndj}Rm97mcVP1DUXOsz;vP6i z+yWt z<>;Gu`X&OK1co?^PoRgG0=>m6FhD$mLP3mpP>XSAU5r~uEpaW?SX@eV6sOSs;t-lI z_NNsxHbWGwN<|l4pA>tYyEzaR(#VMkKI7ViQePlzii)tq}QT>=Gj{^(A zvw+EF)HU!uM*UM3)Bah=)t01d>re)Qo=iR4=a3%OCQZdAHPEvXx+LPW(Zo(LHt|WY zr{3Z!E-{|s7~>-jF~MSAI#O&)Cx}ffP8Q3LVqqHB2J``Azyh!t90nh9935i+l!eHj zfy@a_7@wOFm!Nx7#->Kh_hC_o;~D6ZhE3wpBOL$n$0lCo?8G(6RU8sM#ipz`&Kdw1 
z@X8X@i71+=Db0_QT7jNm6qv`pjo@W)8CrCp4x_yvX&N%ew`6>5Ohk=kd~?tpi~zGa zwjLZLk+}rWfjW%kekA0MZAVOWAZ-O*K^wj|K$lGHQob?qfK9?_7Va*d^q+GLTh^O6 zQ^Rm4LH8nfeLxV11Z6-)0AW=cQnd>h${;rrtff)=!9^S$9jL?1rn@G0EL?}ujkp9| zK+DebLwkP0@#L2H0lEaCiwS>mMIXBy8=Qp`S{+x6g)M4hi!6T-2I!A0d?Jeu%;E=G z2*?^lkWXd6T?JmGaTjnjbjV=+1G4fl)yQ7NL?7a^FEP=Zb3_lwx?+P)xDR#-ZA%|u zFL(5@r=K`^yYoyjCdQ3T7?Gfu$4}b}{(8Lp$!#4%R;)TU&=RY&gCE+7CnRp_phe z?t)Iy91lb%PxN#|FD5mQf&NUc0DAO5j~?_%cl78U2%>QAav%dV!O3Vu*E~A@nCIgaG_vP$iIq0E{+pFu^~I8qOvH@KsJsw`usFGHyIJ$fpUQ)kOLLq|r%K({Yy>gb->b&SpFY-oR-N{58x4^S}atIW5pWtFRoSZx*^2Oi&lmCkx91Y_Sk$ zdJ5w{wV2;M&&26GJb#c}WOhVGqxtv%zfWGoH~@+F667yM#xn3USP52vHDGPww&=6= zeH~a|hz)Ev5i^_7V{-sWOB|>Mu*H^K_91lhW^e*J?xa9&6J*vzR_Y2)BgpYtjeL#2 z#&;utoP@QY4)PpZ+}cu~(6+k)O|o2|-plqyun!ynFWay-!~@_PN0LD{z!pbvzN5qWFDVyN{8JN6ab4--iylE~Fi?P_FA)(6zH zWJq_Iw|~tbaf`Y8m*i77$?HBNtGv$q`4gP=8d;ilFUtEE@?DH}4n@z%4b*x_{h;7E z9NN7gS`A_gP<8knoqi^#(cX!2m-+kG%)-B5rhc7S{71~}KOl#BkL>1M^f<>rb%uQE zZE~5n$i_|*sBe(%o*-~u#et9FNQaQ}5*>2@>U~`2*4BUQE|jg^3sQUy#&5{J4{n1S zn!Nu_@`n>-6tAGi5%ef{CTJ^ow}*EZc=yC5`on)HT{xDkZW?uu zg}Biga`;{3vL~?0SA65SSwOo7MXN(-YcMoB{9GveW41&K-DyJ?yCKV)HiRG}8vcp! 
zt_bhy)FZNJT|Ia=hIb3Xp)H}(8Qwh!9c`t-Ff1^h+nXh&W6 zH-vw4__tvg=>-2C@b8ZmM$jYL(=g_fd9TJYyYQAbsRrDkCSkTfTZb_OTu0tna2#mi zq=nliuv%=9=Y}j3!%!gn!>M$|QHw~XqM$tsqbmGs!oLnRj|RwSij3CC=m`IwSVDUm z=P2@K?JX%wvCbB11&8opZ8gzDC<>l~(VeX(UkfoU{I&vZNFZ`8!{01yF_jp}XD!HB z^8Qiy#z^!SjvhntkHO@Z1Icp-koo4~BmK~$4|?>%9zBrO4K1{%ly;)mJKzcJkkS?} z*4ALOMs^WiMcxZQ8xpi(km1TQG*4wa4qJ?*7BiH*e;|F*58vn`k;sTcMiMg06BU*5 zgbd{HJ`jAQ9eVIK5cqec?vYC`<*{cvo}*Q-c3`m+XnPyVf+qlJ+XF6?g54N`-I4Fg z9NVGbbTk?rhJsvbF}=w9$(E4gPZWe8BMKRD)I5^#$TZ}nBPWB}Ky9*q?LLs^{NJ8h zL{EBZ2zw`5yr(9eJ_C&B`!Mv$^55EG+QmH}=us0rYM@63 zdQ`(6tb9OLWn^U{D+gK4*wcYphxP=MkxSR79%-@ zMqvXmDuO8oy7WR99^=omRXL|RF|T0Uszsb+;2TxYBb`{ONX%A1k2ES9sYofuw`4R* z#3E&}MFQ5-JSUbYDUEk)Pr8X>gpQ;N8p-GzK@~g#9o9v2DI*R)NMM=~&%830X%71= zYe0gS?)ftP#3nWInJNxW$aP1i33KkKqEDC7Oh>UvBW#lGM;d|O zROC4=$u2HZ#=%44>`YSH##f?kgC)u~Tq12tOSl~x*_D@4_SGcBp^gMQG?PHb&Jy4_ zK>VEY#n)-E_&9A9Z>OVTa=Ioa=kLWt9cCpm7o<=JN@H489$#eJyBt%_viJcuX+S^K zDuoT`n>75S47HdTcNd9p;rWlOjVa~C^B0^$B*-~Z0$t)Iz@?n{yH*xo*DUdIYa-rm z9mV8EbDQGXgcVK^W^s7fdW#T94 zVdUNPO+0-QX>y>>LjA#=`U7iYeBA@YhqW=@9^C)HeM(-`Vmv)-iic-IarbI3ZeIPw z)ih3=O;3pvYhoNths1$3F%G7CeCE18^5#@#OsGcQU7e{f+a6Vj2XtwgN*aHB|KP>_4<_z^Fhz>HiTfW|7vt)kA6;{M z5{tivhjYy-#8yoHF5>R%DXzXg;=+AO&VCW%Ryb#YkXJ62&|) z7i<6r*>?fl1zby@-7}Cop*As*&A0%%@wqMg8lzh_HmY8cd;^;#VUyBjnDgVWz7e+K z!Lxf^f<44B*jwy*W{fS*jIjyfUwv z-x&-B(*S42xEG*53-E{1f5;r(i2SkZmi}Z^-^Ebg6_*644_n0~_Ec zCYtD4-bUs&Jy6sO$E80i z(jOJ^iHhX_L>21+m{!aM6Y12aNhF?Uuz3fdLn`+b0q6=&?iDFGwdF^FS}Ol(mDThzc7T0u91{>VrKl>x*V2+8O}gT~Xb zOTjLZ%G0#rE^VOxfUI%2%YU@yj4COol+6<;ie6NZ=NgRvp zM?DlBJJ8)yz{g9;Of0}#Y#&vD2H zOmGXpKGK&0@LQ&7oN$Z7hVRY*J;nrqxPp`6=mrEIh8qQO-ZC1#mx1|x9Pxn+pM*-N z)_MxAGmSWyjt|UWxCNekwr5My!aX!#7MKI(7H(&=RiPcXzzI5hL%T>eH;H}NVh+wd z2V>5L!L04@d<)+9q%Zv2!Mi@ct2&Q3T)+_i6t!SrzYuIDfoGv;^0oj?p2l0_tMS|q4g%bXWVm2^i0u(@6yWNXr13Sj zZ-TeL8F0>>WIBYkA^7LJ^?8mewzx0_An=05K>ufBXYb(X?BeR?;pye=nlx?Jyk)D_ZQ8Z( z(5Z8muHAd|?A51lzy1RT4jM9a*zl2gqsELKH$H#jq$yLUO`kby_MCb17c5+~c*(Nm 
zPp^1p)#|nD)^FIfdCRlgw(odu*Y4-{zVPDy0|#F^boj{8S6)5-+Usw;dFrjxZ=X4P z?%fOTUA*+c{ORWhzy9{{_dm`5)jxgf z8GlRK|DV*2*iVeL$C-= zgcspP_z{kzd6LDI3Vca%CW|lON{yRnLn$L^$Ie~5_w3y_cfjDG!}CUu%bzrL`mDM0 z7cO4<^vYFh)@|6dWh=v_Hd^l6k5gEDLUD-#pRhQE<`uXFet~1)8Mp?%@#R-|$L+82 zk2~MsA>ZD+_Z?31{SQCvfc%6$3|6@)@$xOk>LI1lNuy7UH#3I2(Dgw zXaA%=gSY-ecuxZ-p#Az##LUFYVrW=tJuSH&#CN>X+6J74E(JSA*AI{<7p? zi3R?hEpX?|l|>~6)+ z`3oF;*($-qDVF6h3;{i?k3knZ+FBT}BGo#pX;51686!R8?--fjllf%vjejdYWAA^^!6hkStT%=KiIc>-`OvId&1F@-6aUT+=}dp04j&r}%Jc)q>c{c+NV(`o@K^7qi$Z z!N)^cOKbTvO|*KF-pd->!oQ*4T4OQ8x9GU^{>lo$H!U6=mv*08AvnaU;-bI$)<%## zzoNt3ieSYMzi!qZ=vep{V=Y;`zNT|h5rM}MD>|6(y*RN=)o5=g8(Z(_C(XsKAK$v_ znbjM2zw)Fp$Qt94eI*w7D+}Cwb={QwhfkvT{^c3%QavSX_Y(*%&TCu{JtKawN^x1c zpV0Ex$ZCDywoa|-v96mD-23@{byR>-le0~ z&WZ)bxOHgJfJQ0)L!V%pFE;h7=%jgNhSl&lU!GRI&}V&CT4Ql{W?}jOl~ozrY8U>V zO><*pnZN1(__$QIqQrjg<9YNK{gP1$?dGvZe)+pc>TWilUe>2ZFkTh5-l}D;E!5thXZagq zjo_O0n5x3Ry2jI@*{nTZv*<7SfRV-Pb&8I%wX-q?P1ME3WCMp=MH8F?K8DaUb6;Tb zzxD1&06TOM_PvDXxWW4goK9IIA2?kEIauPYqP3W4ikmEiI0 z!$RtMP$l4A&EkI^1`apYE*RFPc7?c7eqJ7)J|S_(RSuT#zkYG`;`y^@&c1v3#_v`M zN_PG~XMsEKAG`S^r7!NB*{6PzkIhW01ow9h&+sa6t7bQ>P<$}?QT0$FtmM@V@_&ngA?V;BD`_lZhSrTAeAscNz2RsC_XSLZ0&oz=R( zt%?ZkmS)}GGKJt{-BG?wQwY@XH%>u{M(H9>hWSCUd76c$yDDOQ(N7DvT~vbi6awQM z<#8%#A?>SjFu%Qf)!d1rhY#xCuW!GB!^Rt@VuoLo?D)60z~?8QTQaW480&g|vTaQB zBujdo_NEnz8%wo^OcnkuG7?-IR(yOg_^kQcp@pMQ0gK9?t1MQ87IC+1{Z6O^wRFdM zR)IpWM>#I)Ahlj47@=@b&;HU)A^1q;&_p5lO(m$V5STp`in6~d6crSLQtBk8c$r3{ z^-?D>ixduy7Zec}6asliAu#I-HBhTVt$B*V;*84SL4b{egI&Rt(8kdt$UtEJ{my4s z-_;(gaPiWWk3avH%An-?KVpIVADw!F-211t%*bt0K1lqn)*lb&=L8iw#c(5wEBVFC z2h|O;nCrb=ihiwa-3_ziJiFq@dAzM6<7Bam6BbG}!C0mLJx^g#IA{+y(fy_Vp{KZ_ zJIa^fTY7?@RRRY!n^VJIUej|h7q^my+g^o$r~K&u)O=7^hS5kDDjf7xRtTmj1XuL} zc$AMqVAiK{JX}a6$X8hOR|&2>x)xnPX_y9?l{CmWi7_|N$Z6WTLzf=Ca|aC@JxLwz zRf#3v|BqYX-Ummw%pcj&>g9)DjxX$;6=k_bDlLr(>4SA`9}OSO?bWa_e>o^X=Td6; zn|tXFb0Jn~tx{c}I-n@ANns$~AL?1WqY}tbJ;ApIf=zk?v+nYRMmlRtTD? 
z1icl4p(;U3gt_ z5SZ0@(3dK~+xi)$7%v8|D^{{{@(etwa$vE_;bX78b^e2EpM7=r$A`wlC42t+7Wn1* zyVig7&8e+(2Dh#ryiTmA_~7ES^;k8`emCVj#%Ub1)PKBupWwYG83& zPtn~#pj^S2X&`u6&p~^%r2bDSw4Bi&<(sl#?Vf(72EjzV2y>>&Vvs`6UnS_M5EzoV z1`5GNRf(z!fw{dRql`+hSs@5k34Us;r?6KE%tw`UPQ7AfHt*7TrKlq*9N(^|NpbRx zESK3bcg9_VjFKIy1%CYCZFQIB`)?OtSURRl?F44w4UGgRC)Ry@)n2@>Dim)v*LSvT z&=YJn%poXTXlNdF95C?_BZdC_S2tJKgDZW=II;#ZN6#^cbtN%Nz z5ZrF4KQ78pYc_9GMk(X`V0_^Tt&o$I;{EvInUX_eMF;Kc=61PxQ=VG8>(~XQUS);0 zWS3@vyEm=b;o_F@?W>iE2@UXcaMK+%V4dJg-F_CMN&>ItsdJvvhO!B$UTE~^rV zl35$IRQxv2z~Pc!#u@{`2|dA^mbG@psh-ypd}kP>R_h7O{fhru`>ZAyTU3G(3Wu9& znWVEqU|xcw75`HwjOIJtiXUcmkjlaQP9r^qfxvu*Ovyc{bXv`(-Sg&ef6GWWL4s=-SP z1WIoIvVlO!?LUoDDO%}uFstWwsi_c*Q8}oyoZU+OK>`#Gx9X}a6aw?SzazM;){FJM z1;6d(^gKRLWtg9NG+;XhCD-kK!$4r((JOE2qP4q@p8Nba&(kL*FC*( z?u;qY>Jws( ze4tK*xubzaQ$4{f1A#Kt+G8LH*K@d3EE6oCb=4F6R>QzTPhjrzcLYXLDIAU&M#_77 ziu+Af8EVoxUnMxM=U_go&PvrQR^}y&7ULBT=I`@#KIPp_B{1L4^NmQVQnyvl5i{2u zxTs#~v5;T6{iES$B|H8p3p{-A!~Ji+;j#Sp^%{J*Z`JH^gL`*r+oDlURz{Wb$q6y~ z0z{+O;>oDR`IhQf%uyYZcNXagt{4ew=m?C}1GPFZ$jCy^!C2!^ssnEuYB{36T6DjX zLB@5xj7|murJg*^K%i6yb}Dm!!-}`}mExjtO^kYpRMhgpGId7SRo@EpIdy!fr4X2B zoAeD)(m}KNs;Y&`!F;@69Ph@Zn@0Y6{({xpUpjgDwqg2WU{DqI}LGNzbHywew zqLDx!yjBvai#irB7&+)CF`tz-P^{2#Fc-U(zK}FuPhhO}=*L0xMuUhJdJb>Js1zA` zf^U_~pVv0(3CxB%ib)~ZqSPlO z3#)D_RG#=IV-1*h|{*t}_pIP9}*~7axty(gF)>JJC8kpO+Z?1k8 z)1z+rm>{i!`}l9Y&IXenA1L~yucN$CwEOY4rH;eH%8!o}ek!jcFb^;gD9Qf{1A#Kk z9apYbL$4R{l`&b<6POKGSrrcE#g9r+McODF4kr~IDB3F1_}hl7C?6^;mK0m|XJK+o zA$Ug}AJ!=Z=4Z+kN#|9HWpeuJ=cG`7J2}ifB%yLnyZ)1&-h1|2_4j}nf zyOl#FyZ-Ym@W)Tz-TCVC>({Pcy7=COi&t*wEO37Py!_$)dvt2sqH+D~%xdZ7Q_AV< zC>LYB?H+q%!6zLDvoSQ4(3`0KUXAV-42hPKqI^=-z~W=Q5ELnMCR)_JZxzo7_1{O zPcaZE8OTn<6r`G-!v(`Q7o#WmDOZ)@t|u@r@>3}kfrPU+P+NH1|0`YS^2`byXvnCzVBau;N+D% z){m9!EwRA=bqoA<@78A@e|Y)Q#S0fMy!ZZxH|{*pt8i%Eq%p$><@WB;wNr=o?b>zd z+Gn_);CNa@fXP)?uhkRWZCfm^3O*{!*{4}5z{RP~>RIeETm#v!C%D+I_^-9kYxD#U z@|6o1qO6%SZ!SJP5OGLcuIFGr)1znzWg%5k^U>?yQwV-~D%GNeM@;p0qgPpb1p6Dk 
zg34BJ+%4~^?Wb-jKQ8>bWAcg}NB?pGr|~Rcom0u)5)1rWSm2ZMCyyUH{L+E_FYeoS z@bHPZKTzk+D+l%N+M!L0W{n%xua{H5QS%OcmE{TB<2)ZtUkc*9t-fXFn-}bU{J)Z- z{5Vc6q(tdCm^YMv{A0@}Ple!3-Ewv3w2m_rg0Ck>6tQrOZ!~=M>t8Db=IxdlhHYR< z!`yj$KCn`Se1BAUT$P4hM=jcQ<_Q&uJtJm5v-|aH`b%9pIoF@?OC3?kml6y7e`JB< z`<~mjWz)tD8#Zm(w)?<|bDyYl`*|H(HEUEqr%rb5tlHW2ns(?vQEC19a=J@J1hPW# zhBAxs&r%3(O(>RNxK!*u|A0bZ-cYk}e6WvgIDFOX#w_4eU+pTYZ`nFMr*A)JZ2{Gt zg_ROBTjWk%ckEM@dExh0M^9e9^RXj>2taxV4hG%!b^!j_Bsh1c3T-dx}y}EVk)Tx_OuThJRxzl&5#@m+l z=s6Tl>Ae;m`Aj7^)u*7;Vjtgl~69rIx~K26C;z-CeG2(Nl8sl-RBJ}bhJng2=6rBk=hB}ryM&XfZDegW_zB>PVnD{@FUvG}a}`tf=U; zNFALC^T)LxMUG#@IDa1Jm09U2<2-TEIM3g^&0)(pZ_=4?G-jN){V%*#XF|B}KjaSE z_HNOc*!5dy;;zPV-l{YCN9}*-Z93DoMg0fIh9{-#%t#n7lgo=rN{-T*@vCyYd}>ru z)Hwg%G|uV&bv@&xqqE63IxXEeCG~hrTw?SlERKH(Z(^yQ;k!j!xN1& z(xU%U`M*b-b$oTAb<%Xwb&PdVbW(MW>%{29=_HQJHjPV?$J76jlctj~E=|`7*NOPg zJmYb#@LA*Q>KE@11!xa<1l|hz6s#ArA=K$;L>MJp8F4c5 zX4J=+S+Q%5?TrhG&q)v^)+b#}ew#A;#O74@wB&S7Mqy@4)}`!6IbR4fiKZkgvM1#j zl}eKVb@YqiDD(?9KsGSlv%FYtI>WPwxKM^XUAQ-i{09eUgjdrIgFT{VrE5 z29{r)diCA4Ti2Tg`8Se>4%}RM^Y^VAx6AKPhkfs^z5D%M-+k$W*oT&nCOo?GSUwUx zx@Gj&lb)yCXGg|XkG*?d^CIEpnwO7XDPQ})(SLK{E%n{bcW>Sod3?hg9sc)ky#MD+G@Z0_vd5ITsqnPY z>3uU^&eWZ?VfI12M19Vj#<_P5Ce1fnaKsQ@*tY1+5`(3?mW3{djH*`LG|@5LxYB$8F4w+% z?@fna`%N63of4h-E-eS{x$3yBa(DKKKg9AZ^Xl<_WWzTg+3 zGmfqfvkgBSks8U0Dvj=rc^vyUZdtr_!jVK`k~;a+@xhdrsT0$drdwusW+r4I*+n_s zxg*4hBvbM}N;s8369cvMOW-rcM0h!}i|LIfVK6HXZ(|Q~Uh#DJ`T`^2CXtocN#ZRH zksXt#DhNtY#Zn733T=K~X?{&XLt%MQUa_o%Tgog0%83f;*{ z8k3riH>b5^wx*p-X-jHP=!iRY>~u_LbXWA5=(Ex1qPwGeqR&V7#$1T*i@q3jDdKWi zf5??X19n%AD?tQg`PP)`#aY9JoUw<7f)ZRUirS(eckmY{q4H9ci*wz+q{4CLHW`3QxrXY#Mbzs~%Q`7`ZL;a}UoL;vFcv;XH#TsdjSWVb0% zQ_0ih)0<`t=suq{VfGR|OMUM-DRa5=>I`no|8BT^;oe2zi-9EtOM8|*H=4P^+}Ot? 
z+f==>Yt^GQ6V|R==eRz01G=$l)3wcC&6jSm-5R!yZc)7bqUGD2^LFjB3fWD!F57e2 zX3TDqz0uy?4u1Pi>}NR^JDqX9>+<>FY}b`;yWKrKVh@o#1zzReT|T#b-}+AtFgk1z zxIf76NK9~c2o|b7S|8RMemCMn)U;@$7|U4KW07&W@%)5}#Ga%F$v;x&pV*S>mKK-J z7#|1DWZlpHk-LDfjd+NZNM=%sspn{qfIr}3Xa~a^PC^i70osAxWW8liSeNF*+iv`YJAcjQkLW6Bq*SL(N#_u7wnpYy*Kd@uZ2^tBO?B<+>Gml?GLd zs*P$^)vm4ESa057(YUM0rrDvzxz+uoPg_uXSV!!s=Q5+8bR}&d_G-{Ix9c{88*eNgnlkkM=J2hvw@dHf!zb?g+}nPC{{4>+ zt~@MzM1SlvVlwh}^yCxzQ`cwnp4}c(K0o@xVF^kqw`1eclY1He{uiW|5GRG zO>wy<_l@8SnbzAT%) ze3j9j6~4wvCXlIWW%H^ltDmj;yUuXE*#^gr!Q-<4(@bmLyyf!N$J@SbpKWQnW7kgi zU6EGVyK(FMJuNnuZ6Dix-8<7^#lG$PT^xg*Qk>~7k^|)jPq|)o8+HGBXu9W8ug%`} zKHk33ep&v^0OjH8z%xOEN1g_M3)MZkEX*w2KEgXPDk>uyj*-QdA8U{6kAINxE=ebO z?s3x;ixc}&ebb`S(=)(KVOBwQL(bXU>x9R|_oROmJ?e7WCSW)H02lzpFw$WF;WE`| zCDz8efZt?~a^7?Q@O1?Xg{wqc#Cs$zQXg5UJWi3OB&lFES0mNt<(1{v7PJ(eDmqtu zq2zMu)v_Dqw<_*b-mSV;N^rM3(0-5s5$+D|uk z)^}B&DLI>WPSGvu;haZ$sTXqkPFy^8DeSU;zxx%3ft^=3U0ZQ|-r(fHFE^eJ-MD$~ zR_$%o9p-TQ-Ozi^_st&|KKT7`weu{{of~CujiO zz$F4m0|l#uPzWmeBPVwu_g)<*-$ab=%p<>r51&UU%NWfKRTN*wLM?+LfvQ;plowE3 z_9Uts6cnCG9fDNOU9<)UchwKj1iSd_1*x5P?L`aeDcAg#J9#cUqx&A&71ya>NyagT zW-WPme9kVUSTYZ0{-S6Qei%aCjO=h;L;Z<}R{H}#S+9S*BtDamj#ZLYNP~yWNk>Fg zXK#|^g1A~6(gWUdRSMaRvkhHC#_{NkcJgoR_fa;5g^oFisRvN)>Pp%e{{34g(Y0{N zQ-la;PY&u4+f=K&`iVc}dew(Wd&Fak8WKlng{VmHc;)FC%7p ze^Otw-+oaOK3363J&9|}_Fio!1{HRmPABrTw=2F7`;>QONu>F*LB;67m z2$@Yj!fn~FBERM=TeX~Wj~n!jl%uads9RAN^CAMs~m zlX?(bLei06NQof@O5BcYBApd3b;u)c;!iLhBx`wtAH;;=GbQSQ z3~_OdbICH|<}$Ugh!|2>O1nptX|5#)5JwdGff1xFl5YDBQi{lKxrDTj|LvWCxb_ly z?=x{_cT3-M;=EJzmQ3P;mW_quL){C&iY_A+P4#BP9-?{0n>c5p zO)==}LEM|Cu&yUAP?s!}P_*M#-wiQ>L`%?;wAGllp* zqFEc78%~_tP!Kan_*>QI)k?TkdfjRbA*CRG{z&$7w$Z0m3=c9sVgS{WZ(Mx>O{bW4 zMu9MeQl$o`Q(i0l>Fv}QYoy zU>76u$`^18G;j*1pMt_G#>PjFpR&vJDPR}89JmLTq}c+4;KdLP@C9<)|CxS*!CmXXr|cR1hV&?wu)P=fjYXF41^DQ8sTbhSbbxLG7D!U6319}l3oZgY z;M@BT0nG@$Y5}+(=e&XFD(R$qN9pFGflInTtAKE_AMoK}rAKKmIWn<~R*!!HeQ7*a zQpzxmgE=25qE(~heT~2*RJh88zMai|MWgF#&kqL!a@B>tDS)wD({hK_CV5>GA`NIMYq-4wa4dRu{xq#O 
zZ?uU+JFGrlxQ+Ty{)S&pRZAXGTdA4CH%YD3M1E4BDV50GwYP#=$eC%1(AIG~#}ELn zzjo6ISXgb=9Z!>&9c#A} z%KN;+lJb(T^E3rG+2J$z7g*i==Bzc1SZ7oFf%>A-Ns~%-F4>9ypp508BCe-s)WLBG zC0#D}BT#%K7B=0KwIaKvZ^%8r?H^AYpIbE}9=t*96J@rw;9o2H`s_H3N z0i~w&X3h^le1+0i>{LNxV?{4>9VU=2F?Q)`W~HXq}@3y zt+c1j>)0;GsXLn*nUg7nHP5qFQ&yMniGs;JMZumY$PsyScbz2lt4RxP5OUaO9|GXs zdqDqXy5o(T?InQe<;&&9G`pTgsVCLtG>vhRqG(-|-bemZcQ0ZInOa%o{)qIWG;xO+ zX+!>i!Mt1_*3+>?tZLHUTLu`Jq(1)#-9w(-%w%pOXBFK?){uV+)vzx`2wY+)sh-Dc z8Ovz9gKj{#0G7ij^cO5PZHKS3ram>nE<gZ=^0ni`%aB>nv0xbg1KxW_%hjow*gKv6?QHmRl?ni&)n+7eJ&skH>%|_0lSL#p0 zSC|9&dl@&88@vF90kW9#0g8lyq@Cah?0C2xMB(hcSs;KgP47ZDd)4EksF%p~nj>?+ zAmYqSWHm3W_7c2})1VDw>>gi@sh})Y2{{hDfE`PC0Xm~-Kok8XYG8kreg(x$Uw{B- z(Ze+6b(LV?Fw!Oe(76bfOE*@Df5Ssc?Zbd2-ueg+dQp4#t#q~{lRjKI#CC6$#7C*=&K2V^CEhn)cv#rCGguz zW0&abc$nX2;3TKjwi!s~Ofdcm}mtC?z4wN(6l<>0R&EHDOHF=arhD8k1RFcnPSQ%|eryt?bxy|do#brJ_ zv#NIy!fNL?2f(wN+KawGS8BPUP>@*R0S?o%i;L3r03?5TL?Z2i+V)Ts%|L#07n{0Y z##p$79K_KK@zL~47Tw>F-`!str^11!lL~O?XKNw<4oI&*2RPHit5_$F0iLDgqYbpo z!fkFawLr_aJV{Zicg~+pyo0j`HQ4H#DQEvMi38-izi|EqySxmB>6z=?5)f_oqE4Z^ zH*t^82UgV13~{4vtN;)EqFR@XT5Kl&%3nS=kWhuS4?h#EB`&%&#)~BKI-YPCq-b%Pa4-hfN6#pJuhYs^QKrubIm1@*lDdt@EpX1@U*0_#nNk$3 zZUx<^?F0KZeMtoi_Xgf8eq$GbOW0GC20SGAFx}|JaS4bgz1(jIrhyf95%2j^^9MMQJd>8CxSW$#a)o8Yo-7SURV*RGW(Hzu*+-Do zXlBe^cop-GPdme&S!weDDn%J4e5jwp8AQ2SIoh4Xel2xs+J!F`jTL>vq=H*wK01eY zhB1otai?Yu!vc0vw1C0I&Ad-Tr7TyQgWz8l(}V=x<%VC=a3k`ev+?W_jj&-p>#~wo za2d0e^$R1IC6YPdImB1kmVN^^;KPxfjH%ofFEdD=V{3g5bY}lukx0MD3moX@+LTvy zC9!EGvua0J`33fQ2sTCYoIi7X84dxd@E4gRmBT0y_lFljurTM4H%Q>aR&8`9@B6YT z0LI_a-^WdEusI#WW>pQ)c8M|Fl(>fw}RhP5K)XgQ=O<+`)z zZ^Xtsdw~H_(xM{TMsC@qd)%`fW*x`aXIktk+gaTD#VTjCyD9_EN1m2;kf*^Rg~H^G z&{M4-Bm*o~ZFP;Kr^x**S-?t}azQMmik;Ip!t?35)aJq2(q&hk!Mfd+Eq{f+Y}$`) zM4r_ACQx9T3ZDcVDk{DgbQbh1SmDB>Pt>m8)437tX3IM2h?cA0M*M{4 zUulqCCYW^9NhpN;`XO`64RkMTOQeCvl#; z?}BsK7k52mY~V~<{vKM$<8*J4Y$|@zbV_)ypsr*cKSCQMZR1+1;!z9yvpjpZ&a{Zu6L{FkRAYp`v3bzk|t)jezdH{(d?(&y3v}Dx{ik3CZ zEimPKR8xcs&WG|a=oL;Xw#eFnCFSGC9GMB41&2v+v2v?(CNwG=*(L(@Wn6OVZ_ 
zbhOl}OI6b?0@|yzulRT~&Ufd|se41G;h9zT>8+S^+1ltT=9(fGzbmkN{__1=C|fgZ zeu{oV6*Q-tl8X6vaHNA52dbWkKJ<91fARY|^SG|ujcoue8c%5oO=V)MYoA6&BV(24 ze3BWz%Ss&ud+)w0+FmLt`>I9E2d^PR)?hV*g@UzhctdqUI>y362O}F8y%AESExI?~^ z`bxG}qNIMrs)S!@IwXK!MDtF5%Z;VoK01vZ1DtfvV^KjfD_?95x%;hG^no zb^TVQnDMtrSEgbFO8AoL&~0>*@E8) zWOuOAkC?G4u!{%DXc5NSX@LA-U0&V<_wu~j!qkV9+bVx6j>|r2n9^{Gny)8n770KK z|Azq1tmKM#w~vX~_FTWfW2{viGnWtOTy~}4@wtAGH0g%nSRxVywhW76;;;n9E?tb*U~yo?F)S}jh%0zPArF2v^UZN8yS ztS~5ZQG}JWDm2pNg*NPIBDXvvsxLoE^*xo!8C5t%oxoG1-F_6zO5E)j%+wM2Z_Qwg zig(Osf_w0hrU~lyhQ%fE3U96 zN^Q~4dBWdZk0;#cgjF|(uvzK~caLb)vTTaY7<{>CVErj*iE`Jhg%mZixjC|=H+Nq7 z!NP;N6>1l45y6dDqoNZYgTLiRh~zAh1S18+oe~a^j|E};V9L;ehg^NyQ_FAc!%*;& ztL!bD9ZeHUR#LZ@4ir>T9h8+?J*p2^U-_CEMGumlrDkX9ia%3tAM+FLqMZnI<8=WU zE>}1apt0pvoDc6`l84XX{BBSd7O}1=E-}wj zmq|6q^VDym0VFsY5a=V5{I7Fah^bQ#djayq;w)<+=DEZj{m6@~l@~SfUl&^DZ{^V? z*VF{=9L!wd$?hQMNEhR=Ct^hVSn*MMd?Kdy<8eM?wvJcuEG)@l85WDDELqF^!KB1XaZNv`#L{7NReNGCSx8H;HjP12m zgr4NCUib(3!QE9wEQ--?)(+*}P~8@;P|r}DfWzh8GV@$qk|-Wcq6kSMov`gZmB8EU zGy4yJr31tY=Sj>Fbhbb-KM8(>4^-3@iA&NndU=lv1^gqb*LiA4AQ!6dWmii!D(5Ar zgqvmUpX zO>(I4b7qHRN?v1Jp5Tl6WAFj)CuNGqBKA(X#|y3RPw@#uj!a%a;aBrkYKpTKJXd0BcI`l<8QU6t>0osns@7B5J!dA zOd4<%?kx@|zSBjODf2IM*s(g*Q7z9Y7v*OgUZmzprq+Cn4iv^6oblb3v`<4@0W>hVqbg9mlizsb;usDX&$mq?l zr}XCjQh%h3CbJY`s!#YSsXJ|wk4WSRY~DYSp9~sqox`<;uNYQwnmB$Xqt#QO2xUgu zU+^VwQPFuY8S=|F02kx{YG3-Vj`2DCH4w`q2pzL)g`8i%<`)aG1xN zk6boC!>(mbHWcBB9LFNdDs$dknYwHt=L}oFuy=e`vB{f?+hhi-cCZ%5FOglxyh21` zDth+NH$fU&zjqDy1mJGHsEVCPOwyAPM{|D(2g}fqalhviZV^=Aj$&Sye*XoY&}L?1Z+h z@=M_s;aF*Xo|LIuxIz7nG)L>Ggj4=056X?Anxx&*wTHKe+{GyeFY%2;QF~n3??vA0 zD_HX+DYLI5bD4)VE2=Ci0tJqxL!}4cBZX!~s|kME9r-@Tw<_0apoqItp32hSSu{B|0^;kWL}T3~QB?Z$YEVUTTv& zE^`4U+Flclftxl-1p8y_Q%omg6MhrS{j z)qJAw3@lL^(|K;0QYFZ;c`e$^II(FGKNanr*Tr*ZRV&su1)y8FQMD(T`=PpuQN$y+ zsze5>kE;u87(b#6HNK42!%hkkquJF_@(B*GnJUad9&F6y72@;f4RIH;I^;TyZ#hMr zHMP#{V9>uJ8lRD~zr+Chm%KK=3e$-wR-Z;={m0~wQN4qUCElp}o{fSlSnP(++))l~ z-gEX>)&*&MW3lJ}-c$2aP!3$Ln8W**Rae}^l_!Gv`#HAZ6Vz^OH@|)I7+mSX6<@)T 
z-4_IQ?7;O@?rwhAoH6_imMqC>%v4NcMb=!9Eu|eOzauftTv(hbijKdX_gzqR)Lr$B zALRqfr}845?uw&$`c}*My}S!+37kUFj@f6hhfFu|)1}3B za&nwYUXwH-q)AmS9`Jf5s}|85ZNw9Vzjp%s08!6sgfm~#qWg+j07nSLjkTpqnOQZ1 zMLuMY^78!Lv?;|KwUc8D^LQ#{@FZ2XqTSO}mL%V~FGAETtF|Qb{>lX_t=V$P_30UK zJ=7}DZTww>!b7$5DwB!q^4n$mPwXgmDW*lA%@Y?42F+3>gNqOjt^En9e8t!Q~F{#@{UG703+XYi?=W`soKdfPk#gR;Nz0zsLvqINdn zN&MT21H_$S6N+CDfB4qt%gDOU&(yV)1}la98{oP2vE&cjq}M5incsQ!r?aT~@aV}P z${^XSaTbM}_NnF&g&tQ~4pAtf=Ze=+-ueJ}_o(Gg_f>BJgI!nTYe4$ic!@6aquvjZ z0cy;>ed+~xiSfGi6lg_S)Nq8ZN$sze(ru4rl=T5mLwH3=bRF-Iyw&t*#|J74khM!! zc7$K?CZ@`F<*tSqF`^ZbAkKWA$C|=n16iJG?D+S(zz=8N9pjEW_Vx zo;HJVe1DIU2)pjwFO@Kz)^J6(>@NLCek>ZxncDH5-3ku3G~jSVTI8&b)lL4)rMaoqdaZkY`{~E$-(R znqCuFh}w0Z;y!RFrs+7Kv835Fe^xEe9Io?||4y{3Oq4AOr8cJ5n_FV5 zbnu&$-zM)lP^)&9U9f@V>*R)8ti&@Fvy6m%7h%;@J>~*%5p#A2u`-*Cw#+L#m-egf zaLJ4~hswi+`Jo9Vd-DB!9P_KRmz)RGTeK6bz2v&u=*{V(F?Gk%T)rM(IC&u)0(8Tr z9nFp5q>U}Mbv~)ux)as=V;@z~2YW(HtY+es_jCzb}1 zP3@l*e52g6u-4oH+D$!`k<24A_eqOk6=Tx*f7Bnu8)t~rl(f$FOO(6u+nd>xlCZow zTMFGjr80;z=K87xq2}3rFE9Z%ZxgCtgFj5CC_J#XnJS49JPc)=_l0&5S9F~R6H_0w zPoWFq_B5TPFFGoyO#mwWj#i++g@X@^JL%ST2KhdqWb0S8F6?d6CEt&Kn)yLgff#@f zdb*fuLSWZAL~>%J?J!I~wz_E!qc=3Hrh&28cWwCq^yNTB(LcsU+xdAYd}ynW%AEPY zge-IBPMT#W^gvv|4LuHQi(IHv&3c{k;$%A}j-A_h994&y)#x!Byx*6vWcs;e7ae5! 
z*(}svLT$FRD8pF-lfBX)-qEJn=Emk-P9xVSW*(7wC?#3>Jlr(L( zttuz0;WSH;mekcMkVQn+HEfmC1oEot#B&d=EA0@i*iR|AD{Qhd)OdDe^i<+8`-z4Kq5nS z(ls;Xog4b)uww4wHgOU!deVLLFrcCw>fT=VA;bT4N=ZzDXY0|zo8epP-{trCf2|79 zj<`jXMrmH|%_^X4K3is~|7Z=?O_x&@a}9~&M9%h!a}fnChCH`>XI()0%F`#R8S!gd zgDaZDCfARZiT%2&JW4LQ8kYK&SlCY~$S5|mC{};Tf4v%#Gi0F#>0%1BCk~OjQOf;#G6jc*JUQRYqJ|>IF!_2dk<&?6FWJ(j2a@2`Rq6r*3 zX;pMAV$3It({4U=CwqR=lWv1-MXgVL zdCsbel)ULVHN_d6sN9|T(@EjEU8>r662VRO)PFwVt~ku*713R|ap^qbI$rft+pM!a zE`#;iW}Vy5T+5cWUabwunbojPU7ka$3LjsWeJ$NZn3)@2u=<#7?gMp`&j*65Tx#7( zfTi0Pe$P$dT8=Ktwi)<-EiqfzXWSW*{p8HYsyjJ0?Goj@9A1+V8k_UH#xFZG*Q%^E z8q7rso4l6h_Gqnlear1su@~rMKW1Azo|gUe?!^J$9Q_*>r*`FRzU)v@mJ{2vQFbLq zbLtbaBxksVkvS!IR^7hH<+)ZBZ#=}gP9<48a&rFUubStcHOvZoITbjYyX8(lK+b*H zM+Ynk4Xw9;Swv#V2ihyrVQ~WOIe8Q4Nc%-um@)vEQ|BE40SPV8;TJuL?qZV3ux8$W zK8cnGWZ&wir2$dBG@1k8-TYzP+s3DOBW)=VBD_Xh4N!qcw0*#-<6;_-ZW@$MdrZIW zuouXO_L=a(zu59;rc@lgKJ<#3#eCl*qlPjkHyKm?5vRgzst?Q-xKNKV{?M3IE(1u8 zr(R(k2uz`EWyI~}((c0tO{(bESuRi3QwjvmgRdwGp7U7M?6%w zo%~Q(plu>Q<0o+jDO0!_4vQL3-iugKAgnd&5Rlyo7UM<$7g#29iu|8v}T@+0mAzef}+_OR_Br38Py z;xQHAXdX-^FRLQ<^T;d8(mKY-%ZqkYGRO<^go@wfDXJIP9`amyQLX`bgCs5XCfQ$D z=R27!iu@&2 zX?JElA+;zEMc*I|O5b?fk-m!V?S4m|B(z(+os`3ydFLWIyeqWNiM+1uaVwehsL8Z+ z7OB2=wj_o`EFWdmkd75wXGlq*dBu@RQlzTV)0h-1pTDb%WFt*AoJwe8Ke^RRzI9RA zYe0s&6Pk_5#-~0M$w@a`cM0#4sP&29Qj$m2w$yK=m8EmTUX!L56uNs7A8UFozY{Z5 zDf9m3hOllA)lkxhKJ?5We;t_BIEx(Gd#@mu^z{sZUqYg{PooW!b~f!vDIvb8QG~LH z4dqV{4iF_&!Gj3obB^J_M#MB{r1Kr@nA=h7z>pKf+9>EVk4%MjVHC}~mk7V4zUki%Z>Ih@^@$Nhn^7GA zU8Jp5vmpzbCwmikiY6u5f&qX|{C#jaAn`8%4e7XD3U~r?GxlQmV*|q=<12FEk}Jan zS=ccd;=rMmL}()8s`3(;#c*M9!I{tt!VtX*DvryeXFz3s5_&9Sj%_2o5_UIsg0`?; z+!iuSxV?Qo5QP(P@*g;e-z*N&7&9a3uK1$W!z5YDxi^Ll-&&k;KJA` zz$%uX4+}8F!fiqUYb?pwg1&(L_y!xAowvHj7K~RPYPw24sqim8MmLl8ithm$@pi@^ zz+CV#vyIlrJ0ERM!#VH0{?Jm`5!QEUY4{tX)wHLaAJ>s^Pv}(oW6fzPm~Uuhqw$ znFh|OURM`2?ois2_Z$!x4e%XkFZ1pK>9h#7PpUihk^FJ^Vk#hc>v5E7E(+TDnsP-L zwa}M*n{)n3CN#NSeEKuEuDPtnmTpl8XM!T2rYvbt`R2;f9o1RJwL)=yU2E 
zrP|Gbk|z_5?{`m^*38!@ZNRygW1*~`@Qw>0(0RS`6+NYGy2=|6H$K84T6Ik+shs+y z{AW@I)vow>usx+FpL-ySvP=`U?HzfaYWv&;go_w{F`l7&)w=Bh_~xQ+`D1$dIi+F` zPWi-l%_&U{2MkdKvhf;#W4}^pmV9F4&St0=1{~@LVo6{(fSn2V&(RF9A;;tI}g#(QBB)rK*FPfQtw7IRC&9KbM+smE;kS^V9uCv-3T z>#`)e2iLwg7)vV}X}yJ-kGuD7M+P+vg$8a`*05eP__9ILS4bi`lw1bp37>~ZU8lUQ??N&` z{W`r^Wfz-kOVJlSJM#LFpPh(c8l2r03C@C^H(93t0fn^%(L%aiWq_Y4U{a>DZvpjp z@wCl>lq7Z8tQ&+8r1Jbv(Wxwtla+!y*;^|Uc#m?7)y3Rxx%%90Hb9sI7~?aDt5UyV zd8Dw2BWN1=qUQo;I<!oKx52?xo$Ti){jkjdp*-dyqpZMSbxt!!kN(K=Cynq@Oo)K zHvr63EM#|qmbeqXf&Pd57&}A%lA@1Bg7c5|GPiB;=5ZEsy(CFEjs2BbhrPfx1Q4aNki_52xdWZ#|}KWnz0V2S^k8q*fW-#0h_pK9oBq{e4lCwH%z0}K4d2=uL#z&&ddFv zU~Hb`Zk7X6AZj{x2U#Sj4LrcO#MgE4fMD)ri|1f0&url~x+|xqZ3ll}xkIIx>s9v{=iV+C`Q&u_^5YbdZ(a{E>JX4X95{ z1`(h->*xnYS@~^Ge`s#0jomi-iK2rWTxrLYvYC5G`mn6sP8pHeReeYHBdaivVu+(;g6mb*Oe(T71zSR|U-}Ze#2P$Vp}1F7On76uBWMf}2V4OgO|AP>qiCvBqc?2dl6k;Mxuj>J7=4 zGSFK5Y^$#P8vSGiTbfBHX;z5-(e3#zf=6^CD1_HZpOSTpbCYfo_X7V$j}1auF5r{{ zVze4Ew`^r{VDZwmOf@^NIYMrZdX)`HhL|jsmnalD!zBys5o5ZXX9P1cH*+lEwZ|fG zU&i~u?N|Y9LNHIle;&MSDri|lmvIet0rLmTBn ztR3krwhRl7dCY1=O9P%^6Hzz~SeBtMGn#c#zMWfEbG z(3928x8+w+9&q(|vr_r&C7h$Y$$bS`j94~TwgLL8++vIQvJARQRBbysqXD(Zy znO+zpu2nWk_`)^vLueiElXMg5G^as4BPA2B6Yh+7j5YGvz8je@c+UHmAS=0)Eer;q zcWOQtGQ!7dY-Fp8vHVvXl zX4O!O$tn9?;T&Tt%0-KQdz?c-GwsTf{j&H=Jf+W-VF5+kSxkkv;)D+RM|q{(D>VG zD&Ul&7U}`Hku+Wt@WA&oM@TQ*{}4x^MO({QbCCH9eOR+O-W4nJwj;%w4T++vLi zS;q};GM{r&O2)L|qGQTMszAX9u7`XzKNZ|7N$1VVHWi-c6edXd?rdgg7iR=lcsApW zxc=S%Oo-1lt7P8f?4OS!*YVL}5AAB@WO=A+seB1LQT9Z-3h&rS3Kz)uv8Q>sL>Ba%Geb1!K8P$7rKhO_&i)lBdFMy+2tD88Ut zP?CpUmuU*C$))1%yw|Bm1&=iAqA&4Wl$Q>_V%N#bT&J-7Wm~QLnO~*->-^v{ncmDv zbayyadskaj&lcWS9jZBi49HGZd?kv+mrHM^>=FzVHAnJ!D+&z!rR*eaj|+zV)kIs( zV#4YNt5e`fGSBG}T0JzNnVzTD4hiO|#4R`B8M1qg%W`|gL$xKz{(`Qmz2O?}?{cy) z&fZ$;<#ZUUDVerI$TTgSH15?LEn!TBsXkzQ!Du;{Ns?_Z(aCaV7Z)7KUPLEoO|rjd zJ19qT#u9hQ-V$aXZI?_XCU|uUlgY*omi!4+viV|eB#0R_^E`3id_`F=afW2L*pV29 zTjpzt?Z5<0JZW9#X{9+y8{aLnBL{^>ijPp-JyAg?wPf!`o+Hp?=EuPp90L}2623F< 
zK-mhKLY!HopwY38ypOa5Dy%+DOUiIkfV7;rP^pA=A!L!L32;3WE?7W!vER()K$ABs z*=&e`-De}}#`u*ccsBM_l<7H%qF77|!iNU`=5=1NLX0Vt5VN^ZyOia8>2 zV;%_FH||wB#qA~UG;@os4aW?fvB?ED#}617u@YEslwV3(1RHp73s>;&GCpVnxC}y- zY5~VTC0N$S-WVkr-%s>B3=8hE%3Tlg%vsrc&a(xqUmLnuncSc`-?2qlj)Gj0BlG4S zE_f)(gSKl8L@#n4EBl1L$$%`CenQka z6^he@_G@afdE(=`eT*U4RccnkEB=j7E9fgcM!T=+$-9u5qm0nR#Q&ClP|ZBLMs!oL z%x4n6U6$o|gY#4>-|36bmTg!Sfu4}cr`v(wA)z><fv$hLa6>6dxu{sh(uf0c4;hm z+Md568kM(!C4eDI{Vw4|v{S7lKar!O#Q;e;JXB__5fHBmaDu5Hv8$zV!+=n9%5(c`ffA9UKn1d;=X6;=+|=Y zq6z_AQ;sz--77fg5M@<~BlAOgQ2~+Z5^tm_Mk zOt$Fl-2}sO-s6-)O+9Z*)Kuka?!&EW*;w8jFt@bnZF2n{Y&{wb-8 z?O_g!wM~%N{8tyi{8)cf6HDaOM5@wDUR0MW>asobpzLqr4D}c3zOW9(Zpo1?0n)`1 z)I%wJC+(Y0<1dkm$4)_#APoAnWrXP(SXSR-gzq-IHcj=EE2 z3i+s*tt9&uNG>QNRx5-|6*$)w)yNKxItUZNC%otu*M@QQGxaxWD{&iZ0xY)*Y1QE- z?yh?MHe+mTiCSaW8+2FxUT^Z5Ara|+uDUMt&~KgXj4qM1j_hV$V}P8lmcl)Y0I!Ds zTD>r>HE~Vb^WRj*)E`eDrT3}DM0cv8n!z1oX1sR0S+bafvEUeFUd76QF^$Cf03QykGS2SBo@ zHFp6~;^^wnz>jbbgCpItWk4ff{8_tMxeUs1StbLyPIj4+xkx&@uX{Gs1#H*rfoV61O(-8{G+~p#8KpAMq@vl_wY?7cy^3&^rc=yvY%-?t;V%`~tarwb+ItOm9 zZ>oyI{j@qw_7`oQcSbT+>}Gp~pU8@YfzF$<1>~;w#gcpF-ei+)veqxph zH$-PusRiH;t+rM$chhOr7y-}igbXhtIv*03NQq+|xo;pmw7j!T{fCs-_ED8k*4X%2 z(Ux0PtCz1yp_+Z92cy1L`AAL$&ed*}cx?<)swIY1zS3W^gR_r_?n|9V8QJ^5^Wd}2 zDx))TX4_SRYiU&DDV=-H`q~mrQSun`Xw|;RV#7bBavQAqs~GSWDua~P6~R)y>XK8I zs7MTrq_M6sQW)&cHtS2gwe68*fANvV3nponwzj}nm6%}qW;h;RZ+NaZ22^Qo>o>1U zQ9A42Ek&h;+GEoUbfyVxW_;dIL2ae;vlpN@8 zCQQybz2_@|opPmRHE~L`UHw6l-Hv-T3FH``G1ZyW4Q@hxC*952ta-<}GWMh54LlC8 zKm3yX5tp<71Nn0C-LA{zrtFJ*9Le{R4-8$hI!6uEJ)~R?JZqs*uWy=RgaH1kySfu# zh4W_h6^^gX68UD>omO~wDSai@vVSpfw&+LaZy+=4X1gyClGN6O1-3>$ud@N#w;wY% z(MvaaR5dext#a2sfUeGYsyf1B*rZ4s;ArZeLv#qnoZ45(d|UXn)0-KTd8myD4kR)g zo51FXu3BI4-ZqWt0W)obm*Ec7xf0a;Lki~blzn`?4Jug*V<>MAKIKfPe9?P>?NKnW zmyPVqXlOmgI-HQ$@P)M{oMWZ4)^9C0&0@jcn+%hYniY37%ebrNcq>i`IX1nbqp*ZB z^jBO?dZ|`8y$amwY!L|Ufi4U4h_4wGs3)U-f?aQ%&cZ|2G)JiS8~5BuU9kp z(pk3@4q|xha)AkAk+=1|k@S@R?ztt#<#l%mMXS@dx11I(j6GNXNw6g3X$^_rzNNi- 
zBEQZH)cp`NEL*M`6w7CxmkY#mN7bWl;8oI+z82NWvS&R*^YiYS4tLqVG}o3~X?u*e zo+lv%n`<1zcQ@}cn#G4b7wQnnfNQUcD&I2wg4{uPd}J=?7bBE7v5%{NU3#J?O*b)@ zzUQ09J>^HsVpU6YLj5*nNKl03oI>gQ#h9cJd2n?bRSOnhR;^MsOiPuW6*vzsXHNzO z@elfFmhcj|XQwGNXX&07#)xET%M?R%)YE!by?0>0rCDe3@iT7HPhL&Xx#-Im+N)et z6Q@j&o#8zmHVhsBhT&@aaEO;Kc3{bY;t_T3hl`RB$X z#zp3xZq+)6s>kzD)j~zG{VG`*=hLuZ;1poko8N>CY|ou)!T|2}8BZdgFr_z_w3fKM zE|K(>6t8F_Tgj!+ZVHB?FYBO2QTHW}qh$i4{U-vq=r`u_n1w9dtM|Ciw47TSd_Aq{ z)EYto?N2w6xC-#IvWWixu6!)%Jn#}6AUDx>m6lM%^v8)`sYZr!OC&H7d^ooOoQvFh z@epf99B#;QI2L$f4L%lr*C{8s!v&V3#2L_QX(@3K;xPoI*HA!-7x@M>Kj97KDYS93 zj19Oga+4sZ*e@GBXr#w(E+yrQyy085h1YiHp~Br}a~@x`L? z!Z(Br!FE~+F&&i`N=T`^nQ;%vc+N4uSV|MebM9|iJ&*e+y5d5$>5^^bC4=^m0`ovy z+PV^ZT~%vP;;zWA^IP%9q$!kGLZ9eoeh0Bra5FZ7bOgQUbDw;Jx8K=@nuHEK@T&-E zIDP(7Ws>!6-%1S5bhl|AHoq!PHxrktJ;B?I->%XVe-k|AoZSNA9Lf3UO43BpmW|Iz z9|Y}AUF04?$lc!+YxizEE2v!DYTEY-Gr!?-Lj`uMm8(99{bX9j9>iTT1mREOJ2dzl z4uPk1jA|ujNZ)T5O&TY$onAz|BMiNLy<+>pF{e{17xc9Cm@xl!%&J?6xz#dVS%Ph? z8)R8PwroHcY;9o|xXE%eHentaaFm9SZxIXP;`@6Nmq zS1!NWbL?Z~*>k`4+G6sLe>G#VwuiJ5J=WU0m~j&4(~()m!JTbBn-+oJQa>y>4bQHz zThonCt~xvMH1;~D>EV7_2)`=fE`ELcd>WIGTkTA<5*tMsv^S*Nz<)pp zWlqsT-~=@(KAKLTUGv+@P|;sGdqUTcg!}E3LCUua@l+RT;=!-f2ws=XoG*ob1bpM{&g8RO?zWTr;brGiDWjni2mVk( zp^WBuiX3v#vnfxR=lQd!K}`J+1?MCa%-7SxnUYvn+AroipM$_>XwZ2D7|VHo+f3TS zC!Yx?+oECnUXinTQySyRz1+!KBV`iDk(W%Nu{V>%)DZ|JFOVu?y^Wbk^JD3K>;P9} zn)4FI3f}aaJmMw!vs0T%tE7WHk4ZxDV0|{}ttddR;73HLD%Aw_RyqsO=)6pf@vx%z=g(vKYQf+^ig7{nYt9I5qv=22D&CP%R;H__q^FQ_8kq1jHf6RYi z2i-sUWRjoy5j>S-RZhb+kY~w~vPO|r;%^bH6c^#S^=~LOg8mtMsdEIh%X0~awz>Uv zgilQ)_lzZOsqHe+iG1^G$wlIIqdj$0o`N=ID$&S$aqcd0^wHeDEyNXlpPGw^Aw664mx)CkD+K=$nJqT7@x;CL z>BWu2*OtpE>q)bXcYJXvyEPIgR)vr6$NREFuSuZ5%C{$$ zFwbM>clR^Ha3ytjz)yIq`ZTB_eB}&j#3U~w3Wk!UyE$MAWguDx*3zmr5t;e)KeL~) zeAvsb7?=~trej`U6UDjn61bgWwf+EyQC*ea7=6?(b|6DTizQ5C@PVnh-i&JC&CW{3 zOU9IqR!|E5n7t4B$0@jEWqf9)9ofRDVY+nOWyFB5Yi2O!fJYSW^jDynRZqVP8gW(h z%V1abclsOfcT_rKIa9si0pkcfVzz{NhO4+xOCRKUANHdk;BITbOeb-I%=73WY?>^B 
z?u_(8@8}Z{C+sBpM3yA0ioSxiB{Gy=#`4=RkNy*xHM;Mk1zk8R_y*Zm^hcHBF(-|%x z(QZ318%R@qZ#Do672>Mlv_Tn36hJ#C$zaT&^@=#thtpXMf=mzGak zFP^w!S)q*F*iTe?+OG zUe;VJUPRriyqZ!;ZIt^3&!g5!hkN{{vLz)BD=2$W^RXacZl`_cK3ad<@md>NOw&$P z0&PrP1cyp(xA>BBsbr(P;3G9eKRPLlxT24>gT(SCm+<# zn6QS}&sHCv0X#eH(2fU!j;c+yGZ> z-Tj}EZ&|`uh)MVK7sp*C$XWkR=W;VD=k0&NNyp^0VA)aFgT^w1hWjT8WVI3^nF%aA zqOtNWOd?OoJPgmINFq)_pJ+wid*DEZ!z?XgX1_YgicUBMD@)LCsQ9 zHj2pbJ<2^s(~!!hs7Qeh(XbgMPzF#LZi3wDm)Bo~Dw$(vg|JGvQ;yfLJsFKXi;*OT zp`n?DVaRmH;lm7(-~c>{A)!x(AVz1|2gru;bJtR46IdF?Vd9t}>xVJT@Pt{zV1M4y zqx+F4&eSeX7K5Eu*A80{v*s4`9kD^5Kq0J3S|al(Yiwy6GYIZZy9j=UjiD9b0r=TE zFYpes$f*aKkFMxH&60>zdw0P7!fn=*&=dX()gNd*+QFN~6!Dmp2Bs|+F1`WsIi^$^ zxPkpGBo7?NmUxlDS)3pzJaZC%=AnL;t;%nY2fRTMW-&mivS7tEW~&5Xe`Jmj?IkHd zjBs(`9mZ#VM{*lOj#7heF=Bb*weJ|4d5>mnVjSfE>$?n_jP`AdpgVe!DIXfCxgb+A z!&SwsdQheqM@VG+m43=^V3deIC0ZH7MVohwq?-gU);y+13&&11)2H&Q_qV~kx(zMk zp~jkt#yDnI^+^c>{AM@@@jy52Y1|fuK<%_!NB^Qg;`h-jWeM8@fbSBC`+Fc?a%}QU znip?VZ!N58>unkanVSX;eoR@tlPCx5uR*~q#>?sf%uI%-VP6i0j%cf5^}rX^zpd^- zyyE$)E*he^HE{%m%URSdf}izXs=p51=;UaBGn?8M2pqvf%~R>47$@rfmbcLdYhpA1 z&{tO9j-CwA3k@FUeoN#P-d|+uY<(RhBb|mo@eC*%`k*)&YZV=9^ z?5&x}S70Y9f1rW5Xzo7lAijap!nsQvS2Bxzi6lz>jC`ZGhmJymXzRUxBg+}fozgkO zIHCi1p^zBZdQ0F!{A3PAYe{B>J1?Ic#{R)wN%@a#!Gbo39>~zm zRdNE0&v+iRAHE9y^;`w>;W;yjtWI7{&kg=r#Ib2PTERlAxIA0da0#1p8I8%EbALMIT;o9J)us(8v^Jc)`*^LPncoEw_PzV*V-8>|a4JTzrGbHAhbdE(;^7Oh0 zp0Bh=tKwXgj20N#IuXo}B00jb7&}%n|5)xKI2kp@-Gvyu-fe%GJv`UdgUnUvFvkSu zHi2u$3UrRRC9r)IYk_Rmpln6?6ZoklFMBL>U34VoKJ$&RXsau8 zjDX_y0`wOKOtCQZXkhzH)Wrf=!nj@42b6e@o8c)JhrG~*QxjP-b!X{)xI$TwX$!^6 zeRuw5CP{Jr1z?h7<;qu#1nErsm4KG>snr>EYm%5ExlQ$f@=A_f&2x4U(q=kK-o^^9 zYAQYnPuD$7|HOQy@r!%~-cn85oWST)h?m#ViHh^%S5j{yubQW!6`g*@q5tG|s&ory zR zJE|6sjVAvGRdn!O5q5T zqXdOMDZ)ikNX(rjcnpP5Z}OKz6U)Bx>Y))?P29DRQ_Ow#O9;QU1Zjgq-3M785t3sW zLg70!Et8()y)#CMAMqy2Si;BL73^feDo!EAd8o45OQX0y*vm5)a~R0!XfE3esR`JH z3`btOuV%S(S{z5SiupGiVx%FGZbN}MTbwB22(yI)Ltc2Jpp!IbhS8h zC8dQSoDZU3Y3tcp!iI>u$S^_M=8>>b;I)DRVT7;je=q~N1glhXulhfYO7z%Z5@-Y` 
zb!)&ZbWlBro64K3?9Wf;0P?FTVaN-aUHE8LxunfE5c(+|SVmxqq=NBn3=j4eOOxb9 zU6ZO+bfe}X+9WWUp3qCtBgQqDY2532=I%t!BJGl-Z3s#AJ>(xepbXoz9@?cKyRHU% z6nAWI139cXQ;*cC?VX}Q^rN|sD-tv{#L;%3owWhwkGM@1&z$M(=hf`^_lR@V>>w7* z(C_w!m{)a&7sWH4s@{!Fr(S{Tji;o=`xeTMit4ZHvHn_JX0?9B!Y(|S{4DnO+T0sc8FJJv0MdQ*(2`crU9@rf3~jF#=E$?@kTEZWmTi3kO<(>@RN^C^-0(IxSQb{QwDhMDAhbeyMx)MvQ?dDiuscjp3FRkQ8t3XJR5s(4~;g&}}svUbSM7Q^B4vMalV#`c`8!4V<@%ER`!KjF&4%*loZsX(W3# zh9;hg@bZd<&WK0KNd78hbhs-IgDmtt&$))2Ua^VY!cCpx&%TL{uG**Z5bcm@l&wN< z4qI*~aHY+ZT;wmT(1?WS{9K7Zz)MZ?7~%#U4JC1}aVb7QoWrab}Qk`<{>ky0eHl}}1e3y;hHWFF(U$+F@KcqYl< zj_aI$iM{s)^*4&12>~s# z>kdCfEsVkQe3X~B-{S-;_O3gQge#6N+yJL3K8+p2SOpcSVD-DE>-=p>j|K;Dgehdto!FVg>_2TH9vZ2BhhozVL$^uRn%x! zcItSQ%A`G&^bNAO)*YC7@#UttypKYkddd(z<8$rWNN;X+4dNfk1}(BRHLQ^84(EF2 zHCf)s%QXL?3AV^`wEUJNuzG*RFZK?D7PEwE&|bpESG-pLhg0s3RS*cHl0&6`h;?CR z@fkAeyHj|MX1hFEum@aW-zF;K?$Ay!zs2tqJuGk`fU2ck=r>l0{c^sJ^FifqB7i0Q z*Hz_U81sNG1YAX&qUM1MOHvglL6^*bvK?S@{1)+b@ObbDVKq~>(UZ@Gr@8(_=dq7X z{D^MmoKWnke!yOd<`^y_bwe{~6|w-2sP3}5imoagSTPw)shG7g_MX^@wQk2L!Aq8L zgAaNIX?5Msi{<%F?BRYJx^JFcohO*XJ!FXId(tu51!xHNges9oDYTPwxSi?!(ve)Z zm<1w;b2o6AAc*sJ{ZVuYw|Vg%E?1B_zK%12y-}*H9w%GHan}ElyrQ*hPKm23e<**6 zO7d6B=L-W;pGzb|)P?P$@%%m8lKDsZv~~aSID+{LRh)z3p^=`|%UUYFYkaHeLkjh$ z)MKb)G!$iMMWE6m@7j%(^~)BgERigiyo_WCwc=+1HT+2Y%97;beR*1N!H$j9wYzphQgUyf0!GKqx14*5QEnYXJngf@NgY4KC=%=nR#7UZn( zTw^hQH7l$3G5$2w*|LFf`mU5`Z4Q z*hcgn8nk;Tc0@V^!iK$+_3(_^g_I$u-HfHQm)|h1q}uO(tb0TyrLIyxq`B^#rFaKS z+*U60qZhARBklpK7Db6BArW>5g&aiB4{jL7$Y&n1mN0@yk*58OoMCMxyfRIM z&z1Zz9D?cDf3*3qe^RC@6Ap@~kne`s0kb3rSQ|ZKMQf41g(C&YXr&z!H6S>yr0zAk zh5o80f``S&RP(uY#Yu*Z+%Z{kng^Wngg)gJ&Z4j@vdbKYEh5QW4%Oq8kjp*40K=CH z{cLNwC~Gaps}2!A0ZJ{OL}#!))o#M}!Z!VF!M=qz875);LPYv6;%@ zVo7kJ^owY~cddAlc!IlBkR_>e4nh~omW&E#%?1^$`E~vV8YRgh(wQp^##YUVAplX3 zdP!QM`kGP~-K6ML;CIwYweq%2DWb#j$Ezg#m&$}$FL-nbcjPQ6h%pm_>Y{4+qy~%1 zTwZqF*i@a6+os=GMN7e`d-VNLoeGYQyImno(LUcWSyZZ%FTcwNwBKf2;(iffhOY+q z02ja)b);qt(ZSN%XjAHMY^k&6IOsyaX7nv>V$BLGkdYA6z 
zyJ#Dyedd-3n8SwAHNY^Kxp!H`S<20}X_YmYn8s%quyAhOd@MENi6tG!jz4L9fqxXX zNlzhCwv5yikpYj{Njp=MgGUy5aNto zW;2&~HcnuiO1d2Cplhbg^82pVP#>@HRve=9=3SHBXSvxlNjhLIle$Mo{z+B>y`zv=gyrgvEhR@M*biS(p}s3a*yNZG>NziN?hxblU#Z8_@A=w|QZis|Ur6-T6Fg_~y4LCU+y8HNP(F} z!hfRsBgY`+3_HrU)@RxSm2(>RXx0@BuMJmq?IM}+%Cxw9RWik_&;vSKd7S?i6)d0N z#gK1REMDp(nWQ8;4hrc)&*39r1JF!*(0bRDU0&U2HMZsz)^4gAop#-vu4lw9s>0}; zgFk8?X)`v*sdj2h)<($3>2@uK#23`v4*P}c_@joL!9HLZ5!3p*{%zUD#=6=MyT{bJ zTSuktHiudYqid=X%~OL?v8mEEMXg0sA_L!lr;5Apbc z_R34;hk86P>G_X3^q999X|1W)q4bExnYgNm^4j_MXWNYC4Mg0ADOEjW?uzp|geIGH zQ$vPVk8YBmVls(i_g}}qDtpx(L-5S|u&0{<>`H3E6Yb&;H>@Vsg{N3IlIpk0O|j&k z-Y$kq)CVismvz?D*X4e0 zA5OPP^KUvxcZfynFm$)j6&8Tr=3i;lG8pRu^plz6mMPSI@KdKE<$845==Y*JW-CtH z3q$uxy!L*FDs$enZH8=8S2g}%-iaxwH8T%}*qWOl+bywGC!t!eP~9f@@6xBL$Lubr z{}kQ=n=!+MYnl784L$ET+Tvw_y4gPX4!oB9DOt?*7{YvJ!u|@wy2D{@lL5eb$cEthZ;>rE8 z>AaR9zdINhR_@n*vPzc6?r<|Wrr&JI(OKgzG|bdu!y0N1Xs`kQst;(kd%x44)HyUyCWJGsN&@-xla5@1e<71n<@EeVaUQC7$L zORH7YruDn@mg@8sl%c!Oe&+&(Rpv8(n8bvf{(n_J`i@;tKJ79dH=?4g|1<6lrfAOy z{5kCV>PLhgJSZMdY$kjI)TCY#smMgWLw*#$hw4o&^-HDw07~Xi!B^0;2ahY+#F~p` z7%{2v&{u3NIiPJCu7NyY9F6a$GzjAe$Ei6qN8(GGcVRer5->JSM9HWB=PRZ;Fer0y z3^6P9-tmg@;Og_~mFvMpeQz-s(79zPRtioqtj8T-{Ns1vAA=()CgKcmR)H-k7z~Of zldHf+p9@q^rgBahJqS5^M^V65Jh=OS%ph`#4fk;X`u@fJvN5x6(@0Qu67EWuL`YdmAc}MNSP)l^TIW zvkUWxKbpG?H;30u#Nl^yckhlNv~U8V(@9gQ7a#mI*_+0=L3%{OM0?9**Haky-u z`h0g-xly^T_E^OW`6$&C%qqz~l7Tx!Q;R4hecf=hVha&d?Z=9lvx=R zEU)J3my&Tci;huCk?PlbpO()wAT==+8CsECTUnzL!+gwr`BSVt&RH6mm57fNeTw)- z;0O+{A0%4&Hl zVEHQ7j6__kd`rqx>^nJGCg*>9!Ot_o2UL7>VluDXkPmp-yokdnTvPivCq2~f9gNnS6k|J zfYSW3jMM#}A-LSKXOsY2(cScchOInU7hf_SbJ%=4MTRw2Jqp6$5_BIj)JVXBYT+Ds8{f$V{-LlM+J>(7L|9V}>`pVvh+2lXiAk7C#8tx#i)v|D=OQn+Php zrgIr_3%%DGLA*}Cp^P9E(|;o-@@$4HK9~H0k(VQ;bTf`couRgYSsUKczA;U+su&m8 ztV9@f#QNZ^2Spmk?*sTfFkbwv%Q#S>K%Psv0cg)Er)Sp*&pLH{8s5CaWH;B zynykCuveH@;Ynok4`hrb<)HrIGs&skuj{^2GP&HDzSNKCl+#wsmg-ac9k4Ej_f2^? 
zd+k?!H}0r9UGNfLtT+u=2}@<=rC$hNC9Y`$#6u#N&;XK35bODqoXS^CJ4yb_pLQ}8 zGopdha|3g}c3gu8w#uBZW#U4NOVMWBC*4CT9&b@UD!xX@ReVhON1P{12!@EaC8O3l zkP^jnrbLkzp!UaCSHADib=6{OTZy%Qu-T1n>LA>IwL#oMoW>kS-iY5^wZBl0|EOD) zbeAAj+wZ^=7bt>Oe;~%oN%lU3I?k^nKQMFp*6vNloa({WD6p6gRB;dcw{!{MvSwkf)pAwwbU^GkZKApUPf(_zb4vEz%oHr^ zz;oFl?2qmT@L62ao^wMM|GAd+xzF)F^;hHU@CxgSfI@taIef(p{5t)Qaa(a)S^Be0 z^s3UXgTv{x@`Sc@dTQk~(;E7ak|KRVUys`f9ifNgpJFG_8AL%=(@^$;dt@8~M|Jex z2l4=&QwIErRe!n_Sc{K3Un#=XuzA~B$*8OlWmy}Kq2{0B^PL>+GWPl z?P-f6R?@8iy8aHMj-hv&2APl_CmX3BXdm|3)5Zd>O=Q}9Am4zYZ2+vIa9RxTmVu@5 z>3$V6XbD@fxF=J^`q!#n5{F(4=>I#JeIZ(NVJ)K-E`$9C48zmnK`pMr! zf0JmGox;c+EXq;->oxDG%lO9}NK`g@tIw6>YrWCRB;n1+t1pmRj2h`j(slhl_yg&y z<|-Z{FH?EuwUZ0vLnuA+UMYV2DawCh&FbM47jeZTTXGK<-aneOycKHRMT%;?TUAS{ zsB;jXAfXm5^B_rIJrf&GI;MY<%O-u%T!|x+7pWF+?IVZD->$kwo-5x!VIpY}o8EJi z6y4L+xPX+mm!aQD!nLgw7LdqIYZ&pQ$~w1-cO-@d&2}J(s>ervB~=?7{HKx>+8ZmJ zNfXrX#$gD@SqHmXNTwsV>TOAigEO?-NwnS#{1j5r-UEOiDYVVI>^v!;$vxu@XdI%nKL2uS{+C9o?lU{5ksgWp-tp$%$dqVQTcR-ZqKPJj(pOMd6%dYD4WDZix z+De%`>IHKTb0*DGK?4uagdA6J7wsc?Hs}iE6v@C*z>k!4#y@&Ta1gkFalvyoIKf(b_=9E%^C#HS-AfT(2MaAL`V^VASz8Rh?r5auX#T^xqsl%%C^3kKy;w>Fjy= z7IdaPLK#Zmd|D^1=!M6m3SahOvp(C76|XeJ_05Rz^&bY9iQR$VCa4LWN%J{Cm_o>tl7_xBS z_q5;b?F0iYrMa)*E%knbPs%B3Vy$ghAmyQD|Hk)}ji%(q6{P3t#xXI3d(5w0#VEF< zs-c{hTjpyJb0f+JB;z~0;{RP zflY7#FwMgaCNWMro<@#xR_xh77iG<+EOK+A%@AZg`^P&jKK>p8QRRTJ}p8N?cbFMvN;{q93j65C_iTF4(A+w>V} z7kxB-XWbC)myd=&3K;AjcmaPmB@T)}BTG}5R$gw#U#1;b8ND2oa)>prtSRvpy6XHC$PMepEQ+U1ZQ@>7o`crxP^PYXjq zjqG>Y2*wNP>WD1H4)HO+)$}^i{bkpI9}+ja7c?c(V|~ZESzD!f3qP~m6TF5VnZ|<} z=C`T>+)D7T?qvRU&|OoIJjf_exre3GyA=68uD~~WoU4TPRes!d2IUrf)pDKnwQZv6 zEc~hIKH3Z2syEVen76Eo%8%d)vvBta##7^+#9GD(LsIY;`Wjv31`r6+)+{3bl7yu*JXz$!EAkE;gsrHzR7+?{GGoI0(UWfbz7e)mm4&bpZ{;VC$ zyDNXge^~)jo*)H0u0Y&YU#X^nz<_fA~?F3*rf% zjrhunfB$L+1 zsS;1DoW=GN9?pG%I0(#1;qU;zJ~R&c#vi`v3N!+Jur!7Ps(TRm)gpZfuns=`H(>?sKUhQljb3vS%$*9gZVGZxvs6NSG((jowp0 znLKLI1jYt=|Cp<^G$>f}l8EFrunQ&Xoci>LYlmJ6c3=tR@Cv9)6*s?`PVqR 
zs(e)QD+gs)6|=F^xu|RmZX2MGti|ud2t=8L-FaPtW5nu|LcSMyd^nAFpVI34h`Wb& zXL&C-5X_zAh!$`|^*ZHh;&tg7xd(~DekpY!ucZDJCy-B9jui4K1-t$EKd2tbj%Yc} zH7t@2{$g`kH|$q%G@Or`t#T>+On6PIfNNo@m+E7NoO7^KB8}=>s*$pm;k+W+FpLGdM z7_VS?v)vSDWkd-Z^_H;3m5go?U6h8qDQFRf6}~~Q3Ve5E^49WmVn`e#+7;-|_C^cV zr?8%)mlhSlw}f|WG0ZQl$+EjLUv)e;S+Yra33wx#tJs0H6Ks>6E|`Jtl7^*8xjmAJ zJDWL9qQBe95R)k1tDdz|bigGQdMNoe#>99IEtQUx%`uj;VaY@TZHRI9M2A*-@c(G) z^VXxbsxv9=+!*EB$P;X<{6+wXILT+OwZM?PVZIIXqr7XBJKY%s#ZEGpx*067#IEKl zWuowkSyH})f3G?t*M)bzYGaa^Yo{*@m$3u1|F$e;>9yq5|DeZOtn)1JkIZJ|M4E^( zN$4i?YcoMTlBq4jNZW)b8?#DN`G@O1XGijCt(t_roHLd)p*6@`v%zmFYlV4)n+^)9 zx<9J`94GM_?nCtjZpg0~$Ca!>J@nMlI1T!q~3M zlWBaUQ*)bknzm521sI1JDgOdQ=U4?`OPRzX)B-G%fnd z3q%BVr+5*FMR-SNgx0Z^YMh`+6uFYZ{7^xVFJs=$i06?E!Q3*sbLPAbWQP!Gp4LlCS{9rxe`zIlH|Rj zS;)son!p};5pVJ(nX=kWIX+w%7Cu6aD?d`7R#FJN0mDPx8p^pGp}t~N+f!I-2v zBzMIpDyB;x6|a(ZOPVsjh(pB8ggL^+B5mkPzMb&x<`|xf@X8u5&Q{UexzWfWsof|W zCI!sp%-1f^F9G6Jd@U8LliyTdD=e0lsDd+=h<+%}$At<`$n_!is6p20Tgo-bWbSL& z7WwDdpIA?%z9UXCy6KOR80~yZIhCw}Oh9Fm{FqUikClcSPNgptozPE-@ewe!M|ZqM z{WUK(IdfNPx2|NfZMD;8z8&JncnrHtUk>nLwsv(BO5U$xH3XIakayNi+1)4gusWt@ zinNx%oh5=m^VC3F-Y3)k4fdR)ruoa~BShVe>CW&{Vfg>`YHnj~LJ5{}*%DlOjJTkB zS^4bJE<v@ zYjI^EooC*KNylF?`eKty()78wjO>RR9v+tjD+dV2BM!-nNf7~IQV)u|=Nj=^+G!UT zQ6N)f>nfTBKVUDY5fkzNFB420hAXSeB5I3=>x87>EJWQy9-HW+TulBQj>>ANOZ?YJ z3TOw{dWbR^a~C8CW8q&m?*(aa7wfd;FqK8URNYBEju|vaX*PwZ_8=`g(^2)5b|L<* zA_^!A+a$d}58N_FtYq*#)(OQ-%Y20(pKY}1Mi0SaIM*^3JVq%n4uZ{%8|Tv(N3})?%N##@pbO~O-dok%W#{? 
z&*V>^pfE-kcBNTLzgQi&7g6EMVp3cm`i~?YJ(}SdNc-861 z7_76?pUmh~JL!@V#wn(1Cx!J(lQlj86GT?caIedPC0aYz!#t~I=rhLA2%Ls$QU(m8 z*qVy#x+`u~ZMR;?Z`b~`{MhBF`fDzVOO?Mf%?lkZT~saIvO$zqeR1ts{@Lmoi=%ie z)$%F1>^JC~|5r6o+B&D?C*G%#SGJvq>HW`i_Z~hA{_dnJw!p$+$%gCQNq8Wa|&?&YS>q6D~L<-rW7Bi|$(; ziGkVnrt`$l$zD|~QeBi#2a#(6|3}kVhc(r|e;iT4R=`da1;s!VDM1lwlnxP*?ilMj z+s@g}&d$1fV~lP@P{HD{yYsQ*vAbJ;yDq=K>-_)T{ zw&caMDY9d-FZPG|q0n8*koHtX8HU_^P;M+9+Av0TWvg>-zidwGcyor78e3tAl`dab ztc{U|``W1lvdxP|$R8=)CO?-=lrrrkyg)FAWNDwMOQ{;)%+v-I%Ignlytb%nE7U>B zka>hEFZzQ1lk)T0J(`y)N1s5It6JbOR$iUb)-6p~ZtAjTM5OD5R#wnRO{Mk3az~}SmOA&g?6~I9 zII8$C|HNMf!mWO7_4&%UJ{@*%;j?XgTwB)Vj=lKvsr@alh+{T%H5QX}>%FbtD3yNi zYu?lPJU^K>G9FF)WSj)EhVj%~&IH1kT{VPT6_vdfqPXBwx0tkc%fo)>Seayi{j)l#uL?;h$=FtjOv#aV#B=X&+StP4UP>6!P|v*{ zIn3&ctPXB9_adKG+%#BGqB~c6TDX1k81*a3Q~S4~Zs->#u=lRerP#Oosh~AGs{Mi> zJnenUaQ^Q&Rl{Qb$#7w9GyiAMJhPub=^byFB3eBEg?5Rw+vTu2L%h|lU4Q`zRYQAe zvbZ9p-xtn zS@_cCcTWQCDz5f=s!tZ2=w7P2wdG9v80FWLEzP0I=-6xZC5pk&9?L_;{s1>qg|cXw zn?6Y0I=5J}O2Zu=uG}w3w)x$upk29f`y1nnd`j0R!+)8%ZCCa0lWsJR(Jzk)toPP6 zgnYAf=x+MiPWA?u#b5Me43B18G&5CwV@eeQwBT<|mX&QEtot`FwCk9)VY6FXORXeP z&~(>wBkFwpP>Xae$-=DNu&UnVP@CcDpnGB}o9V1^R{R+GUG@eU{Ht(nC+>b_)rl$i zUl`_Y8gT)(uf3SG4qszhLM|biMN23;av5DoeMk)}Eu+WMzNIDse}K!ueXP+e$C)@z z4!r3~Ep{^T&M{|PK6zF@j313MM7~0 z?-dPFtx#UFBR!lwDQGyugX2GQ5?IGezTj8+8rggBD29t<_a$JxZ2V3H*Us(HXb8)= zdA#AoHtsyqYqB5wrf?HwKYTv%8Lbq)wR#P(mRmaG1uG5hJo}?UDGm9jrfR)p!S=vv zN3nf9id6}}s(SDX1+Tg52|a?L#8gr=-#!03`8JxJ@Gn)#+Y!j6=kn@i9AVDnKR>;# zoTELxKdv%X{h|9FW}#}7)wlY9B3@B~+ai;4T=35%zwzsd95F3#HrYiqEk1&>M{p@% z7%f{6IBgq%6Goi`%i3${dmSqH=3AYVD$qEu=5+NO-GKB5cCTgwxDy|xnqK{#a7+F% z$AOe38@92U{74euH=Y_QUNbh}f)j@t z%jWD|wqrs0#a%5;V5N5Z5A6la#I6?fO|_^kj5>&Y*%Vqf6CYWZo>54^*4&94Pb@WF zSUr6o2delFsCE=&3 zMYONPc}N=lAJPw!E$o-_pl~8%7p*&SCu=Ow9e4{o!Ri6EBmPrXpT1j<_O)N@RNQ7VC44$HyQg9Q07vWZ@?GUzmVtjRE`$e zeS$!o16Rh+vQCj!}wi0m;9Q^mA|80Vol);p`Kw`@i6T_aCKfOFcf?mU(85^ zE(OeFKIcSEAIT0zCm)N!Z-;LTbQ2E3Z5`8ycVI@1E9o-4MK+Rr+QyU*qFjMjV4qOG 
z!b@_8(&xb;aS?!xROP>cIfP4^b_jgM&p0w0hZnuyrNOI3&TUSFL$-6wGU9PTfn*8k zEWd`uB0oT1RL4`tq2bvLwDr7!jXrcMFVj!Rn8({T4Ptc(z=Qu`De4(J(s7+iLGy9^ zYxyhvE5c71OZbsALh8iGBF_;Ys*0w>h`_A#RGKh9)=E1j!1>w(GXx1!;u$}L?f)FB zZZ#M6&c%K-&T7QsH|hqpDngEC8{bGQSJlxcl30rU6)xm@*~82ulmpVx=yTLp;;TLm zv@havmqYZ|BK3Y;b!g+$ZMU)X`r-9KxWU>{>H_>(b2_h-aNS^`ydgf(%_y@Y4^>+; zu2aS<@lh_+X>!2|SL#Liun9O?fN=KSpz0A_JG)x3E87FC!*M0ewMs91RYM}zn^0N% zois?SHs3G4Mxq%mryn3!=sY6EQDW3rz5YPiA13#@?yfDH9oSNY8j)LEE{FIjst6y~&KJZZ(xKL7ZN4mARWx0~WJpkXYD4mXSir z*$#%$vf`YfRRHR5$F64uOud3wkvRtxKpg&4-wvRj7~A|3_(|$F@EHl@?_x(r3pJYA z&zwz@V(v1x(cfmLvGN(o8}5TC%s+k;pl#6FsRi8iJjx)K_Ai~deFuF5FuUmLAGU>0O16x>?f0Eu=mbbID>FDx#DI z(F29F3_M^XxkbJQ-U!C8_`-N2z)ZZusujN2;X8t~Mduu4brc7=8x9TKCP&bfcqtg^-G3r05|l^+Oo~9if+QKFl5uypHx{tC=T!JUMzO zZ89GDflk?>2RmqsTIFD^jecPPRrEvhiJ%Oi!BwD?5lFTJJDB#RF5o@pe;H(GCF@z# zP-r(4voes~$*G$3AGd`c+^b^Yp`>ORiw04R78VFKOFdY8=sa7)Qn7~+BUpRcYm4cu zZ*0w`31BK`V#F=*KIi3%bm$1zJShv#6vXuim|IYJqmo&P&ez)py?OoOD5i{;44z;% zARq8*=4GU%@F{C5f=laRF_41rL*N7(-jD-6=G~ommor@$-d)9DNuSpf7$(VcEz0N@ zKM^isoELp&_A(xcMq(ArNkaeppUhMNE#(!nm;X9!1tx^iA$7Mu%j3(oe=`abIY@4GrqD^cu5W z{BGP&mX}~&`zoN&vcwV$aLi817@)@R0{H-J*FB=KfI-czN)m8iJv93lW3+Nzq6cH0 z{6a`OgD$(g_!fgEe>D0oBLa17od$R`uQq!C0SzX(9}sSR4?hGFETxo6AlY=YybLI@ zp^3i$pe`0JX-baT3}vN zJ;?)bsqZMA2e{P!$ZQ72*AO-w0!ElfK`!*$M%euseT06q!+F|b*w}CYSUs>@cL(s? 
z(I)x|jN3i~Y^Hzf+DGuD|JzR?1S z_b}kMA{C*tu#+ML_B=`_!h4QOu3KZvA{x+6Ms^Zc2z?Q663PnZc9EasD!3b{DTOoP zKeT&kp76f_FPs8DV=BDkxe^FtBis(4+I9=lLit#mjkr-ys#kN3v@Sk|JD*Nxh~RD@ zsrm+-#qi7Tg6A+-r`&;uu*$+V!cLIG@?^MygScGhVfb~eyKEtN+iD2hQLN9(SXd5* z^Ca*BP)6U+=?5QS7IH99Y~Bn`Aao{~%5i6d>)kkk9Ftc8XOP?PvI3baFf{)I0|>{& zhsPr8);=tX@S;4(P{lPxPZK!0j`CMU+n7x@ROq#)t+uzd`Rh2hXKT`sE)+M?;ke*&MV z?TTwarINAvIvAyJ*{~fPD<2m0hILT}FYISkE0&JDV7q_oEjsol+vI8)^wgTi4~H<8 zIgBpwyXhh5iN#M+~s5HrXsQFjNvVhvNfxZ{{1>L$k(z!Uh6nZ(}M ze$kfMajA7V&jpHW;?o)6$p(AOF>sX?$|tf0E$h$f-jD@D=X=g`=R+&Ij!>0gP5YzDTdXgwQ*wE%s-}jNKIWYUMMN4i zwjT6#U@)!Wb0^XdXtvsaru}9QX`CgEDn|?w$-+v!Y=L+Ph64W(e#LI04hr7lV=H9> zCt^^}ZGI#aqcF>tKINT)8l83tPV(nkVbKlpL+vo( zb;?Omli)0MKcwdWM>CVE(eHF#842|V;H?fkKBFoA3-X8gJH!{Uf|r*(L+*19J2~>R z1k)_Lg#R#Cs8eQ8 z4{Th|-N0QK{1N_zj9fg4I~&a&e-;4+-Nxs(w@I- z=NE3`wxK68mT~P-ajXJnqw=6acpiWJqGEWWFl8(krt{|;ocV?F%TfsC$}V$<@(j`| zw28=oqQKY5cX54!>ClYFY-$_9Q~%d zDMrx=+EUIO-VF6;iXXC4rK$|zhA91Wr7&6HkV@hV$Ua4W<2cJU_&;Qq%d!?Mf%+9R zM>4pCJ4Q1G?W^?^YI%ohVxSesAyXMCj(gEyDNlee>TYDWa(-!}lI5Ib>U$C6*%;Li z-v>~wdiK13L9tpq><6=$vsL*UZD<+7=kVGZPq3U3OTCid#jUXtOP9k!OFj*b%6FA% zyd{cgOd7CTb{@MF7a+~RI~N_4I1}wQpAg+APv5vw_>7ts{6V+^nBaL?n8*4wu13N^ zPw1Ug`PgOBHHvdMD_0?3PH?BsmmVXss~aS_q-TYpVh>75MyY5BZDQ-*KMGmQems`I0l#mze3|YTEt$i=uV(_nRgQ z7ciE@>=FbpTUWR8nc%>}+k6lX7)ud)^Jl2rY@6(tg#XG8F>=|SQa{Fb@^$eoCbN<( zf>@*TB82Iz-n4qbS`dmZNYQ zlrX9YapjGWq=Yv(x% z+kF=!3q@GBAg)|G;7H;;;EtA*NfqkHuu(Egb%yppq*nGtM>;iO$vrdFcH<+%l2gPTNo2a8i!TNhuCk2;v zWd(SCw&rI>B|1}mWaDS#p6ckjaoqi?iWOv7p$2FF!%0$|wC94ep+12|>fMkC9ujxf z@yK=}=h~wc`vqppsJuM>RC88(8t!H{@c);3h}V@8>8PNL+k2-2H}RfO-o_U zq?)7C#)6+^Z~u1oWK&Hc<#gT2l;X>1L{1K(ZtRuygN{gn~(s@d@wlA#@_(t3d^;n`^si)F| zRJ@ffCzIbN7f5eVyCP~OOnRID3UNL&WWi2xG@CN2MFQ}A#A6N9NS>S@y7A-%bSL!} z%3^GVY8rKZ@ql7AZDE#&Ohu1Ra+CZ48pBEAX66RJSt34Yo_}1F3?CmAAli>S6Vmi^ z>9yV z9xrC(1>qc|p8rS}0{Ma$HNj97`JrkN)LE6Nh=pDk?2whPS8tA#9AdY{SBSkhq3eeV zpTNsk4Hewvo^hKXpb3VK#PjQseAG|NMLx5ptNF+U(kvwn>8%jSjfgJqowN@@H;E); 
z5c9^pA|i5YU5U`02drcXMxpoTX7gEMS4Z1(i8}+)Xr78LGV)cQMX7`s#XI4>vOc+s zusnC7bfIuaTCW%o{1f{~cvsN1)+AsECwUL>gGHn{9w<>ZdDvC1?Q8@OY9Qq-x>3bb z1mWZgvOK@!r%WYNWT#0QrDsw;i#|#NVmyTjk{iL#_&24~mOG#%S=-D%yi!G{eIcg| zs$u`o1n9e{|Ehd-`0BHYS=x|dUzv|)R+d1rMm;8Zp@^gMi%Jt*P&NmRkAV0^=;k>U) zsW#CKmO<<@JjOf|zpe~t=qG4$OLeKl@9B8WGO}pnDAi1AddLSwIvumpTXu^XIM+v} z=FD(3N?&m2v(?r_qKFnybBM&iIhfqZe8SnYh*&Nq76p0Z{ariC*;eOUW3zv0>yxH^<9E+J{n@!Ft^4Hmc z`?6xxdT41fqNG5_qEqE%Y-O-Sx`T6Mxu+z8dwzD8xK;4kp;z!5MwzQ>&TwZD51V4S zQz}XNKJL^4sWzFrFw>%X%8g9Cr#Qk@MDCRJA{a_2mcjOZh+Z&TG!__h(#JUB-BP9Yd$7}Z~|kr^h%kVu21Tn>!nGO*dAxfhmr+x74qHU2Vud| zTJb}Fg?LcXws?*3o7{UUU2sh%w)256fNoTbnX2inerEWr-dRG|WvC8jgX#~;*J*eq zOX;(rT^_IaCloJTq-gftCH7HT7p)e?sJkY;=D(L*wpG+T8>e@lInGSNxEqd{!i(y4 z^NkC#%<6u_f)q%(S|1zJA^V}b5t1Vr)DQE)iFyo0?lM8W!NGX|{Ud7qTa%;-=9LYf zDj`Edy|8eo_G?{0<`*^Bx*+L{;!kZ_)C*ZpZSWd_B&YWNiV9I+?bi7V1P;3D@t=9N zug|{kyG@aNsQGZ?Ip^y zWr-SRVA9Ng)F+^Z;nS3^92|9V>(^>E7SnVed%g5-{cgNlE~oY}p(0~?%@pE+_@hRI zoD}{?KZ-gxkgIu0PgFw-)_KIrDt?*^b6d&boz1u7qK_l!x(dgQ_|tF(+Wk944AH1$B$ z=Z0Uj>f%J}ZMyr`KbA4{J8AW%V4!Yeg?<~OZv8Q>AM>t1Nj(+RF2*ZkISJF_6k2}# zunO^h_Aes2Ie-aPb~Y?zJ}(+!9m>ke+Fi4Zbul&Hm=30H@YS7woI)*{MD`fJQk5?U z@f0h{5SM8xxkjiO7AW#%-y+OtVsi);q4gw=sX$Ph$~m*e*9>xgCI4$^fmg?L>ZZXv z*G*B^aB-_%E9uCQMXqu)zjVrN*>>?o`zC=ebP9K+@g~YGyIFS^#peCCoI=Ysn@r!) z^dxV?JTxiVPOCtrYxC8U_)~p?754>29%ZrzVt5i)`kzE>myC`C{juqdWs<3-xpk%D zu3W?tFHXpqWnziGB{b@rL^~oYwUb2qf{&|I;<+oVig<~1!C{$~yxnEB)JZ(V=6Bn? 
zMa+MVPRb|6PwU1j{d0!Zd{Ai9Wv01`5%G@tFu6MXkfvJ>1&vd=%bzYkFTbR`;&wo4 zR09(RCF6xQZxOUzRe23p43R}i)@yp=*3_E4y4|VgjE{BWH!jx?(aP6f*W_tUfsV@4 zI&hg-j?urGJ3_itlQ#B}$js09t5ET|q1RefaNoMEwq(oA8mWbw5^3zMxgD$09j_6G zF4VYMy!=-y8J1&9Q2FGVnX^TbQ>y0C>4HbR_W%8d8@-z=b`>A)zENeLbF(7>la#Tt zxSdc(#FcK>x&st@q-)pwjm?&3QUNB9jdw4803}=!i`|D-g zRrZ;RJV;kLqGv7PQQ?Em!^DBDkJvl~J6(t^FtC+?LvwD(0G$ z*NwB7M>hIckFmCfHP-wCu3z15JP#4QhwGy`Ve?zH^AX0x*_xF?!;tT`tgp1vf4X)< zfAYxfVeF-u@y!G5{A6WAIlCwJjCB&nd3|8bc+SDVAmbePsFy`2L-=kRG@AsoovYL_ zB9a|nm;z>%v~$+AmpGWmK8w_aH(qG!2 zg3!4Zb+cr`1bmk|R^q8h~vY7a+ zy5G`2k(rim>8&+pQ-^Hjs&f5q`6|yw?I`8RSzlEO)%wvtgtKW@7S&kgv1C;wMl~fsy`d`VYu56=uEVs*keE#zNZu z-{tS2fp)0!1@zD?$8@vpr688Z_5gR|gV?dy3ZfI6mZK%zXZLP=MY+jw@jFKQ&ABmk zBa?;vJ{(p;=D**0t&G9n(u%1tpht}QDkAD5wpOR|>X|=n9sGHii-b`~XZ9-Mb0l=b zzvTDa_kIqv(TMBRX$)61^1zScm5R7NVQIR2Nb}5crLj%#ic^M9Gk1DCu^*8J-JEAdY-c{^Y z_46I7Iu%!F{A#{@TDcroDJ{uNC9D?Ti>8vCM8HY~xm9?}nMo}c^zJJw)YO-Be=9y( z`%fLKY;4U?)t(A_BNaJc`AzqTl83pX;g-^|J5*j7C3w7IO5_3J0@(`houmS3(fACq zZCZZMqC)?UxX!)BnXMyh*OjUoI}~6!r;Y=wDsh%rk{2e-lw6#Qov1HLPs8ohoC-fe z2vCu|>WPz-N5|BXC@8=GUZLj>wXLrl)-!0nQi|_9BXcQ_Z=1<>t8{PLia%6!yY5xN z(`u1Lk@5&q^AdS25bE@{U!HZQ7NzADdEd2>FQ%wT62h!?~HtMg$PTxY9$dgvTXSJG+Ue z%kH$EAnmCnni9!o%mK+h3XEF-hEqj^c&sn2pM>NvfUy+Q#_NoowEKQ!);R`c@=v&r zlXF0V`(C}aua)48{n&hkxC(#FP)!OUW{O^t6G^|BDhh=HV>+q(X|uCu(`VCLH#7o= z8RfnL<}sFVvWc?{zCT!k^`VU4uEmk4;~KBwkI-Co62b@i7~vw)0>B>lL@ogySNYhs zYyQnTLtDwb91GH~fTLG6GY&yLlh;9=NYp-G8>Z^xHbV6`FrfYb4g>Nua{Ny496y5a z2bxD;NlJyPDm3IH(BUol)L{0Km`vJZ_DP@fz+ujdNiRWPo_f#ls@c5zU7Imx9@DxP zyO=jqRgXiEgS;yI1Ei8lB}O8%%KnfJaj$3EQ=++z(P7kI+!~(|^z+EdNoM9*^l1N) zinr419n-5&$!QC@dbxP7q717O9pZ-Iehbf#^9ZGaXQlSUCxWpVtH>q%xX2yU(dh0K zOj;ZIz$KMo$A7TXTK-=9uJu7>qh_Vq9aE+%k3$@wcv1LM%4E^px3%hwA3`>X?a@MO0v83<}wRqg7o;9msAqEcl)TaKP?9uODlAZcXfxW zLh4=$k73$s7c(2NLGgr|fp#ye}b6IUBX zEP6%^&|w^Zk@j(mJHfISd(hgK6-B#@s*RP;driDen3di4Xg<~W4)+QeTi3F8YaH%) zBNTT6zrF5TFpdyuo!}wG?=lQJj3l0cC+=QG^DbG^Wk>5QlUrZXmsQSCd(k^E*=QQz 
zg&m-&fvfl@6=4huY2lUtrkaux6Uw?x+wY@-IvC1{?-4${q~DFQ1%q}BqdH*MS^iLW z;U_ASX`2Y$h=}%^ltFnxA0)pm`v)*nEt_vKEcBS@vCLhc=aUm?9l-q37K5D#6IM3}i3@2XWGR3u|z1#$79=VRY5PyfF z} z>ir`6j+1Yrf_w2Lo!*2bf^X~F;yA+Rre7I{iLDKhF^5Sj>hc4wk;-d(=dUG}>)1m- zQWM!7+bk??@v}y6R!dp5{vm63#Yyo^)^&^{G{E|aO(V2{>unnZ$u~1L=ci4)B|@M6iaDLJeYu zGdgIgSlgT*y`sQI#Rp8O(^x5tCt?0zAWP)6%Qnh?(`kSk!AowdrQHXvTTS+F)b3R ziSxmnVq@vNRjj1xIUf)@eH`Zvt%DA8K2$ja6F7fzegHl2?4%S14$fKkfms5-UHX<) zk5o+PWXJRIO{El>aK3Q`b(TOSn_wg1IKug~C42^@me!1Zt(ZagMd9q}^egDx1RWsd zJr5~h;8FV}iA*9tXdD(S;~#FwA$KSU`Wcisd8+s^<&(^Zy@pDbdXPZs2g#hWQ8b3w zD{DI4S(FslOCJz+uQ387VWnpXW2UHgbTl)KXRUK1&oqqGd?EMh%7r&68#E`uV9FV_ z9|5LDDH}>=P|qlQG6QLwWH&ZEr#+LN4thdoN&OeDr|*$OMKt za-4sfe9_#>Tt(SvG~ig2gL*;HY-*6UDPui#r}}oxH`)>vDew(Vrrhm5hL)zv8UCGi z9Ug7IMB=o0D>}(zn%j61vZ`?o{X1o7U3+ynrL;CM|1srU&5-mF)MdsGQ7u%!P~~4j zH5o>^B~i|4uME9JMcEo-3yIPjCo_^>ZQI0Ek+*i{(MFK}X%kl^Q>HW@&HY3vZ;VTI zqHM2U5MiQRw|?|>r}WgWbiGDir|}u`2dITInul;*i~CJU@aZx~`Ei(1fk#O2s;U}hxh`x72ZO=F)YE~uQyK1RBllgPeIX-<5?8A%hZ+rwc3mzExb^{k5%(orJP zUcZvSZ=pijj25;y1u*_ZtuIzmY&cR<^z>~a*^OY7PC7`jRSonT_mU|>QdY&k1t z$zhHG`Z&JTmVJ5D8VT;A=W4paZGfX_71$08KvTeaMketwXkv~my$x<>?#s#mZ?lB) z0nkDaUYiFsL5~)nWm{n1@s2j?;#o^93uaGN9b}>G06`i{#4cdPuq13DVKGa_-dz&N z+Q$Bnd7kx<6T0y>7|7YbCJa0X8$6#t?TB(5p7WSLX!d9F(Z`BjrXCfbk<1o!B_oo# zjdvHflDUJYC`x5s;iYE$W;yVdZt!HK@%)4OSf_cdivmG_4~$t0eL$m(A&g$>Wmz}l zlH@Dm!}uVXO%GuF7AIFPVcLl#1zVUig!eYNFxLsAW0IK+!Ncs*C$q_0q3-}IeOr_W=cvf z@IbXKVjJ*Exyx@2@JOkhcLa!61q>Sv%x3RZ-2;FojsOdY8uCCVKv%b(=mm&tSCx4I z_!{r6B%s8UmlOzO7*2*&03rG_K0E1O^)KdpqMcA1hkOEhpk&2AKt*>XS`L(T9AeG~ z^4h}ju|Rb5d4#Fxl@l`#0(~LE+I7HPfh`{sa(Z=hlJ#0vg;7Ply-I@5*G>>4<-XT<$*0R?M$8E(!F9epPGfY@r`iC?=d!&w5 z9SJG5(+XcfPU-`j&w*_!MZ7N7whrDL!p*Z1Vc0=6< z_8fMgbp@piI%c6&Err6(XY;RuJtpr>yFow0>5Vg4d-UCF@37YBla_Ze)%pW77c<g=j%qru4e^VX9xesG+FJI z6*Bs2ZcYQ}TKVd~3d#)G<6<@cw{)QNEO=VNtC&N&E+$q5S6mYnRhQ*n5kfd!TD+i_ z@H%>-U>-R)Fo}PU7V8lzXl1S+bxy43*~x3Ag32f~RZ@W&#abbbuD(Z%5XInl(+ z#CCkA(2Mk_^pGHwvS{mjKAp<6F%yo{enf0TBY-}CdvqA{*8)G(z^)s)T`&caNal+t 
zQVzraiJYj-^pnC7G!4#9Fq>XpvXJi!L}yi^Bt}3|Anzn|LwFG{iB;$OfHwsa%@5^i zxy~cKQ8wZy$`{!&J2}gQ!&vULnF2djTXimf8aTa3hkAp`%y&EzG%Mi}auzxf=7!|5 z2UeX!yy4YuS%`_(Gh#1q0{5waFPsARLrH=K@F~h_{$luP6%UPoKNcM2ZRL*GyacJ^ zddJV?e&NzX*<3a<(I=c6!i%5V$R!DWJ6z&UgU#rmAYO2nMdOzXq@+ZYC5Wo1;hFd! z^6ntp`Hh=4a^LZB8^gJ4`1v9Ia36nQ1q+T5+?<1fmq?WBA7|NUKk}tR_JKd3 zrSNTjf^IQ=Cz`LFf@|TGX~ve?A)xAK_ByUm`6*>Lyhl0HHihy=K@VQcS*1XhU0~x? z&eLx}kYdr_iemrAA7`ObccHUtW>welTutYSJ|jzv%q(B-BE#CGPB=mz9$Cl9)wKq` zVRQ8pm#l;MdT`1$aIEbAw+^49&zm{q%e+^Ox2i56_Zp%LmvQgc9m}kMFIX!Q?s0C{ za>7ru@71RGe}o>?ZdvpWY%y(_>%>uXNnOr6uLuXqW1deZAl!oc_T$S@R;LFaSD8n z`$=^l7r-c0rr{Byqx>-8e)(;nudPKZg^KoyDGk{SL~%ka+`%G6o)sVf;7dXI0N5Vt_xei zyDHE3Ux^GTMtWv(Eh>-60Ng9}`CC!cv(k&Enb<%{j&XF!KGAK%nCwnrwSH0>kN-gz zyg`Z9X!}BE^P;p(t3Dwkb$2}u!2`OlF3FrkvD04#@>^SS#~Dne#J!GQY%kJSL$lrs z=hiMsIl?zsHpHAlSJnIoiAIiEWGlaLi!Fo&C7j#(*a=E#r*PF@1!j9qbn#QtQIl8c zznJlcvE~03v2{+BSF)aICSm?d2~(xw#>UK0^bktcte5AJek|W5qtO=5E|D)_4INf1 zGjUuQQD&El{lrqk@2bs}{kjL$LkkycK4Gm}r0UW5xTNpOG~%47=kfvaf?zjU6&2@Y zk-|XPtP0s!X!Ecr+s5A``W52~tRDYee-C%JB365vuq^+S`Wca$xkfdDbRlt{VhbfM z^04d(^;ghXDGM03OfHc#PtBxBzHnX-{UZ9r?xN)yE|YXPNOzIUFFUO{N`dpzRgb7( z#&xAVO_We7&!7)PaHK~VlLP;gaG4jEa>X@J)(nOC5wdq^n=pY*qdd}Iq4!`%Yp(&x zr5Wn8!0(*5$`=e<@S|cVvod~`Y%^;^c!cB_xH;gUSO%S1!WH$xQ>G7yZt~NH7Vyup zW624+@8D-lxpp`dTil@@2DN6dSGuuBr(KW-vKbpGQWue&Wy z=LvnQMZ?gRMInM0Lifp9!7fRHogMrEv?Gkuc8eBP%upW|-YSSxo)VfeugYHw3zJi2 z!-eake@Qk8<3pB+_6RLLhlI(ZJsu*#1ZnHUFuqaT_qQH!h1xZW)n(P{M0tIlPMIQ~ zx_PdgAgfK(N>N!@RG|2>%zaIs$WxZT;;EoRerN%YAEFxWG=L@wfBaRbZqvNi-7INR z+v{9%_9*^nvo{6Gr)v(z*Gt_s8zKV5`I^9>cwwF9`v zRDIO!u3cXAU3Jj1B73r8XH8O?RCdbTyz!;voH-(lEc$IW1k!{aHI`*oKE5W&70>HY zzZ(+?|BG(?t3Vju*jW-%(N{mK%%xCXi>MG4^>OHlZwPVP4`C}*zcdS8CQCkjKe^vPulvuW6E^Q&29k4|yM8gie zLo7{aBwP(0rZJH2`<_(&q=qj#th@pcrr7o|*c*oImD=8VSav-gJF_gf_6e>qe|}9O zejqc*_=jkp9I5XmrN?a5TF7@oeyQuIwX4pmhSN_iJgK{3jH0QbAHmvGFaeE9@^>yR0OpWt96u2#beFPdF5lXYb4YoiKGNS~+=124sUY2HJH z5rwMzY>!|!#Z%6r70cu?@V*6QvXQ)PE-IO`@SL3;O0{jm&azy9!wOo>B``lL)$kU+ zm0GE5eT282LM4xcHm8_<9<(cySl00O 
z<-3}$@V8~2H*DtjBtOtO^S8%7QXl3YTR%bBCzu^@z?M#Se%V>+N0DmoH%YPd<@kBx z62bqzNJ~f6oRH#kwWb`Y+vaupN0LWL(OQ$FCniW8D>1FxqV$v8^!qJeA=NMWEX|fb zoAXAzOKBN%N2o`8|5j8nxrU(b%l=>*r(VB_t5>UDB>>teRYO#!>a|L+wpeja^>NiJ z*>25x&rZoL?fqHH#h%K&qgL~`BQO3c6b-BKGBs_b7*CqUq}%BejQY4|n%~CEh(49c z2n2U3@Fua(R9UzN{)3tE79e-w!JJ1&TMRcQT{k3 zyK!9Q_Kg$jR$vriofZ?ee)VDVBK$k=2}U21Wj?HbK=W|6?b3h-J3GZDn@fAWqo<-L z*Q<3`)zrl~bMeMgOgP#JjDG?xS|UualDwRcnNE(2s|7rm4- z#m-JP-{#UDYqwMn=FDzs#lFh0G`8dCCZ*QhBNWEk)lMV!hwd;-$#DU1jjq&1UeS6_ zIyA3ady@IqDNYx_Ewi(e*rpc>8`?C4ds{y=Hxh6DA5Cu^7UlMS4+Ekgh~0{aN(fSd zl!Qn~2uOD`z;x#`@x*lZ4Bd!@9mnqOu4C)5^O(o@&G+T=Ui<%bJ+t>cJMOjabr<9M z)-vwp_;x>FzD~Q-IhE~_(AK8mpd+8O%;Sy?%(gD#FZDcOJ`N3xx0vrsjCOWPKhUn8 zyrzY{pyKGj2KI}hxZWL{ovZA-UIY5nT^$}={xYQ1%3Bk0qd7p}9#CkBf)>nuZaOP$ z9{0{9lWFbjn(Rh-~ED37o^_qdyR{fzGHIrWNE%_HU9?fs)tr%B^4!S`}7ieo>TW) zNoz)L#|O#fq=2@?Qtzerns-Vshfc8M%D(t&P2o!IY!?GnV|Afw>5~7Q{#B@f7s~a; zw7#iIO-_4vsO>85-N99wm$$U;Q(laYXiiW8A$jJz>MP#cjk|RlW_-}U*U6npYFk!t zIHD>&2PFFTBA4C+`o>kxUC;EZQt2Iw^cBlax7vDdN2Xh!83u#m%{)`ff(m23dH*z5 z{a%e})FfrAaP}}l`N4sh=9Gffy)5gitgfzJ>+X~v?f+O`#V>B1(cB$wwwham0#IhZ z)-&^#8E091CztA*RYx2rC`Q2thY=2AaK&{$wqx*L8*U78i2dj%t)s|3II)UB86w6A z#?X?0^8?x+s-}s4^ zuz3;2h2Gl7z(p{}SRx2>SkbcEq|t05u#NnNqsBW@{{i-v57D1b@tiin z(4>h1UnFGDAnF$vGxWVFg=gPWgjvn&H$~t!@fS+I6ZY`8vA>Z15-h=DDW?U8%Tj4~ zz^t@%raSm2l*$%C-zR?L8HJnv!q$fiTi4%c=of-r+2}wa#*l{16DEtj@eH9SE0HJ_ zI-qZp*COvq-cZjWf2T$=<{(cNZ)dTEITNpO=ZW2Rde!bxo?L4{Whs7jjBfm;2+~c! 
zERi=Lzi>^mR>oq2Ncy<(328{Wq}YdYL1IkVL?1743yEZgNJfu8&f!YX+kV!tOrr-& z>H`hp)?JN`y6@_v=ya_X+JO~oe5k+i1FAcyW5jdH=E4y2eFZb=E%k?-6SR`PTOR4Q znsrXPd5fWXL%XfJPhClCVY8&cvpG!J)r7V93(S}erkUioxC@5Gbtee_>hki7N&jlr zCMHr|sKWyov{=%&BU`3){d|BZyHg6XWiY)>uP)?~i*Z$7* z+QMzW^|$K3ZVnVXHBQ~2U^}9{);z}2uww@F6;-&$y`;>K1a#NynDs<)XPCE^IJISx zBZoyqoc9wst#Gub~HwiL{Tn1X~Hpc)|zv8fF$5h0SAaq&_c8;v_K= z(w1?fSVu!=@HYa)2^m5ncxq=k(GP1_7f5Qyuj!mjnM=H{Z=~YMCq!Meebjx--wY4B z0eykl$plKCvlG}^spEjfK+fVoo{FcQ;0zDIHQNL6Uets&GYMyC1MM?O2xGohN&dlX zM4nJdtnCaG?GyV=Qwc*4gcdim61eP?SDZlJ*N`kOL2zjNDeyaTf6F&)GGIUO6+aW$ z+Ui1d;F8rAl7%-N9z*ftoumy>kMOY#$LMsy%%TWpsNhoaNwzPzCzt~iz+=X11Xxkx zreo+f_(b1*tOmZ`?25O*Zh8^@RMev73;!lp>MuBR81F8gi1gd@rDatR?wdXK+=_(YmZ`boA*F4kpHDEd01fvhhQ|FQYpRLQCJ)s2pNMi&Jgu8TCPu%PC%G!>t&z5=`$BC^_b(t#TIf!8tI{_A`4H!8k%FTQLpNk`^M8& z>AOd*U~+}={Y+HFI%e~hM*d*G>K%G({|V?A7SlV9&ccuDDrkZUz3u1AMv}r>YgW3G zHO;k4t0;q3AFn|Q&%D_&hIJGEyEg=tyzQk$)|kE7tayaR3`GhKW5=&^r805ds~LW+}RvC~~Ag##%)UrfAst^a-dj96Hmb;0SPl^)*SvD+A^S#R@pQzOmng zWyrksC<>mks@tC0Pu*|YMSDq4kX@mFXBc=lnWLERNPk$~?AW>vb`7T^pT+r$J36t7 zTfo~Ln8z;@TpQZ~Zxf2vCXoi&10C-v&K!baJCy+>NnX&pxg)uw>BqSjh}n!EJVWg` zRx-aaZ#sLOVCC|qKsHDXp!4Ek&#~!{pXkD%7jZn~-1ZMi2}SD;QKmsuu^Y7*>ft2O z^w3TGYWg*JLbaF~0GH+*WOcz?6TCUAkfcQe+yEi$_DgVB{ImZpevg#gvXhu69j7so zen>VU?73s4 zIE&_!RYS;7e^XVG#;g8=4v^8RDU30cZOTxzh5B33P~Jn&m9NQKz}O(48v8eEuI$3X z$?Qz|@zL(w8DhU~A@;mwgSiUNH3utFiP6T5f*mB2A&xeh{8)Fcv6t$v<&++vu{Dt^ z%jpNz;W431CzaL5m$^%QWfa0e2^%{LvC*A<##r2=_SLdrLUU_BZ#D6K^A_?Sa-?M+ zs*l1neJDOj-D!+YC($12Z$~H7-|NSC*)t~UcRMDqUc(RCr(@m>j?(?csr!xM*M!*K zIRKg{?m9?3M%vSXsw0q}w#61|siRw>Q+;TotXm__(~eu0%qyjZn(G|Wxbq=p%N@+A z%>kMRxPqZ1;T!zxbtP;n(QD9vzf4N&zfr>{SM{Ru+bNCRQ7QAOk)1D=v{L&!x;@@g z5=~aSHvTsdy;=*X>UOrR;W{?d=^k?-^kOlE`wF|A^OjdgNGEW3k4XD!0KSr%mb+Jg zryp5916<3316IJ@fX&)ivK?mk>(~a2q-8&6DXvB9!g)nFDO?W#q;2eIt}A6d?lE^O z&0Ouoqchra(EK`9&$4HNZqBDgDGf031&F99FVdsm0jE#&V*I-dF4QR`~iL|kLRBQRtsK@F$fDpbG!MBd8}3DbIft< zCCVt)D2^}W#ySJo(=W1>z+?1wP9^tf#SEKA{O+t1+${d-xMrRS^z&;HB*D>ReBcY> 
z+)g5`N04s3O79o+%kvpqK^H+BQw~N{Wi5ZKZOQN5qq9yOraCDf9tFJ!u3(DBoYd zkl83-RCJM*C)A>lac3q8c1 zO}nluAWWd|(E8OTGfbK}1wWZA^}f#e==jcVRlLilc)C#?_e#`J@Fi3e%ALo zlyIFPv$3CK?lZ~5$a{Mvf=iSaT^!n5>d_7s`Yp|-CanyjuWZ?x^NKO5d3KT$qt7}q z%$qsV8ZqY+!_D}{jw*-+v-ErtYD1IMm&{rFj(4A89OO_(QmY5%G>x`(!njnvl6JKF z@+wCOh1gpZYPPiS|>v~ZZfJC>c`9Cq^^$mgq+)Qb$ zKtYJ*4GQ*?7E9 z=-{$ZbX~OBIshDII;dN?3Cv%@4cz1GQJi_a`RuPm36IP9x89Za3iwn!j!)pZrZ)*D z@{^;#3O)+%`&2@eaFI)Zut0p)yq;6cYgKOHJm*h?ZvuRNJF5nGDVU1C!_5|0Ynj~B zpl6|pR{|bPo5pv6ps2t3|HAFwOfcQnp>GHLS*$Z!+1|obc|ZH4Fb%xGp$Kysjhtt~ zeC%5wS6Efu0bCF=@-K2LguN?>JSX9g$UNRX(bffL`EMmxoz6g0M9F&Fb6zn)+ReHm zH}em&jdBlKC1q4=$Wkgda6U`l=e`A)(yhr|TsP_Fa2@m=y z8b7xk*r*9|*vRvOE~*P{y-)E%Ig??X!nw-wHy$u`Hc)0%KV#+9}iTGhDToh7nAG;<~y)MT>ZL zgFhT7y3ur!HCiOcF2>&w#o~u*2SwvavkN~7e^Q3hOd?PEifERom9@?v1i`{93R>6jt-4E;=%jZRPFldl*hx-QF!TdqBht{$k(9TdZ zXG*0n)C&aXo`DW=o01R1qj=}Szrs?%!2C_Qsey`+qvCUaTav<|O|GeaI!bE|CbnIe- zAV>P$!%nbA>EiGLM4)ey1H5_-lP}=4tIyF(c{kMyv0c0eDpAc9-fiWJqEOxwC3fW$ z-bdwx_&WZ0<*eYR{7~h8vmfxMXd>;p_#Z)>Xg~LqnZQ-@985J-3U8^g41JInZz!z# z&I{336jbwqb&B*9UV!#xteO|3JsgG-#7|9ZI z)B6gD5Etm#fx5@7?+z??=Z18i%#m_kI}W8Z0Jl0aqws*HquDQ*^P){W`84aQdhGBw zDE24=>$bp06x;?s_9yw7radH*+#Nf$;gO7l2g~!M&xtQ`;-qW}D~T=5rq5pTRw`vJ zn%60J<+_h@P!fgX<#uv4ItQ91`;IMSRmf0y65)yTBC!}%DUBhgmp+x8pr&QNk)Y|M z#5hR`b9dMgiLEE8r&0QlpFUDAUyB@-Cdeib-U^zedXkv2Q#yqbh|iPgsT=B^NXF5Z zl!(OZ7^kz=h|5``<>SN&99n3fn9hAQ*C^Qqc8^>rvqyMhhGYk=kGEQqLcd7+CBDIQ z#cmK+v3zTDMNin7#V#T_2h2aPPoB;Od5NnY|jwdAyWT|kgn80BH zAH-vTSZbbV4R;)7vM7Rky84Xp5Kmob6%zPW8MZ21K~B6zxDXVC&~3faW9IAd zIwu)=pzuoB1}PLXqQy|QLbV8lJXOBa)!=W9$8bFGCLk^bz;*{bsy~4J2j=R>)gFe& z>k3L_kWsrjn*#-EeUe^+r!>zZw}KRHzMmYNpiiATNl>JIGmHRCg_~PiC?vR`xdr_Y z9&24&y&tkzwiSsWSM!xjI;b~CF24s(H9Lkwf>!e@UpxLc3o!8)Z$Not7=b+>UfF{q z>7hT}^P8qa8#+H!{s;MWoG&;8s@tz;Yy?NP=O>I4klNeARQyZr|9WlbJ#FTX&*R1_ zMh_!E-}R`v8|)prl!icZht{jf+H_1a2J@)uvFZ~(rr@h;B(Xa~qzs`fjxSbhqdf|# zQmkc-o3l){oLl1XQ}qoV3D#=wqup46nwywEqy_3rc>Bg&)mg%mN}BQn=}LZ^;sxbq zdXi!R-8IfnzK)3tj*+)ZCZcXCt&}en z;}qRAXzC1*n|ylLtHs+RUil|(y& 
z`>HHujHpku^->Eh)5`%CG3S|VD|`QneXo>JPIgNc2Td#=V?5{l`&jc)`b7kdR-YT)Qna4;WNZ#;`(OnXm;8%cyga=Qb ziIXDYJ@z+jDv@~hdc`xomaJDy6-+|6$rA-VHIA}M(5tvkN&vTKU6ZVXN|R2Bf5JXd zZek6R{Z_mJ%ha0Jo27^Bp*G)p{r1nqMR>`vVDa9kj>21Z?1)`YNYsn{ssjW|!u0hheR z`Rz`g?vS6keoUO8T)JWyf%&!Bt$rK{W#Kj~Yx-k)(IluoXROEUDAwxlXgLF zWP-MU{3hy>Moe3~s8o}{{C7r$ZW4E`-7-x%=+A&Hy-ha=zs&P7+Zy|fJ-Ci4h9Q8! zDO#%AMXJyIt))^3NeP-X+S(v;7S*IDJKXL zrt(!aLYrr@%%*=Je>A*cnlX@G!9waHb-rv)S%l^Uhm_~9t^(-kUaA1DVOh6w3h&;M zJVmiU>^ooK1dpE5r3{qVR6UZ%pdBe!|Ac$0>9)?m!`4W(G5qNzGW9+F;T%xK6acBP zG6l?uAF1$%ibGGxQSf>1UfC~E?xgebkFtJSCG1gK9qUouFL<)esbDWmt~#xu!?%l$ zsojyK+1r&nk+mti6(V6=Y`h#La$W2slZgon9!S5)k|*$G1jW_i)x_KD;w5KMu-0C3 zrEe#p%iBkSQRt}q~ z9aJ5xyQD#@1mzU{Wt|j&P4O=@RZ1$@6+9H}1#R)Chn02e# ztQg$g6%Wn(hyih}reN~6P{8O;V|k+tJDHCsF>STpfi_m|zK!S2>-gL>h-z&6iV;)} zw0y?Z7P&RgB!p!*SdvK-Q~ombQf9=;jB{zli>Zcjj3-_d`Yul3M3r%k;Hiz3yJT}b zc4}XTpH=VPx|guJqO$okDX!p>c3XL@XOadZ48im{N7sssN= zDpy%{gN7oU=>rs={lXXo9ZD_Hqu{dmJKAfAPxx(3xiEH-i+YK8s)vt?CA&UGs@x%E z53gq3U)Fu1>hgUSJ5hhZLsP8iM^=eJAWliyuiGgeh+C@lmLxA(tX?Nw?pLcK$@ATb zN}1}2%Nlu)0&oHi0%v`xg|56c_JW1!Fz{Q9(GR5~$f_jw7 z)mNh2t=TxMT(M1;?YvfcNOX7j8>;n|4LZ*f(DY3=GFNJh(v3=gp{MHH5`(qvx|C?0 z`lsG8q*B#r_{%#+>1JY1*U10VwvH+k{}iSTBUIU0k}S`QKA7~DfmPoP*DRW}9KDOB zFF~U9v(AjXq2@Nrf@I3;EqfLODDYOslyCBM^&Q7j5eNP_j8J=FFuLAa&hEe102Su+ zqMC@=h23r#MruySPMmgGKwA&tdIYCsJGpSt<>p$N*IXCtC+04(j@?ca)PFHPw!CQQ!v>H103XNGocC(ld?X(LlM$N9F{9!*P{tty7x z@24f2J!;MZ^KPzcOtGZ|nQP;kd7xcYL7$wotz>jhB>6|~Kb?=LX&L{uchcID?OV&} zzhfPnOPJ;`#KL707Jf7P0Y}|KOud3tu6s@GqC%T4=z=YlY+V~@FN%(L&!_*&S=(`y zIV=5mn~@chc)4X2o3QkOHHULG6lrU>(<+;sF-uUP$3&2kfd1mSsob+KDS3tjJo;CD|r;I+U zyCm)!UW+nWcQ5Z(!T3%Be|6Tb_E161ioLCN;M8TCtjEFok(7?SjFJ&x;685c}+*WOjeI|yaxp#LdJ3Ql!47%Q?v z+gBQ~sasn8jB64Y)^Dbu=t<@+=E#u4#?985zQ^@vtiRklbp=MJaUEKFxyJ6GEtcTn zU$HYB?N}AG&JDAtwy5I=j*r@?FC_#w4T^e+Z?Jn<&g6Xr3Hmm51G%!~CH)rl^NMdQ z5~Di=4ba(MZk^CQUg)-O4VXsX!H-Qiw9P0D!^e4RnK%-m9=S}wlQuI7NI1&%rc8>E zKBiboJII`#a)O!5jt`b|!hn3Y4se>lY0K)mKSZyALk)4Hn=K!jw3L3e0<(t}0lVTp 
z(y!6B5#5;u4VTCjtcOKQsDqrcod@t(}ht_iw7I*~j$i)73|^+^sho zPO`m}dh~40Brpui0me{%}68ymIYbUqF<21Jt!aYS%?-FCu5$R(doE`W*>Dbyd(uE50% zEnM!_3eb@=>+V*3mCWl3t3DwxHxy4PFM3;%B*hG=6<|=-_C?oeI zDMU0j;a|!z;XjN1qE8h0x?X1A5v^LYtDK?<>2R#Vs=f7PwV-N>I1}Zm^ye&WJfK*C zUxta0Q>!lEw#inndQO}weYY&03`$G=E>rWQl+kXi7_rCd+%l}Cp!Gmyx#^%bqNdTf zKp0&=Uf<1{+ptSF2b+psqS;>Y61z!VkiCFlubLCrO^R2#`~D$|m1(22jN?L5|LD@y zookz=720-%dR(=>wHEfMi)>!S7>l}RiAE2E&Yw-*LrNz{P*D_m-En#VZEL=YIh%1gF`r$}o)OT_ zoeng)_Cr?%pVxv-?+7KG(U{YuDaNt5t>ov@WCDvS<2I0z>EXm)%2dXQ+5(ybt1&Nu zVbA`#JcN}D99eXbbAz|WRRRVSp6F;X}9rGfQ}YaUK^b?8np*RtS|9L{hy_koAZn60+kU+F7Ax;cCV`QRZkp z&>}k1y|w0`^1ivdewlKTqPyX`A{h)qkC0nvYq5T^4^4UaI9Wj%l~^V{n(0i&NLR+3 zp$ergKHKRNr8`EwWmi&Zm9 zEQAtOSwraa~-V-+G3+Gkh&Xxpcct;M&zlo0AaHc#YEZ=7ez zB@Lign)LNJY`NiG(ONuKU!K-TOx2Bx${>B#M$O+(j?g}EU;$U5buCw`c>Oaq-)i3W zoEQ1jle&29I}MtSYyz>Vx6P+E9@Ez1T>#-U*2EP-glO}r@FzsMd6B0V;em0N-CF)b zkZNtM3Lo-S-Ko*9t4F-+eh#)VpEWoSIN-o0r(RlhA^K(ag1i>&!A`g2Xnb`0*06&F zpSBZoPU1*L+dB)ijR6KlR#n{ELRHrmY46cmP&^Tb-qlphab#@7X5d?ycL`&v``G@Z zhdDPme^E6FqqsX5SbvkCikY*(6iq7_3~!`oYBV+0KCz&0{;zfwsaAR!~)ec(kHSX z^pa9cxyo2WJxWtx`1GZWMHQzQXPFPP<5_IBDQ+u=1V|U|;a=tK9<7A#!Z}?Fupek= zO**`p{!%fUn97_6x{xlg!f4MZrR>V4+te2vPI(Gl1+32ehnc~>9jj+&@Kb&70eW!S z=pwKL+0rorGl}bLP+;$J&9Y$pR$d@Kl32()Kn*1Q$1iDIMrr4NC_PQHF)}Ng8SY?G z49HppdHRm$6vL{~-u$n^e_Jm%*<5~gwU`DZRWb%wiDYrT2~&_FavumfQqcLU>auYJynyMJaiM` zOPFA-Bg+x{Lo*UT7k@zQT7x5!RXzowWN&5Bib~1}#lDCETDd|o&qN=g@^EMeOwdu& zQdB|nJ^8DW&wWq(`Ea)KBSIJJ+FpZ7)!A^0i^kMLR>c5xm0`*Tw+r~`!~$cU1m&f zqDW3M9Wnp##xq~x=TcU%ZWEgtbnM&Yj?z5NC+fSE$=n#ml9=VZ`>g#wU0^t%bha0{ zK{r~HsQ&;H-`?wpt?E3$sRsHW%rV_2;yD7vC(>~s2==G=0k4=xsaS3+K9jbQ zS5WhY{x5%Qp$jvee>WAwIw05)`45K~ki-R{f%>oU=^M;-H=TM#O7O@i2gVRa( zpi}filwjyTY$+9nBdRCRF2J1pB!(}1DdiZ`io`~s*pbNl`71fsMcYQ=`InJ(y7f4N zv`TV<;3akD{U9EZY^Sa#drAoCRTMxHQaOWqN-`q1l0HZLD(NeOCH}r-9rL-Q$uoxa zNyc^f&3yxp(h#v$jZrj+_fnq%eh~Jm?QQ3!hbq1iP39|+@&w9R#e-EQ+AKw3Vj{g# zF(zyjqeYSC5zF|ZO0+x2?*|pCm6(eb2gHmkF&$@r#lJGDh)0R(hEfzj>eC05R+3-p 
z=4I`*=}DpzaI{sL^@}~}>6*9h_Oz?o?%{}1cwo{xZb2CAh4v`sDSSn12fmB2zGY%v z74d^rSDZ?oZ&|jopOS7~5FbxnX0inBpx!e_&Wxjwbo+-9q`%QGSEB^=*qZ)I`ro*} zZBCE}3A4NTH4d}rIMzHni_a2|DivM zTn{F)RFp;H0B|WaOYFiNNn?p#<5$oLqFZ%5Muq5d@kM5u=yql|D@ycp*>Uy~ zapvODoMg!i_gBDF*)jX2{B-b|q?R0|n!pdB6euD3WGYMX1&gAQ6qK4idcFKx(J01B zxjEw*bFn-xemZN4yfnC)^-1A2>pFXkDtI^^M79&b7{v7_%WU3T9+W|S5*nbk>q@JR z&=guuK@c6I*_S?zu}rftb_LT>V;9I^c55EYfEl;7p~DEsV&d)A{hTqRFD*FIZ_3o> z`Hi)dpOzODo2l2#hx6vsx=d$NUFemj8B0CrpH1TzjbcQY{+rrCyQaw)Mu0qswBA3g zNhG8vo9Iq9cCACHD40(FvWL{b_A@!f)MssbQetSTHt(o#+UGX?!t1n4t$vd}P_hROn}WBEhu9;*`IuQm23Ud1K-~a42=cOI@IC3|st~A% z>X%pveV|*yhTx~Hs=0rJpMgD&Q>AwB5$V6Yf3Rq9EI$e#$;{^Kh%@lF_|Hf;>k9-WgYxtu_42jV&PcoSsLMHH|xm}bKbP;b7 zEfD9!vpwNz_woK={8L=RuV!UrBK$Y(bIYCzIsoG0M$p6q=KO#wK=Fty;Tt$c6v}B~ z%D8x74vR`X4GgfGFfiAhlV20WH3C_MFM00VjFt0vyLh$n{d_vVH6&L+1!L#9K~s^% z4qp&GY=?xi!+Co-O!ftSH2D?>&p*^;>j)x9t||wJg6jnff&V~KI*n_ByyDz=8Su!U zv;4(K#q3-GN369+Lx;hCp%pe2ixUfBtrt!w{>`oyE^b)QekIJQAaO`STSFbrH{oBY z{eW0BC1xepOSCC)Ecc%H)yx?@t=yKM1Z_iL>lGz$Wk{JjD zYcP&Xd-K-n_e`$oOwmc^3*(Q>y{v3wO`;cT&^RZ26YHsQyU$P7RnyQ!KFdxC4I{AU z(ZjnsN$ob}QfgB$gVW(q<;8f`_NdUF8Q=OU;~Ue^nwwD0yw+M0c7pk|&DHBLliVyB z_m;6=ery;4IVy>*+6u&pU)0u89*CP!3(;4_g^fq54~l1D2!#WpU--!@M~c1^AH;Ww z6Diw5N+nM8XYL)cI`%euvZ4U|4*e1}pq8?=qR>Vzd8KFs`f^jY@E-PORkH99A*>)# zxSK>t-!D8(xfzEO&8FuEFBaWq#m?4A-GC4F^JOjIF)#*Mi#f`CfEaMC#C3>(fHWLK zsHC)tYY3h)Bkv6YP(P%>NGJVGY!dR7H6duSaEP;cmRh`xUtxD!@*imAitmHa9cz~T*dfui~s>zuIegU{CSD?k*9nlw|4*tAF|G>rI zlo{)VBazp3#iD6o4CkO=De#3N7F2M1Fhl{&E3Y{yXy%VBaTaXk@5wF~+!pYX7lD(( z>Zn+d0fGJ~Xd03*?KzB-*z%K*7_?)(=DR{qNNf4a;m{@tp9`z1?%K|RJ4Flm+mR)i zQvO|}FR@PGDvXW*1%)E-g{uW8#4o3`fNJ@S;nie3=gpG(5f1TINvAjb#gj-UR!-xs zkvbGK@s3IDGA{D|kNU8BT%kI? z{xNsA>S1{r_m&FGYvaCC1*g04oK$Y{?|Cy-0byf!fvR8L&NfZRit%5#PZa;V(^4IP zjiye_Q{b8LL+wNEY-2%bIybZ;uVu(w1<3<=p#=hc)8J31T;(8j}=0m^*)3UK) zK()+PT?N_(c`^{*o`iM<>e^P;%mmb}*kU=LZ&{Q55D>P^NXZ1)Ep{>Ufy$PHLH`1t zt+k$!9EzFYdWSPr>Nt$R;iv+t9#DFe^jZ?;vtmDLQms@mtFgIsf?R+JS*4VZz+Ft? 
z%X)|{(e1K63S-e3c?^C3^gqg1>~pq!HDX(fy9x@*kvvs?v(c9DlP^OPYi7u{VfDpp zWr6r(*-_Htq*2MqQWk|8MUx8X4*r8O7uMKmUlek#k1bzKv^gF}$gX0%iF{c$_H*Mq z=~?`hs$OY6vArlm@`y~&{3KCP%aW!_a_Nnc1(I^+T0foSHitj;i+llp-*CQ~E|#t& zHsP;JZj$-(KeP}ew`?XU7ist8qaA?;0d1rKbCwH z{^GUy;zdls^C{CMr9z4=Up)%6qd1GKoG1+1rZ#b`eJA_~+%CN&G;`PHt`$b|q-hf5 zJD(o^H*!uO535IRgYUgx3nLM)N$H|PlGx$3Ams^Jg0oEn!X;oz%}nGIc&_*-(gCHf zx`re{*H`?7IKV*MH255ny?8ZzSZMGHfZvI~Oc;agl-(OfAfOSM=s?32WQFKKRVCsm z9$S| zu3}$BGaRAlC_tdc3MjJ-5-WhD6evQmHhK_zu3Qup0Js}XK^&E52))#Z_Fal;H z?5;P|xkI<~0c8V_UiWvN77EoFGaA6-I^yyyutC=r*#p|^OBVeRbQtsIJQGYc!Y*li zy7=E=gr=KNOY_^BLMXd=MadiRw{=g>9nfGcN!ticwMHzn`G8s1hTjtSw~Y69;rF%> zXN!5q4Qb9cm8J+BMj*W~=&Pro_vuNsNbP=I7Rs*lwKlbpk~dD1f&P*{OHIKkm$j&N z5p$O;Rxv4IJ_l8=>H10gb)oFnHdan+b3CT%-q&Ag%F{k?cwJMed4!%&VpPAy2ITBh z+vAHyq-t??wKj^Y+JMVU(*^6ppeV&0#4M6-*_v9WT!Hh%Ms7K_$4+|balMOClW zJiK3VxT=}(brnUqmvn7Kzj74icwCNxKzkfILJ`bN^fD?$oU92q)cyh+hk{(U@tf7^ zF9b4brFtCkd1bB2my9XgtIVRj%^ss*(vXx~`Dunws%SP{7^_+Bd;%FI zdN+(v8zB9vk(AAq#%R#_0TPa;ZlyuIUV}+`D7vKSh=~@B*G4V=F3i#Ce1?# zS`Y7P4w)7MUDfPxl!23x|5aZ$&S;@mXI0a!A8Pwbep!g9k-59fV;ie8nv8#;&o2LN z(BSf-zUk$}F@c};Q^+4Y|I$n8wPWo}J)HmB<(}Jk-W}HO^}SV|7Pw(+@lLaA)A<~Z z@h{B1^dJKT_bcI|zL*#j`9xPs9twD=eM>8!yH=Oa-0T)@IM1`)?U6aQO4wqH06nte zjCmI3Tv3y87p`knlz~jZrT*53l2$C+tn;QUi@2qYqhS_3*PLNI@#xcfa(u^ty4PTk z?d+!5>R8{Heek^UYU5er$b$b2WRf-Orapw?y<(mA8})EJQS*@wgs;|&Wj6SqQFpM{ z&uLY^=H79=sj2k7DdIIxN?pK{R>qo{z%_oj)(gF2h znI?N@7-d8^0yb<_ZN(f;f+noe+_S5z6=J_JHpS{j_Qb z$lj@`fM=wd)#31_gj&@Yq%tZ*IZl`yj8`la$-TD9g_0-J5qZ91EsBOB1QT(uwPy%&zpZm)~<@3S}99aZ*UwE zk4OB55lSf9JS8*tqQ+m@zcN+zTlq10n)19VE%vO!q`I*Lwwdg_Tx67iIy+B9x?O*5 z5+F@fEpq4=;-FK*2t}tgXAJH+!Rju9SNf2OXedf#DkBXCVm#!ZjOx(ivdiXTzkSl( zmMb23$t}~e@!!Pn6>Pf~wiC1CfAi4S<5w*%9Ozn6JGAObM`Qinbf31)#yN?&mPcsn zQb}_q_D!gh)tS)dTVcVFky+tZ7rK-4q2_SlqivV7vDIa`uKo3t0!GKfhPv#v?M_Xl zX^yQcF_p`+n%i*HXr|SRuz9hI`7YVp2W4JHeLQoq`8u=8d6C79FSC8a70|9Urn906 zm!ICQ#w^H!TaRF$ta#euhToVFX{{p4qe$kDWL3yy({}1fZ@Os)qiRODsgM1{soQu5 z{51yV~pL*yMbp6 
z_3S+ha`b1o6Q@NRG6bhb4Qg))|99#YMzr?Pr>t7q{EZQsUTe){RwQ0DZ(yln15D%C zSC;S$LqKvsq@K>Rn|DliRZuZyjXoX`j%2B~A^-bl<-cp`XLn>>Y#z;#r(U$+0MYV5 z(={%B=5ybW4EJP$cn zn@1yE8T%|O;hf~drYFM9@nekzqSDA%{ULF1aDxsf!TJ27jg?ioFV~Qj$hcSPck&~4 z@1@tlo?(PlxzZCIg8`qPjszF3lp9xJ9~xMH`9=SIIH6o}q?Z6i5~ zs~wNp`kzI$?l0)q1?XlJKhaJVOUt9xzW_|tQq@;dXl=amQ~i9@IHg^YsPUF!T3Qtb zC(nwi!d;i`pZ^%YN8aFI09Hf$n)&%%&C%+&MYk=zLQ%Rqz2AFg{g*{X z)AHu<1w6%c5M>^M8x2A9qsbHC^|OiaK$J`0tor$w#XS!S*|?kCio`5}vFny!DIVXs z&OWQV-`0y)vkmH|Su0-3rneqh24iQnEnobeI0L3xXn>a4W*f5hEPBCQdVViS_0+G}%&wtqnk8omzd^mTT}fQXYYU$I1wm0xqsq|>x%=poaj!^Z#yR45s1{gH z^{gIbpQnG#cH$9%4-v7#Wt_Dx3l;wepDtRCi$Hwn8;1XgUSg6EkKtJIDsmUmku#D? zCNCmfrZ>?hLp+&K#-b_-tCKZ6^CmZ#qYtkW%;Y_CStNHCZ(YDaiwWTFPgpYvZ)nG_ zrkJG);%1tR1tTA0DDYL(+W;E8kui>4SD6VkaAPwLuD{ViDOy+xQ=AuvXKMBv`LIj8CZwXpqKgN%ALxgKNXv<#9EL&;w zQjAGk08~o8hJ0qI6?H1mv(73%)7^y<6HGv`Hoo^s?rPq871V!{e?* zmuV8oudx@@zY!ILO!bMzX{2@P`NhRlXSFt2LvK|t4b(6WYF0U25T?tYb)!K0EEf!= z5VQG;2moJ=(&XH2ir4CCNO7_QWVh+M<;!ujL@{i?)I)C+pP|4Le$;f%v> z$zhqab4KgV-nqKSHdRldBm)NPdIfAleCS+8@I)&+P>^2ioA#tyEuqdjHUA=sX~|Cr zqU4xg`jTLKwAkXFT9KKMV_61 zt92`8^t=g`n{YGxwb?iDx8@w1v5T;MPLa13Z|KOg6Z$_>wV>l2M`=5tBedJ;FA*fs zH)a&tzzhR;xIk07~Xf%gUxmA9NS5H5hZWgQrE``R6PjGe(AFYCkN6o%TI)gZ9 zJw+)&W7RjP|KP%fDfCjpI=Y;3o?L*w4v?vPT9&dy8D(V_t~;@%DT zspUK`zC$opa9)_>d`a#hE9_Z@eMP%sJcYl(C{ef*FEUSZ*O51{QYop_F7^Q8Je|!A zZoJJT@fMc2vA6=Clz7fj;qf3Vzfru&dAGDzzN<@#9^kyzpTu6|UY5 zb)#Q8uvqKj@Ld9ytu#Bq9+(tLIx^pQoBs{H$(Tlaf_-FIhS^8(GE4?-CWY(!tEW;@ z^zpg>(1LZp<2dx!`ow7!41dEP`+P85hu-RTmV*$fov*l`QHwjSP~KxMv=5@L;2o?d zTdot`EcYv`$nF-e94E>RbMMSWw0iT2DZbP#|M{zRtDw{SZ%L!!@;MOp3FL!W4@l=R z4t+wzVVqa5f74mQnC@5Q#l#O?k29Z=Rb6c{r4&uK^JFY}N4wN+gGLQmw6swi3VX7c z4%|Y1OOf}hgAy;MSYJf}Zn6e(j4FKPy9*X)~Kk4deTGj>Av zXdo2y$pB@IgZj#@uzn&4ocZi8Xb!oN^BXrC;l&Ffb~cvq8I;Av%Y+W}j^w{$7hq#h zxNHpP$S{-omc*xXB)t=1)bSX*(P2_o=1c5d)*F^D0YcozhLB#u`Z?>V$ohA@DfEv; zc>Yd+m2^$GixU+XBQf(n4{K0GOP99~r^evJHJvmwF;$#L|3roX`OIc2h>!prrX!)T z>{MoJ-BQkR7Pc^n$K(Ksdju5T;ec$htFU;OzhaKu$NZi2l6q4$k0PQ&g=pF&#yf_B 
zJ|CdrI+;l@iA$iMQ;jb`XnE#z?yH5M?ZzM?@-9Nf*e`%4(z^<%^QhET^qf&Z}r; zv?=dp6M)gGyqPT40kzANOYAjTkbN}OeV8Caf=G(<)}H_r1aCc@@S6Ba?*K0(@6)Yr z9H0U^T-hRefG#`p2xFVBF6IUhq5I-}4wzvW@~SDt=&2nP0S7y-y@#R4kFg%Y+tht5 zU!hpiDobMhV@jjBqvRU(i`hN>Hyvww6YapznKw_;GXgArb{pi)DA?=~yn0M!-+3Aq zm*2Y>^OE4$qi6#WcXTsr?~|*#tVNe77ds!PKA?^1ycD^Jwyo=-=SJE>^KUyrnvEP> zRKjUOuU@c`qQaWz2cRe6Q|4{~wG*!O!)m6ITIMtsjw2tOy(Pt&vVTr=xQuddcHV>l zYL{u~U2SsWz5)9y?(wY98T?251VkG>j{h9ph_$7y<51x3LL;%Eh9R<2VEH)lS$bmv zO)3S{{!5!qPYoJ6Sa#sjo^rW%AY{ef=UFFD1WPi zgh}+CyqO{e5FB45nZf?#=PrB6`{HyBVcfK-;=AG#j>U?0*=d_n-iam`_GbRu@&xWU;qLcoVc@JhEy-j|t&BQpOh^!+5-U@Z` z0Txa1GrfX+L^(I=0yj{#eR3=Jp_XPRNG1?<+81mtd9rpR`6p$AR*ZT?^Vimc_R!n4 z$u+AP54ABx8EjIuO$<2~Z8cSYmUWyz> z9dGt-c}x?S`m65HkC-wG${Bx5W0D^M?xvE6THvO6y~jn?7&G2Z;D5kx>y2aJiMc&l z_%Ec3-3_o$l$l-PraM$+=bnn&w1Lj3ymI=Ljz@_n7+X3UL!UE;by?k=n0t)2&7RD5 zaUbFR<|oitggJ8`;ii!$_b-Bmk&$z>jgu$~W_OfNq3)aYCPzy?_ zkXfR!a~TWt_w5Ab3DGlfvoJ=S1FdB46!#+_gm%dYbS}JABEh9K1xdY#e&s0X8A@>0 zJ=rn3?@Xp534ly(R3~yyIJoHsWTWJ-1alElHs!D&)H=o_;YMr$ewe5XzX^Ir^o|tW za7etIvaBpX!lq|tZjy4D*JqT;9^=my^%QxnwdBGqnIJU3;qS77u!Lw9D!#46mnaZbH0k2s@`%xic<^L z^L|KDQsxW1q%DyKAx!psqFMA=6>1*~9)-~?O{OO?<ZJ zRr+wY%YnRS+>MH9Nl$qjm5$+;_y<%f&mzG=EzVBhKWvM*SQhoEawJ=!{+XM}p=cTti@7!0QDHNA({vjq`0^eY%IpLdjyBm;PHv)S8=s?* z45G0ee4fcN=GR99eB;Q{xh$mNUACB=ZX6#kXMZ#94k_j^%;(41)XWS!>;xu)TGRz4 zR@2Cx8Y*MRYCW49_P}HI`L2e2W&bJ1nI7awdd1I_H2mUhY4}=S2xhx7LC7hNav11TfvK4q*%VOC# z;=Sq-atNid;Gz6B?RCmM#S&&`uCU!%_c7>HU)=Q}t2n{xWj?E*)@fHmy zkELdcgp{3Fjpz%F37sxxFuWU&i$5?2E8r42OOZ#FhH{#cdZgcZmEjBJ(SiX_o$`{T z%H9vG5lpz8vKpuBF5aEh?B6eQd$s>!y9$YM_X zLve=WT<8=@rR<>lBbl#iD3gWW#f=qb172zykz}`Gz@Q-AvoqJKf+k&{|_H!zCPxNpi?{KWhHK9dA1u+o~$$N z5nwZ0Z@p2^;`my7%8NNGt<=0=ZlKkaVk-mK9uT#Nd%WYVZywj!*)a;ipP^~7w*r3% zpv-bYq_QUWHMX8&9qip#JHRgLtt+*#clS=qA#nVAkx9!q#6EN64i0@5Y05|Tvd$+X zT6vDDZ$o3TTDuiINQ%|spcd49tuF!vzNmSQ_N^b#9KgOPGitUI266^9XUJ<4gS1Vw z6=5y9+05?~W|@|8Y#wjLEZeEQt9k%_pRiJGMHL_^Y9eL_h^(&1yVpU~1w=tSegsA4)5ZC1#EQ<>?CGwiLg$CME6>EMYfk^mTYS35%T$W9=AlU2~k 
z=%X?c<2YnMW(J}gf66wn+N%;|S8O_LaJeT}nEp#nZ5HwclEG`}At&Wo0 zo^F}#B1TD$vdUt3WZcH2+H3rJtnM4L&>~Fi)VPt7D!S9 zY}HR?y>1=KX{w?42RtCg3QG_v;+3M|AYbuo(V04mBv9O5wn@??LE7po^+-!omPl^P z;L%8Fk^GL|W$9CuWXvkLt9IxN1*;P!$bDfL5nrxqL5lXvzts$gzAFeNzs0eN*Exk^ zlCnGLr+B%FH~oP4t@@j9xTHisdDL_149!-1E6^K-VcJN@EFn!>&@^ATT}!F@E_|b% zQ=BdG*WSz0i>h^?#7q%S9~JRQG|%v6YJ_O1SvhiyxKmBH6Og9_W#)C@bporoyoH3>e`r2n=NqPC$gl_cl{jK+wLXQwtbM1uPU4m+bSjD|_ph#w zwm#*VE|2jgdYi77#q^)7U&R%>jx_!h4XsEqqjW@K4zyl3lJvIusCE@)RSn-(qYqc= zuN}uo%GsoO28>T$r+L607iC*ta?AWi>e2;KV_Nj@q0hi7Bea?YG2KrQDrPN%A`3F>@@Y0inqjoz+vSIo2%5Zl!?tT^y^ z^*DB8gH?5wqpSq0I=Nemrl?wY-!hM@G6m%cw^eDvlMzo;3i0BpGIhO7Fbbj>_FqL; zxTngAU(*Uxo)yH`4JzjegUWlAc+uE`I%T|gOoo$kq9izeigJvU6&|5Xl9&1HR329? zAF)dLNo~uFmf0?7w&IH9Qu6}EN-3e%-==NlTKY;+D%+U1OfgwbOly&URaC{&<@c51 zP`Uh-nlt&2B3}2##arQ{^0yP(1oG1=OXCCiV%5EByqu%XFG-T;tJmkevh7}yQ|8GY zX#UPvFWaq43|=E!YFIPzsceDim@`QxPz>$$K+9zB42K(r%MKY{R?1`@#_2^>vO1$E z+fg>!_#Eim4J}t?=^cyeie*k6J>}=5 zhdWjlER=FOK4;cSr*!rvyqBEp`a1oV1mE+-A1nUW^UQs=m~MRHa7Z#BQ`!k|NOyHx z9>}??4tlcwVrMG?TydpCf_hqbs(lkync3a$PKZm`Z=Fvb8!=$LPIH(#v7>?6Fp}QY z$Qz1>fqw1mglwEn`u3)-!Dgv}Ww%U1H#_|=wr>Zb5@7Bho3 z#l`x8wQ9tb4kzIRyA2Ry`$P1D=Cbzj*n73H)*SrPvTBQ#_#z)>zE1v`UTFr?{Nomw z$1rGNYs`7T5AVMg3pad3k>!f`!q8Yuvepo48?Re*q;b^;%!kQWOJ12hsPpp1m}qon znyojPpjfhT6N?;bGTJIDO#WyN=09=SZ#pMK*a@In=0VDx29(*22C1qyRnwmqn~kfO zjGUcDSC(7Kal?G}?wKiuW^P{y&0yglne@)MPxR2G(C}F?U?;TvGNG95bw`a`fr}Lz zj4tfJ!j}djhnDTFzr;P09Hl4l3u6}Q0|oDc?(56N<0rxmO)|~!sk&E6mYvW{G5T`r zYJ&|j9-{20{vlsl5UZ~dv}ZQ!4hpv<5_HX?A5m9zt`c?NWZe|meou++rK)Gx9L*!e z&~CADt^T`6Qq9(*#YLrH-5H5rUWYDM@;QB@cDwXJ{61}keAx65%}<5gAE9}r9y?*P z7OhKjN>PuM5AD?If9bx-A5@;u;S`MG4caS8&zuL^66N01ADUe%U7UlaSiLZ!RsB?3 z=*v~_GJwY~R?jnLJM2;tWDe6(6+T-QlwPsyP;W^DW-SMp78n_;v;-)@gNSW!P z&NSMDa#Z`wZ>RdG7}ot`-IUw)|NeWJ?e@QCvxcX|TdKalrbIVF*g*Gb;InxwcndC{cGB&H5=zNLH{Wao;?x~ zRj{q^0&0F%Z0}lZKoYCx0RBLXYtJ~6IViEGh|2V;?yF{O9Conh34eo~&^T?52fVzR zGV2%;D1Fi=Lodu%^tNEHWsd5}CHN<{bz?}XXy>k@RIk8h_#L)CtUAM_e36)*rsPHJ~PAPpyKzipEpvEJ= 
z+r*jc^tyvC8MG7X487m*Wfk{&P7y~GgS!`z26IPs87XtpmUT*L{CJO!0R|)TT1ONx z+uy$f&T*em*yYKWI9aSTxsRPt8{8u!v&!Y&ZIs_d!@AOG?b$m!V;Iq?kd8>^o!G1G zc-E?j71mpv)qcZm)f7j&H+OW32AphBSn?q+YxTPBF#4I&-(4QepaOcQBcRL*YX8f6 zm3+8;IEOSd(^|qC6F$mvkpE&D(ehl>IPSjnt4!{gYV46+vfEHq()p2{Q6lJg$3f?v zYJbP=&tO=;^DZO}w~i15#IP;3!v4@W^BFOIir)N4<~4SvCE{Ntb3OXM`B2f-aYrz& z=t}!z!S~!G>uceMwCk4NqVEZC%LIvE)G;$$st9J7?#f%ecbM*}X1gvhh5oxHS1bM_ zl)Y=71$uGcCEqoTVKzbE~OLmmU7uC^t;?M;dOJE_qEc z>@`0eY1DbDkK22|elM0-8x5Co_gc_KVurh!X_}eXZ{nLvW4;-MmN#L+h6NoEU!wkc z=Yk3U=zC1tT>3RFs-ZYoJ7K|Q#LdRGo=K>^tsaJ1n5Avsq*R;)BU^EtBjPaQYL#`RE<(FCh1D zo|?UIBf%uRj+#h1K)-=mMfr_>gHNYVZlMz&GPBG4DLt&R)SYw@hZNib^zyQY-4*+a zAhSNV_>)nVliQ^FMnep7)cRJOGBqG&BAsO_6Ln52hK z7B7PRMY&5R)jz3Es8hLKtzY)X`LV1qIh0F$F(RvC7dLDRQmgECB9YOceoJ?RqqB37hK|Y^VAREqBJXr`qjl)ZtFNt?xWuO7T)k!ymP44py+wil0mS;l$&T7xp!e8}Wi$);CeQPJ)Lit$Pb{l4~u#SyKEFj#{R&bYp5^CedtlB8B_pQ(R7c_C-#@@WS*gHP662S=+^_!@>2kV<5Pt@e{XLI@>JV* zQw{nb*hd8qdl}`*eT6@QxlSn|ZpUv!Y$l%|;T!eT-_+^FDh7gnJQ>ak0EB^#Tn77! zlZy;0T-+T9TZb+%R3V397szI#(FAu+9JZfSO4^3MLNUSfNz-T>8nP)e#+@QC?Gx}L zsg=2elN^x2Ue3#M3YHuf<2t^y4UqbD*)RyDMzRd)PMg9)q2=_WL|5!PrVzS^0A)?C zyFt3o-d`9%#dE=lbqpW=M1L7@U2w!HTU;S^vmR;v&3vYbYrDtt6dizFVplL1Az2(E zo`P}bwnMUU3v5+Tg~U|;@%%UBD}vhzt+X!DSicKQcQM+jMKoGwH5;1e38GbQpzVS| z!3M}ZAwXXZUo3LPwxilaYr$g7Z*gJu75q~1vpfgPPrnj@NK0nlCc~;B{8yH zMrNZzE|!098I-Ge{cVx*cItB2F!^fq0>l~lnbxIfzWhU#7cM{%m~+%tgAoynBYjj{ zo{A;kQH9$_Qy*FXNOwBhQv&%!e8ELFm?BSf;(nxW^V=#k0XheP za<<^NI+e6R9%vCle)5peGsIr7I9M%rBhn zsK=~4%6RMyHW$^5=W@2UEF^B>KC5gd|KNw^MANDT@R?5-r-l2bidkV|yn{j-B5Tof zw+VPF#NDtsK8CdgVXKEs3PGa;j}f-a9^vw)rv!Hqs(dF2A&$vfPgy7aHA78zmVER1 z%-AC9vX6zz0wPP-3eAuj>22mJc#+J55P>`*tB3Vr5@qun-Emsk>(U>DtMbask>nBb zBhl}u6BO#n?`YYop?8!5+;UoTmM;Qt(OBubper@?xGF@f#s~5bYQ6edeJggj`dCRb zzE-_6T|h*sZ%0wc6E(bv-zn+3RJ#qbyk?fgo67@%&1C9HNVVxOh6=l4nhO4btTGB~ zXQAgCJ{2v&-Y^(bPvZ9)*pVvRw1*-&WQ&7lnfE z>6{O8KuqdbUA+ob)_$j8I|k97pL`Vivpqf>hSPP_yZ|476f$oARWTMA*HXR#~4AYJ=-`NL7adoL#b#uWDzhSp&mdasUKO*m_qWhdzO 
zQ~R1wvJTqe*27!{{R1SQa*{a}u0Z_(%21bEQ(0@VzbhYcQi+K<13V8(RV+(Dp&O^F zB(8wa=56i~9nkJ0t!ld^*+TJyZD*gL&PA>vN7F}OU`R28iw|hN4!95pD^{>}P)ym~ zoPK($Ed%s1FkwoMD2~0)wjMhqVXBYfZ=n#PYs6m67M3d+jQ1vXQT`HZ5F=?T$sJ7^ z27-E^+;&jTaLN+0gMi!_P25)YHlK}xN?x$76>c&I@mpl8HZUCG$a945N#+A+sK^aKtfH?87{%_hlYTu*KdF@^z?HbTfZA<|^Yp zU5a-Ha&hIvF)Sl=IN6bXr~VM-9XF!n4ebR_l@ZVQ%)b-ez={^)yhm_2Vv?O8?M1M- zGq?_@LEb2;0UOJEg(2Y!`G?ywh;@P`brZ;yLV0mBwL-*58=#kq3De&(TO|u7kyshB zq5Ybp$ox4@;RfImCm~7lm`kJ7v67cj_E@NMs|u zNfGB&&Gc0b%_yN9a$U6-7!C)k))1E>U#jLKmSTKVT`ldnOjSpfiIA>Zl>e19T{W02 zCqGjshM%UEXi_}#>3Hp1dn@?7HmUIt!xiReEW__XSPj=;r%<+tzNSN%Yx>_6gSbKc zmb^T|D!nCfkd&l98|FZsV)SqyPjfbWw-dM^FsI9(`Wf=6vl_b!F6vN2jv%|+k2IV_ z+v=5--N2HqmYiCAkhLWKAOU0D5WJdT?QkC}qdeCSMRBkzL5un)Qk)@fv!9_a!DO?J zg8w2EeLL&NqOiRuN<%OKy}?=i*o{3OW8dQz^ga)?Vqf$Iy4I4v{jchiZO5s|1|~PH zvFQv9Z+%05&-wzX#I0vPhO?lpoSUfi4P$st*b}8O{3gP;41?eeWkU2lu?xM)yI8gz zuw^bN&P&$G!1SWFQ{2N0A*`KN!Q76dVuOJPm`QEV*dDmSx}6*(5h(u6y+nymr`ReX zJc~LZ6fnlB#=_iPZ zWE?XY!KAtYJuM4p8(Dv^5-v(On`RDiJ#%>cKYQUy3I~HWLTLOB)@@JV{o03)N3@H`kALP&y~kiBT<+gsx%k zk>7M*!7fx`?F5zv<)~OkW@5q=htNXX+WR*62_Z`9RG&lqsEjLJLGDwsv&Ga%<+XSk ztwW^^nL?kV{yuIepwYycR_I~Wg){A_jpC0AbRz-_--I1b?+aQLMLi6>;!>@d#dRNvzo^P z`4i*#kq{Vy$iD@rwX_R*Q0-L%LJ4+mLA9uda3E!x_#^ph#BJ$ax}R69@;(r2BYd~z zE$(G|x9y|P=Zu0e@r$_*w(>RaxzEs7n`3xaanedA{~<9TKSDTxd?$H?h(((neoRuq zGNJC>~A;Wk`vM7%=kz*}Lrp%bW7NfyAz%9V>gq>0m6# zxx;-5SQ8%b8`#R&PyecDtGs&z44 z#%0wuGH&zQOGW_k{0&)QtPa7mxEA(zQEG4@S1f)t?x=twAKGWpkc3x)D%?EM2ElvS z7m8Rozxe?5t*E+ML~js}E6QRl7hlgHGe1iX&Rok%mfj1T&xXn8x?Sc0Dzd#5lmIUl$p9-}&6vn!D4z$o05jB&UB`0P zYKNXp5(fKQI|((2Kce~5Rz>X6ylt349;ta%{*yw~{Kz{`U7?Lib*CNI_C`0;SLo*W z1<`LA{)`T1$7t@@Z6J^_x6L0B=Wz?o-JnVW!JJJNr^SRN}faw zF(am@Q@2^3PF+sTX?Ga$4)~~s+6g!m#<7PBdxl-pZHr08w{(Zqun9}MHk1?-FLWWZ z14!q(REaI*?rwMlhUqN(VmwY8!{xU3Qvv5s&pD_p`PTjQFg%W37oS#mbBXk48-)s`0WzxO1qhHiv@cGycEz<>g_?qV{0V=Os}9s=>^v5v0`Pk zZC(980!789;`*_}IShgTx`Hbv;hI+SP?YSdMf?(4aG_O@!5ESLR9Fl+&rB0Z*pqD$ zgMM74+je=OFxyTb4FGwR2y_z*O-@v87S@z6&9_t1NTt0_=hufLAh4YYi 
zHZ_6!Ne~gekv~ni%I}asC|);aj<{QHv=i`-G$T(6UrE2quWuzW(*<$0i-2n3h|*K6 zR^juUH*ApTMlzoR7Qc-e#%+^U`+D+P<+4$?h4cT_E0) zDy@FXI4k{A;>ElrGi2ujSLAVtS6D|BVfFa(P`@Zq9z7Qt;^IiA?hCqzCes-Q}_ztgl>k95;$%+?qcSFRaCnT z$V3v>2m?Eij~espA5tTXNfk3_(Z<9A41JQZK7BU*o^c>9grPL`g{l~*Ec+(CWURC% zIO}-L|79J*6N%!^1+D(1fs_dy&&v2z$BsjJo2chHF4`jfc%A5(7}~S0hrzRH zXS$zx>FJ>stJ67Qmi(#R24aW&Ve=dmOfeRuZmU(~K;#W31p$t%*rAw>s?R^J_=U|( zOH#HI3S$TmuTkN#FADS@Ibnqfd3;kwJk$Dls<+ffL_T85$EfZWWDHya*FH~ z_Ca2k97o`!`q`8qBcf+1CsLpIb*nxw9*usZo5}uUC*WGdm58d^1k#(6o=Wa9QPbtp$1KYKJIyWRtttn&U-^p*M)4L3R;9P|D};yQIQ+lj zkzu)lqmnh=ZNfEjq)VnmsYVGjybeAe& zK&pOp(PZG7zA9@w3##9ql*c-6U`G3}gN(-lRLeEFLpx9Alg8w^e1N z_J=Xm0ArGOofZPTatYBr6hznw5RE(s^SybBoP)bjb4k9LaJ2N4`~hiQ-g?Dk%F;BN zf?;Ree4wtU7+W<-wb7*Jl?};xlLMzuv zl9~FV0}>hV+*Wby1iLkPo-~qkDdxO%8IKcmOhy%ez5bD37kfJUDN_`1JE27*9AsUq z3ln9t-jVXY{2G+tkZ^oivamq-GQV0lM|3EoLikL)HX&A&CD}V2BbqIn=noS+E8->; zi}$JlN4C@=8+t!9+~Pfx-mRL?C&?%!LjHUCsN7ybzI=6Bmq4q)#&HFMN@&Cq;YigX zUzIRP%NSoH{H#xL7%zP!8)LVj?gMwP`fbH#Ub?2C$i&;FIiJ0n_d^R#S;Y_0xzAMd zOZ1Up^Y}H!QB%|TVP?47MM2Adu~l|L?KIASX=T}KZlvj2!DVj0IV|%#_k?+4(l_pP ziz((b_iB4s$T{w^&Y3=u+;JV7#@N(xwJ+_zwclGi&Ffkc>oMkH(5*_a=_SNeWHy=L zso85ysmM>s?IwTBj+lL>QvB>7fw_{z@%+*rK}9$nw?1ZXv=dr6Srw($t%wZ|I^hknorWnRl0$^dOzrQZ$T zS+hW|L_R1R(AQx+3r6YvahaJ#y6*&fVzquaX>HUJeL2-7fNMyj%O`v?Z2)SWoQ-|L zp_O@Kr1m1lyE;O<1~;;_Tx-Dp&0DL*5?`l(*Orl=BzR~M)MwMV+Kr5n{xDr1(BS^Z z@DC@q2?)VE2M#gEh{C?j(l)a$8x(l)63>5{nX>fOxJ$a2kC zRle5*Lf zIgnhc*vE67!BgDkvqGOK{e+XJJXP)$H;f&t!pUtOImNdB&h@HH@&KND$zoeQkI{J- zmib8Drmxxf7$Dkd4#4#srN*wsD_?-Ewr z9!u<=I=ll(334-Zou(H$IG7E>NV_GK{PuTnpOP`{t;poOL)JkwD8p*a#Y&R0El2R{ zVwPBXiBCc^EW0RaQ=F}LX)9chcB}?Q*=lEai?7-V6`|HZbVSj1%WSNBF2OPx?~tBl z?jyWU_+kzuy^Y42{!qS#j5bHnr+GiNlrSl-j#eTkz^1Wt@xS>{w%3fn*AzCH?h~DJ z#+z`Y1F3I}?qO@Mq-?s!JBzGQfF{v=>lU7aU9YlUt=6^>u68IO)=l*9aaBF zDDF2D((mL=(LZHoWo*~C18b8^`gr!3nR&X~9AWr&-BI2c{~FyJ{vEF{{h)|Ea+z+W zJoNvLX5mTQIriKfs7}k-pYEp1=e|#Pp#8uPh_Pyy3y7gVv_|10U!``X_>f1gu3q+d zM2%*Z%4UPicG>Uwf3#YGDQk|lM5s)y(z=SQ@&20q;>A%%G=StrNU)|%hMC%}DOL2l 
zr)ZRF)$l{Af0U2@FClNdhARD;u~ZW#d!AgOeklJQ>!UU+y&|`(6I4Kuv+9|K;Zvcdg@-*t$zuE zou;jIQtUBhZ6hAPU*Oi#N<2v)1+F1)!#syJQP+XaA}Z**>S^e5CLy;A7tczFeMlV7 z88!7Sc_~-)FF^z~5UL%hx$?NW6xwRultwPYmzveGjVVMIf&T*cT5DiI?AXfv$b612 z2Zt%-o}T#wAH|bVVaJROm_H(d&lkK(T=K6CG6(hN(oAuT=yBy}F%I}p^HjW; zSl>Vse}T_y{wQf|dIfzE7Bqk2kEB)g=65A#l@-{117pJHe367SJ zRFN3Js~)P7@sc{BY7F#I(|F~hhVoXt@>r>7+e+oujFGTb<+iAk2yf-fN#W?(>Y@FG zTvI4B&EV}Vtu`9z6Dq?EFR<%sjv9&~(G4*Dvbwd+8};7BM$k>&qBJ?AOjkcW7}~6( zdW}F;>xR|YRCu%Hk0m2z z0QARvJz_NIlLa?{gjAZ~_I?nT=7-OXV(lurG-nIBv`jHO6Xj7gs_#nc-daPipc>iG z-qTtDZ%*sxCx^Bi=&lb7ZyDLmA0LZeWEjdjFwI45t2v;IMWr=p_>a*8Ei~E{EWT|5 zW(__7b{eE32BJ!;{vp?5Pvyo^uMj=tV zp+BUNZR-&2oSD$?=nOIs(Tp8HE<+t40SSYf7}PUNRx6%fMHtmq&e#pp!V-b_hF{3htiIBV z=tmrGs-fP@lqVteq)3i5uQ<8F-9uXqincCY)&t zhMP$>u%nIFDbr92AtA+|ZWP`MQu-uyyv6Ll3l zj2VbgLzD0;akYq8*aSitI;}xPiopexLMdS4#Eeqf4)Uw0mrN|}{3H|mC)4X+f*2uR z2we|d$$Jamf=H%Ak-JdWac<}<*jmUE>~H+#`V2w_DXS!l)I-^vUO@Ry_nQ8VE@xUN z9tS?KI{qa{U8J4hB8-^p09k~y>4w9d2xbfx2_xy-LeWPkowW~fQ8Z=o2*Of^BrS&Q z36Q6|Q1xu)L|3K&qF{o9O?}09sL<-JJQ5t)pTJd1Fs7& z<5sX~seZ&@&XY(#vYE&5YNMn1f9yRegf!*?LBP0{NvufXeb8ri3gRbZ6DPmr51h_z zuKI==%R>}2W0vzd$$Rnc0$un8Vw`Zd$9d`^ajBiaZ>U=%aAJ&a+$NCXU$mSMI>IM{ z=Lu&uM?tBgjLIB@hv;tJPSi4SXHp{OsiZNi4yThgyI&!{l4sco+@zW=*>jpl{d~Ct zZhX^5`4nhPYnMEyk=F*uDdkIGQ{^jj6A;VgUlR_awken)577IR@bSY*Xw}f$o*i3t zNc)`O$K2l&>WrY{;uijjR~gve^Q-b_SWoqJDe%k_qX-Ssu8%}rgZB2(*%&Tkp< z;5D76W|-S(U5ET$wQcFbkGep^{-=T^`Ak?l(i*;)2Itz zyK3Ijvr%JE5_8}BO z15m?=cM)S+FOXGer|MPIX6&7USM++qKPh=kA$d_mE$bJ}=<$ai$hiD3K}rz(Mma-a z%%$iv@Fzqfb~Z|i5a5NFcP#)BhbLDt$YkQh{OeQ;r8K#aK0w^lN#tLZ|BtQn0BR!o{=H%s#4aj=f{48U z0*a`JB8v3hdxwMs5+H3i+tPaoz4s0ZNEfBp0Tl(rirBFCUS8f0|MPxt-b;o%ftmeq zH+##u=brmLTciE)Uo0DA(+R&#%<-4ve{75(&LS^c5=6!&&72}6{`75(am2p~m>2sr zLM12^&-M=rIYrQLs|ue*Jayttq)o!XLyG7W(unci*ulgweGvaGS#5Du!ez?SDFW%G z*C*mo>}Owk{Kv3$0rN>~eA9y{IkJ#dFigg5Fo}aNij~zZtjB9_! 
ze>z6DatXA=1p922*;#vb?hI*T1+F88s&kgle}K#TpL6c9kDR^TrU;J^$$5*ueGt(Cvmn^95$wT0wcj3v=6uhp>O&Wb_lqx|9nc zhh1FdZ++U`zRGo7AWvIa(upnJi1fvweqW-r*0j%mru5}jyuXXwZq4+79vN@;Q<4W- zIdvDws<<=GbK-jN-EOfFX>om?FZ|CDp8J|BvjQy*C^+#ZffGD&h(I<9H!|Lkv?(e| zzmifBTep}1_{ROm8U#xdb))5;^YJrc9(w->pMl%q_s7o;uLvA*1qpLPI&9jB<>6wB zzyxv>dAB|}GWPg}(xmn{<3&QcJ@KDUjARx;^u(edEA9)zk|NSrr4WCyAq1yj_OsD&k;u#worpf|9s~0wxNNcTcb(grs2DTJ0h1w z+IYJuH3RIN#$pCyeUJI!TJZh{j>VlO#2FeA%j0S5PbXB7%G5t3rzg#t`W^^gL4;_B z$m$Rv=6#S(m<*Te`7k0i&fHNDl}C7DZ4i?ikL@?dm5_`K%;S{G%DPvHyrgt>^Tf|o z^?&Ul(7&4ih4%&iB{l~f4>=tF*<)Q;C~1fNLEdl$nn_=j%}L+q0hu#oZIg%xxh2ssfHLIEZ$ z4~CII;vOWzhPh^2XJiGmL_0KUAAEUU7E$pZ4NT=26Up^-M|y)+dUqpzUg!J_kbb8# z0b$7FU0c<|b+P(2vhW-1XLDY}=VSj_B`J4;*zOm2^#QBA zHuJ(fdVKgiU5AT)rQ8qJ^#K*!kpsCw0o+f!p9S~wf;asM*~f2QtrL1ps5$Fd;%wyK ztQV?qBgjns4tlgpQ{1O}PnZ0*U+D8!BCz`DcUN-2{AECp#NB9DAXz$ZS&qiD z5MRmWnFh%(;NgEy^I`l7XF}BM_}6aDLB|q$y_S03CpG)pIQo&B0;EUpCVmg@GVe@I z3j4k5GWBrO23-mLWUSu;4ur)0Yf*8Uaq~R2BSHxJ-roW)6RrFnd(h*P0;?QWBs7Eo zR*mGf;Yas1CpJgv?!2B%kNKd(r#9n9)LKCzv3;rzxFfNee&@qZ;SvJ(`X9g#2DiCu z5X{1~>_&+vBX~#B6D*_Nn01go$A<10Pprq^UMESeBN9}q;j=R8acqan-k2PG#EGLgAHo{T<8e!f{d;N&LkZ$--{M8&0&H_6EX)Ef+?N+VfM^;TMXp6I>sLnhVXIaiigsbA%^XYWN35sn5b(q2 z8pqdj+;5a~$mxE-eU5>RNl+oj#F7|d%W>aZ9a_RUy=z}sA(x`Z3#;IVEmuaW3M^Ir zQs2UJrU?Fvy#vGr9_qd>;=PXR{4GTfjsXFaqI!$gAe^Ym^kcA4ti6L4vQ6TyTNKrIEg2nF9qu-xw`%F9#8t=W##-bxy3is<~{`p5FhHI%7aTy zztA3r^=~s|$RZzVjWe}lzRcT#=Ewcp?~Ztp(CN7)u#i;f{mmG$dj~<{JKQrK^$B%@K z!QqbkNG_qh$HK|{h|LF)#IY!@@vr0)F+aCBQXb*)n$Kt-2tVgcGG*lLQ-sh2{QBTW zet~hg(9iC%gpP=1_5<;1QCiko31Kn24tS9+;12A5muL~^zu7J6IuTg?meP=LW{xM& zm7F+52(F1CNBa46#!f_Qy1v4@$4uDa<9cveR`-Y|alZRa;>(DujGmL$$G_MVM6M;Z ztX4@1Rcf-`WGthlPZ0vgB0u3?d3}n;;+$Q!#%dC}Pjun3<2M{R7^jsW-&aYHkW~zE z@zWEz`d<^mlGm-wCJj*M&g!6<0r#f}0h;0Y32e`;kA#?hTCaTS2e`gS}Kyt&+s5CThQET>+ES4kS%%sIVwAX3g>v1ka# z;~$+i1N_PS7lYvu8vqFBI<^Jr2*z#J1a}GsEN_J_5WL!l3o{WOG<+TQTeMR2-Op9f~D4#Bf&au)-i&2$xa3s)SS z?m480P#Z@*a(H;)vG<8Fk+?%;NwjEmuX%E6Z2GQsl&knoy$ad^!lM;)8MX;pGkPHw 
zg*{aV9~R+uguk0A@m-XTeSLyz^lR%3(x%vx7SUu|JadmmB8gzTb4gNl{MPj~l%=Hk z%U96mC92Qx1BdA`Qv|P0d}3_6i)~yPexdDm!ksvY)!X>l#JL9s6O7{pCVY|~X~}jO znNL2YYo6Gf6uGoGd5FSLiDvu-NmB&RX|el>Pn_5|R>GnaoAK94R!8&*Gs(pLbBVhX zD|cUscTRe@bvPl6;VHMbZL4H!P!lf<*&rbP*)33%@`b#RiY^#3!JZ`VDMW(*g* zr%?*VxOH{(JAh`<61xV}+FOL%1Ku>iQ<4h2eKXF%MY*`bn`n*-uyBjkV&1sQD8oIDS+A zeX4Yf3w_5sVY4-SA1~_g?+B2Ww_hVNfxo~+HOicSW_y2>Qh80UJ_-;oTR9wUAg-Jr zLug|E>(^5RZztw;mn@gB&>VNjt_wQi6?gPBB=Pk;D1}}I*qQEtWx*ypW+Lpc?Yel( zG;-$B2u?wC&UAO)dg4FtqUR`J>}leh2FSf{+9m;C{3uowkPx`&fG_wnxXxrDvn9-K zy9#tN;Y5_Bou;e};ya{MdP7o=XHxND z6_$fE{fOp$&uOost{e5!FUCCE@}6-CufCQAnh*>Zl|$lqV44TIiCQ{Ea9x&I8t!Dj zBq=yj>zF~ZPSpEDjO3RwXU(c8UAQd6Vrpqz!sanr713WCR%)`@EF1#;$g*kbFo|I| zMR0Z_`Niz9^&xMI?K#RM|G~Q-97((x_h|2;q?1Id!P4aPgyox@C=${YEfs2WVvahG zv5oT2r#D4#%#Yts*khBOpiKmiG?AvqUq7&bbf2`^)SBE$j@#*wn3g!C&reE74$$16 ze3WXjK%KUl(KB`JRB3;Xu#wC?c7wPwk!|@WetweJ+%Nt`vcfn$;XI{sdm%|eeW-Vb zY(;lk#U(caajLbH{m_s9CfGj4ThL}&tHhbo`VN^9w$bHgdBjQlgxJ-c?k|k+5Ck`sn|bjc8X&&8H+GqTFu2JKGmn)c@Z9-;V^}=*vqy_qGB2Tp z3aj4NWA9SWyvlO*ZF1)C;wAUTv&0dXqIh*>Z~D zK!!Ser^w4fkpg*=@n*e0t zCF>`cGySbX!Y zXVkwW{mnYGN^;S<1Uf#+X4wlyD8+rQ8W<1!vmB-f)~gb)Cd3|sl6*<2=F^g1ldl@r zB(oF0?%YYyPd3~bNjXCiY1dO-Y1@}-(5x6`v#&8yp?~(z6v1k@GMAbBU>O;t@XZVo zms90Nif;HEY{(|=wucsV?FU<0yJwB4?$1g{ z0axrBx+!GP1DQ;{&Azh-4ge2Sn_ZIWEbX`4PwfBHYZ%_+|D zMxm~&+K~uBzkAogeFDfUVXvAX+*il2Q4r&QbIUtHZxEupM<@#2y+TW(77i#C54J|n zR_cI_Q$nT)$J}}6J$@cm=ZU@J_CMtX`93x6a_|5oz?j(;G|gvW^suHI$K$qs$LDTliz zV$0f(y!lZw>%BFJVmf-VYvi+w;-^N2G7`M#j*%pWNgCn3)k8? 
zjU|mV*yzL7jyBV7;jD~3vg8(TJKkvaCT?HCzqQE};fO2pCsgO)QgmL}r@e2`RS{Q> zo}!1M%C|QwbL5KkHL;~QycV6cEY5xLIreknp;;yD+{9B;*G`p|)zCns*ZwHzQ`FBr z2jO)w#fE!fFPy`+`EWtpY`r+dl+dAh8QmPuUSx2*4qw8Zcny)*gvwob+h3PT@YFsRqHuQCSd!ONcm+5n=u5cmr{oUR7mVmAFZ z!EAy`q?sAlQ!D7+yF}D`j0~mr+E(C>-fy^>MsOWE%fb%l27jx_W1{Tb$&W?6f(jdKih zcjj%)-&nA|aBb1*;uR%JO4Z9$%jZq`;QG6 z4QgGReeLb_ksD1zvYYr@$A{Ms{~EbDT6vo}=6Yx2oo{z9-Alh8@xb`u^oIlEnUBII zc1`?z-1kKC)aTiTXK$a^O~$;?e=+_t`<46ag|9EZVZJqe`}$qcd#?`*KlFa2eA@Bp z@#oAhj$h||?fRDVedqV5KeB&1{!;mM>bL)&xqs^adj9X$GkuYY?hMnJ_Oqg9v*(o0 zy)^HI>U^~=3vAS57m61(F21vL#)rL> z4fXoBH%;HXd5gT`bR^2=?NyRnC}XPlSNyUOR1@1*~{ zfDM5Mf;@xcLjVdO}0?3ow7}_H}zFozR@vlF&+M!?nkBM0G}<3GWI!8+I@x0eAtEXi(!|-E{9$Tz8cu? zH{dXM@Y;^+Yi`WF@p0(x&E8v;!{U+nQJ33BV~fXL-??x%?;iQS%>%6mZyuf*7e4Zy z(4YA5xZ?@-)bi<{XD!d;C$~*Ld6Dzd_0{}WXJ3=vYE4Mcdy@Pe>m~s`^U0R zuAirWuKVKqb>7$JZ{gpUeeeE(|GDbtxnJSG=lm}IbKuXdztR7B{)<)C&oG&3J1cZH zFeh_v)4aj?&()?cSgmfn(0LJIv1G}~r8k#-TcM?KXl0O6P^n0>Z_RUU)wMgOc@cYm?VcOxw5ho%;qNvfbn5x)5+HD)s86H7jei z>-6e3p4@tBXM<7Wo~He$Et{=d>{?ygyxRjiB0BMB$X)ca=sAA3yeG4_u&?rb^@XyF zg_p7~r(O|XW%V-#5(Xo#d0)4^v43dm%~iK%-Fi1XHqt#>ep@(3yyJ3r_q}EJKHML8 zQ1K8McX_1q=*>jyW8}$+r%RrWKFfX{FsU>7=taTHfLCi?jlPz@v3ooFZRb1E`z`Mu zeo%b0`S|D4sn6kGR(~1%ihSGi?e+JGz&-0(HqCR8g zO#NAVX4}t+mi^E(;O{eY77B0@N zVQxw8T#szeTCa27Lq1P@zxvM)SQEG-$RgM|Bs7#9#tzSjsElln>W{u3^AVbu`NNkuR;pXzH`E7!W z!eP;b_?_gpY`$D0MNhFS)jaK3x=V(CW>i)}HY10fE6GdGFDR%etS@RV?kwpky;yd& z{947$%8{zu)pu&{*50eTUw{APy;FA^?lj(R8aaKd`9{lN>y@^P?Y$jco$Y5%cbz(0 zeXgv#peL(W(I-C7xxlrtHrOLzwUS= zc^mxB=>5$1H$N18jQ@1#^PoQf0Ye@ zseHd#3jSp3-P;I$X4+ld$DGRq+Z~zfnQfKfOnXQr17va_Z%#V%0@R$;1TBX3!f!$Z zIK}N7^cKlF-D|& zPd@>)MMI?+Koqp6o&W}UP1tX659c|#9;|2Y47<$Sf+^g6nDyxNZ@cN-0<&ip^uCMZD@j}v~m ziz(f-BISt@omgUYb0xhhKfOmpf0d0jtYhp*_bbw1pedDdd&YCA8X^aLg)IqRfN@@Y zXf+tl#<>lGAJN25vGkcI-%lK*AF9s0{+c zbfK5PbjgzVIsg!?43UA;I6kgTpcpOqK&I=qIgKBtTQ+JAzNY)vq<4AIv9i9}NA&7~ z(Y*Qe_t^}wka0B4oEgBVkwp;cfUfXlFb^o?9&~904bjW*Qs^<=Cm*=biS4rfr*uN& z+De7FL-mW(@6$)hw{x@Uuk$I? 
z5ysjKt5_x@TxRWG$mr+iJNW=k*tU<)vByB^V|DB>@Z#oSGy^p54M&%Pf<^@*0^bz3 zAO=j2lqc|W<~8&?Tn{;u)nN>J8FmnkgfX`g*azA3VI%sAMSf(5hGOG4K;#By-Q9)w zpaTs{;m4?FQ6!v#&XfSH^cjKN+Mrhm@-7KoEIRx!8%Y<8 zUb~EF@O{p0f~$EePZq!yoSTIkpkM4RSs!$T)r#zcDzQfibx;bnA(RQRQIgwL2t_Wv zsY3ixSKhx2zm?w_cnwpe30)!ZGI3PhPpC%donH^d@wrkyWX0`+4?{aRCJECaJyu8P zW@sJe?{*(rkIsK}1s=-zboV_RmieH6CG;k(u(Jf>rChD`g7l;>^URqq#hwyJ=0(8} zWXEjdVe!671w^ZmG3GheKDRf_OX$%Tk6^51aO^9*qG0^W94I@dyFCjs%+#*-Vs@wE za{`!{e5I&}=`S7vpEDf=%2Ve|7w$-iD$|R-#?6WuijpTaVb7YP+p5sZ@}Y~bp@gDK ztu*HEJkQEKOnT;6mM7CVwO7#1oGt4D?7{cqxrC+QYkow~H}F0Cm#ZQ89nE~U1!gs| zMpi;!>K>i{40%-iX-1d>#m(ghnEUerGRWY|Od4MWY*bX!2f+;KKKvvo7EA_y10|g6 zE~cOY?R?@2-{=g!WejgPJ<$%&(WvmlBAxK5E0a|-w)zTuAp-tpX>Qh}G~_whR9P5ebeuWpkwz?u@JDkZo#*DEO*_>fi^y$0x&?({PTYWXuAUjrrBlIIQli;RTZuXr-X zwJV3XCjg@kD#rvUsJY742G``xWEq1c5_`;-c?b$di4^DD(hr1D% zr{la2=%rCj?f|Mh#>vS*-nN}%OOUK;Ll%mh$jQP=5M8k@dJ9>^j6?O1)x>rLLJo#F zz>`Rrn=kB+q&?Zl+ab^#4&tuhy|{3a{fb-Jx`y?ZlT}%Ty=4os4`ZuYwW67*EA|-V zAQhN3Q4LW;`-0!XH1vd95cCcinTX&D?-AQu9Niqu3KSMqsA9qV+nF~| zIq!tv3UWiKhbTdIu_1y#oW(jGtbmtdo7}=7DY|sLfV(X7`i(a4B0*vg@oCDy zYQb#jH+U#(6m5h)q+U!q1s#?xikZ)>64L!WnL1o8r)OX)dS#T)&Fo6O6wA?TnP@x6 z;+_0lbs00P%E@6P{Uv7N3AioK39f~DG6dwy(5w{gC>v&oXvFsqc%9qtv>7~vP7HVR z_MZQIp@^gC!nMS+&No+A{JM)We*OIG{b z0QtNaM-2Fae!CYZt)qtzHjBsU_q#U=e=+his&J+cbX($5sUs`Vk2b6*(jk0+;^&%e-4(F9Odc5L1`VF zGq5kun;ilZ>20h{a7mmV`VIaXbR8K*Y}{tRZAj&qs>GVN`l^$N&(%NECOFO6cybee zGuyhTo14K3PYvUkVXfSKtY?@m?K*Z6ZN`&O7>!nD3yVV6yZ%)6izlQ0;**kZmvep z!rmX)3g=@juCJkD)c)oh@toYY3u}c(vI<)x`C;j?)zi8C3N-gRXNN3B`h?XleutgF z=)%C{Lez!#D|R!ohf^Bx4mM^nT@OR1X#CI-(Ss6kpPNv#ptU)XZ<^CtsmhgQ>Sm9z z$5R=S*({QLCnCmHiN7Z4BQFJCqW{7Jym|gFq4Vs+F8i5hQNeY*cusXv4=h|)_N=Lh zzoKYI`ChJH?nu@gc4lUR=r~I^l?c5@&&c@X9)vG?7NyLB&iD8G2KlppIomTG(UQR$ zv3-N%xfbEE+L;ag{8i<=(je}!qNI%NY+SCF0Kgt+lrRsVM8#s#0mM{tF)|lkCMfis zgj6`CPBicp+SdP2e6?*;*RZgpQTyZ|-?3(**pC}fzB)~p9aLz|V_;{pCjeX2IITE7 z5`H7A3x5ms2_O07Fl)Hhj^{ubI&gK1B;cIN8Euhjn_>MDKGLweFo#R3u2T51JxaAW 
zU06>3OnN5rH*+On5Uxp~gi#<+Jl~ta4CanF>VQYkhc^zVmC*!!QiTm&y>+SVIz6pA zN|MLeocCH3&ghdL6GQ?4tB;omI#TCw%9-XkU)Ce&MBoT!0Q1+N3l#k_ezd>UZ4bFDhB({VV0iO(vL_3gOuJy=D zq+@VhY7obvJ3K|5jWpes=CJB3?8G$eclH;dGiEC(C^o@tlA_@k$I^rX|an5|ihx3;` zFUgW^&e|1YivbuuAP;$hcDr(6A5`zkH$|*sao2A7O4+Ydsgh#JQ0Zn7D0-gpP2eo} zA$-RR;U9-hI54+5(Tml`*%@7hEoOBESRp7zb4`Zlpw5@=6^2=fofO${dQ1IziKpUG zv7cz8+$DWC|CQvd;5hfQh{$Z>EEJfMHCWEPwNXuIA!nIC6IsDBb{T-uQR2lG#lnKY z_Fh?SZbuYvORPrYCdxf>xGgC78w&|-lW88x&C<9?zOV#4NFj#mw zf{R?^75b*YX6$sQ$IKEmv(G}2T$|QnE4xuKScMn=DSn(cT`0|WmfhlOWVN!_bH1m6 zvd7lN5^&S%2Z8LSG$g}AloP3eiyT4b%@i?;&=axORqf%nn&b9Ymt+Z`K+$|Sum{ijTe;Ue@COM%tUq;MhdJ8=)MfjK?8gyRQo z30Tk4g*{ytA(Kc!--Vo|%(rdonH!k*Yei`q%>KgnDW1&!G@(?&91<)O&x2H%*92lH zknGGehVDguWM@rSoL zf4ba?tCrF!f!H6oe9?6F0-%sDW?3XixCgMT$ST%5Y?0qr^ah&gG6-Krjn5@!$z-yo zlJo+}xyo4zzUWb|Kz3DVEO(d8;Bz@gg%D4J!QrcO+T!auJ#1R!ViuES=U0FduwyQr za1t8W^(@OPozgfvJs@>axwFDto|1h}MwB{BYsHyjbykaTk#HVei~FV9JNm)v2?jg9ZM)AyGzl0dp09Fx{Ly!d)l-6*JB{BmNW|hysFT zo=;#Oj+?_R=6i5E zE{evp@KSQnd~};o&Eq~io73-L0X|31cX$?!QX5WI=I^7~mmSEa(cWeYGd9!NvZada z3|%%(*2Z{A|01pdhT<&+=fR1H2JQr;>8Huog#(?Rpy5bgyLOQYb4R^jULMn|ey^ra8BY|?VGD9x>?YgX(O~u(@*`#zaSWur|^!$H4=P<}Koz}}k8*CxoAS41e351-vQy<^ z1>Z%kFBC!+l12QUB!N(!w-k4ucYwnV1=#?5ksQ(q=ZotHL)V=bf0?*SfSN2Mb6O~HPVMGVXhm7YP%4UnoTCNe8oy)ui`kMwczOgT4!z<(sM zj6T4fCEOK6WgX#Jc~X&bcA`T&v;;j{cedbr6}h-8=V{sUjL8gnVUgHJ@gUb5TPoAa zTteF`x|F(_IK-FAY$6*tWum&k+1NP$fQK(~i2cDqlUaxM*P0ai)!!^Sn5$oPE1i&0 zReC}ASaG-DIg%}#ma~9zNtBw-CgAwTQ+7r8a@-_~{ChF3K*v24e#TJ>05iSN(UUEe z8q}L*UrQd->~nzvA?<2PUrrv~i*J)rL4O0~E50+@lDEiQfpR=qGz#{GKII2PYCeXX zRdA%!AbJORR$o|=1YRs1D6s~6b1d@TgJEcuM?L~pj9O`#kJ^;?1lLbNR7-VI}ka}d7T~wPlD8x zbMW~@d+ApATDZ?7pC% zoC#LC7XiDA$(+oU{i3Y0y9`O+o1b5_T``{8oLe9VgsPch=^msml_+{omCKq1FNycW zEBVu+j`5qhhl3V#;@O3sIanx5$1xhlq0JRuW#XJ;d6q@1G6NLBxlw6R{I<+vDY_5n6n9c8lBdh&N{i$0%CWyYLWB2Quq4@n_wa~r zuh2DYxjjK?M?EM}EjO=To%y3Esf;hx%#AJl#HMD>&XoZh6tc|yWFu)xs!434uu&Ee zO5_=cT7C0aWBfyIDo7{$o85b64?0=qS&ySR<|bDa)3OzhOPguWgewcT(F0LwPCETD 
zZ7jocL(#?F?N6%ViqxFh*{u)C0JRywAW{4mW8EhU_hZ$WJ$Ibs8JYhWth z7B%qXv&&IO#|P*Z6e>!oJu3K{8CT)Y&zA~|t$6!5oq1(kb>_azzwF7RJE`sL8F3>r z8Eap}u&4+t3EaRNz;=3iun(el9Vd|6XmKG}^Hz$>I9@SLTqbcXx+@%G!+E;}2SJrg zm{*v+s$*s?XwGuzB=2x6dbvx91Q8}p1ld*EU>TZM@)OLbPv#GS1?2Tv-@s3}qBH|0F6@I$0d4b7 z63vD+-T!d2kxdRCmHom#>voe6`7ST3UxY;SyDB5$QRGa?Avl3PJAXO6g5;QG0ndqj zo{GR)VL{RlFzCNh7>_J+zsc1`S{9^WXygcrs zik)0n=w{ImjtO;n?pgNQcras<^GXqgznY?&yPI1;gfcuhglNB%Y<5scpkyP9?c2w{gI#l1 zV|QVP94e4rbRa#wVL*OavZ8jE^cwqCxrg`~c&=!n@J;gcoHK&O1SoxsM~jM1(d5nz zW{XSNmwgxT{aAHw{_F!-i~VQB1ihOEHx#6eh}CP}DR8VikFVe|!RXlWfSfopqkI(cTGeP_AVt4cx2|(d?Q_>A$d>WtURd zGA2hmT8&<0W7 z3&E3`v)TD*Nm+e{8$G-5V5$-6Y4$n!IvhRiti(ASkUbIB1~iMj`3JqcxxYETT$W=u zSx@bbKqKg%6pM!3qG$ZUny&nHNOoCPwjV9CP&55%!i#K?!Z8+017)kj1hNeAQvXiT zG=Zh32Up51az24YvQ2GAAqF-_?$~*h5g|}-_GKK#R-JTaXadu!WQ@Z}1|^pnRKnr> zBf!$Al*~b3EVw|y24DLq*QAi4n<0M#T;ZU`Qb#RilN}7^Wj^mTUun@K)sw-;47!b`qdOjj?1-!~_H7^q}cF@Jj&`24%{T6D#``I*% zBp_GnYT;A#^onQjesV+cD(Dt|B)1T{5P2fw81yKpEJYi3^|2PK!r$Cfcq@@M2UE-% z6-a-!A7U5q0-AQSvJlnU@0g5swPG9EPkK<~fiA}{$a#n~MkvyG2sKDYjz=Wke?=tZ zrt52NE^6vvgT6s)q)~01d`)gkV*&R(>`?2>!P91z18i^7g2G~!3+`ynRxCGsJna^? 
zC@@!c0lnbeE_{ZTxZdV&N2?ut&}{UoNF!(QNUN(HtdJDxzHO+a>aA1FfIG9s1Ev2V$ym;wvyxAP%GKYY^ZS% z?Vdv`IwTdpWNUum(&u@hJeG;0`@wuT)|D&A(tVJp7OM-6Cy!hiTzsn ziUu~?;2{qH-&8-9swStGeiOYTX6649JdJ*mMd5D_%}Ukfrut_}*Ktg}<_kPoe_bru zW!P)GHe^2fO$4-hW*lHuHE5)=!PV6Y`SqlWrP)#+LVtd$=zH{% z?eY;|2{w*S15RQ3Buy{?rG$!^OOR2wMCNVez`GGjUCNe+!&GaT-nB96Q^|~TYiS(O zwv!Njx4^OB7k!jhE=y-X+U?*Fe5DmO!tqsk0?T= zOy&pb?$qcm7wSX#nYvrFH0j)YCf!PuF7>2;6&S&)j0W!4_-+7XpAC5i9>+S|vY1-P z)mQz=q+;UTYn0Xbbyu%Z&S!UZno|=pENZUMHY!f#X47uTXyOt&SM0znWE>TE5SxJI z-0+aqz#~?z>o>3!o&DlXvR<`x>{{~mvWd&36n5dS_J!0PxfNCU)X~gi*+OOL*KuJN zeW%P5oJD^wsw5;a>iBbm=L1xBj>~J%88v-AnY^>HVf1S9*}5GUb1Ci>F|G3{?~2VU zvZ>PC@0mq3qs$HbAGEuQ5C%e*NIu8eF`Sf&E^5Fk4%7J?@Bs~f`YSoGy0gvGDGw`7m&vGvqSW*n+N_-SJY8C0dI;@4{e*ls?gc|dv_9YsqmS$EtOI1D zlE;6NZ}s-yJVV*nDd-KP?{)HDSeFsiq)c@ z6W99-7&P8Sr(nPVJw36U5`XpXja!u3-uCVU%Hs~_Q}?K5jf$cuYHIa`lw9hc5;n_} zhR=;o-bwqAb}E`rXGz!lc{0p+j~%-JRdn=aG%TT}-NnHtXpa3ncshN>84h%lK310v z)i7%Ec~B0JAO)c;@G!g&Drc(4zl3_2&qI2kNvOy51-uq+n-oF_Y%`V$xxofkS|L4n zb;mHI3NNZ%#QX{`$lIwbS4je(S+F5w4{5{h@kWpp%nR{Trkg#d+yOa>%zZY#_W2Of-AhuzWunkNcGZW1wV$7?kUhpqy7Sinc z3$j6+pZo-sne9gAfW_jM7dL=8B8}E$Fkir_`~a5lzGf4^F0M$l2zIDG+Way< zAW!}@V;j&VoxwK(UWv9abinO=M%-2q;EV*Z!0W7V*GT4ObpAtoz_PsHx*1?o9NWDG zaLk|EkOw$rZ!9?qIHd1S+XY0X@)@KcXb*0NsRsu^4!irph+1V8ePheI05q2eDCVxd;3lPP3V{ZY?{6_&8 zsKU{5CV_5fz};v7X@1!63b>wHd}c48Sv|k5f$_UkSm4iiP%uMIW<1WijZH9qq@GLW z0vn}2V?ZEO=gG2&G)NX z0rUDxIev^671d%sqqp!kyoOPijVJwKbfg+a9%9^=X#0EuGOYa6UQ{xCC?}GO&dZs*DV254MDFL!GW- zm<`{)Kg_mcj$M1o;xixhtj5kVUpMW+)<84MsQ-(hvkY%4>!L6=xX-}g?mD=;OWobw zEqZh9T#vgnP3o?c3NFP86k6Qf-Qh#O{LQaCd2-I(cdvJ?MToI+W;^(Zxl8YWOiU+f z1^uu`Ddl)QzBTMR9*I{wQh`T;@wbOiaZK;?)5&Jq^+*Wui&EAbh*H}kp;^2Z6U-T`LRPP!cDfQ?k+ga zJt_2JmlyTr{B{lMgmAgLVlrA~*oU^B>Vo;cPC+_{=V z{vjwcpgd`QGp{5XCWl$VH_p5wRj+slLTRW@BtPb}Nwtm=V{ zF?fR@v_g}@Ye54PN8{ts(bDO`y^&}1KDRGO1L1q25233cdCaj-sJhnJ6~9;-S1QEE z7bIp+#*%DNiY4d+a|2d_M(BrUtwQ^#j>L^aPRekH-YsLY9D27WA!wgT{MftT=m4Vc 
z?m6{eL652*#R>Sb()H#cSX4eHTZp#gj7A#KxyFkb@kpztHHL?Xu?=WbNQK~i=gGyCz(lDuz#L(j7U|plzkx1%#UVDp(uR_t(5pNDU<{b z5zinQBr)j$VMo=W?Vtr~@?3_$z`tBtE+6iYg*Ho%AdP!5&KG%BWnyQeBXVytT~Rmf zZ|WXibC;fI z8p{0Q&~iWeF&mT9PPH-4>TTpj+MoCzafeD51QT7!vV?XppLiRRgJ%)jJo{pYLEZV0 z(r(%VN9VAI)mIzhm^kI#(!;ceVwBZ|S|#&Uz9qMChd?{w!!F=w0E#}8Fb8jO_-8gb zNRojbf3O7bw9T0-v1A?gWkZd3>PhA={oLZm)N{=d3qqb#IV;u@mlOl>?cjm*4sSO2 zn{AC>ihI!egK?~oyx|dm{v-ySF>*0^6Am)$*qjS>2KuNavhWLKH*PcONr7&fEQR2! zH=$MlD^fEL;^opKal^1D?3f@Ln@{Dshod+#zqN@=F1fzHmhDw=t>y$Rvo{vZq5`s? z7|xN?P5rsU#5f&<^aJBmKQm(St@7&F-WbMZ1X|HkbbuRz4kX-8zUBIXD*(0fqh2OM2iM#fL^uM6aa#^gz)rMZF3$bE#nxl#7l0#bz zK?gAvuH6x5f;#3UP26|9X#-o`kXyciK2zhLQ%yOSC9BY%QzbWn^ z_d%9hoTWJkr%2_xBj51@OeTu*_E0~uQSnDf8tVw@O3c9-k2d@+a5*(b^B+;Ve}c+` z*i;*+2m|*EaoH}QG2N4P1zY7aSsoaNmCPKky3MT3>`cOR z-`KOnH1b~1E&Ky<(}Tp$fp;zQRK>>hrs;~4`q>pOvh$j&cDr<(s#!OWO;(tgNAzP^ ziR1|t#T`#SMfPC+7jqjl(2Igb;W%k>zmEkGLyo9aQ8@(-sS484R3?;h#)h2JT!-$9 zCX5}dSwzpGab-E=LVlP3mzF@7xQo%Jz(xiHPR09DZfM^p&~PwJ}_$+?~- zhh*;7GV8w_XWpRN#T?XMCw25>jiVWyG$^&HlZknOLxXMR$t3%2wvFmJlv12U7fGFmob!OWn}hg`3GW>SxJEz-2{fqz;F<_x{_k z6?CraPUJfgzJG)2e(k2(35x29=)!R_&*DTA#0BN;laFLVb9&;#seDs5A0$tuG^ey?S9f!B(^jF=(Et+se4xU2i$_PA8GL%ch`RVJKEW9L!p@jJRz+XfO z*x>%#!T#m#_c5aEu(}QUfy~C@cnwL%XDd`&san-%MGx{jmb}HSXu|l|+|4YYYXtis}iv>6*rZ6>62L-ju1_p|~mk zB<~{c0@U0~sgDq2?y&n)e5fzXqNq3ri`X2X0x@Km`+wLeVq()iL#QR9lF~jhUd;pQ zt@;&)gNpxY&dWB)3RL0PSB_9j6?9`#q`#8~QUv=hGJ+_f-}@KiZ>fH6Td|GA%EqpS zu-rxEx3sM}eeF-wea#*EX^I-d8E%(shxQmM;wGv-@(P%qilvEns1Z_oL?W?@9p!fx zN2nUt3WyQ&OFDoRUG?Z=XxR!H?d|F-&=BF$DrdrI$GQY1?p z6Jb^cp#3zaxF2MY;%?|w07*Cb9Kf3BHkW@KJgum%w{du#Pth~ov`T-=8+DJ8ovP)E zsC+(oM!GMjMBK~`yNMa@k zXy|WifgY8=&n}ZVvQ0V^8Yj^U^!Tz8ikA#Cdh@zxfFlyJ!*}LVG=Pm z0Hy}uxO-1<45;f5+lJr^%5p5f@qg_HjIG#DeUt7R_E7pkGX*(gF4^@lWJRIt6_4zd+6bFWer3p5Q}W59@E)#o}A$+tS_E_l9gvt?jHm z#R^y-)n_IcX;S#oDZD7@2}+arhV>)QN7PdPBS-s_L@S|myMpf~#?)x7`I;$3!Dg`o z!;Lolr?{quwBGXN^bHjxwZIF7*p`bJYT=cd`bUM+f(^U+e!6^a#4*??1yg4(`3goj&ql}`|-6*tRrpoIn|$z 
z^Xm+5l9ybQu|0%mrNQcHJ&_-19$@Kh^4I4X*C~XWI-QcZuRNr_DaPajl>X@$mn3@^ zXQY2|6=B`T0!HGy2*gwIE@jv%BCevpH6p)%?lp6F+Y!TcJ(L|I`>laY(?GmZq;|c? zCOZ2xc1r1F>LkErVqJ1d0lQcABl;!1TwWP$Ag*$d_h#Hor#VNW352}lq4j1rkd)+W?`r>XyYKT19^}ju+2?Pit>No3hTUED~WNA(y~y5k6cr zy3xNEEyOz9{vm7tEB{v54k_{u=S_f~n=a(|N;WGi%uglt)Cof-JVnx^T?C)Zys!L? z5D7m1Sa_~BSBPyfTC5e=OaUa|n}clavS3s?KaQxur*))`y^4@-**RMc|2kNq}TZg92sB8SPB znJk2zNYqa!ri=CJPvCCGUy8G!B7URvG{_BWWG;eQzX{Y(aMkr25E27R;tK{y2iZp2 z-MIn!f!XP7Pg!SEXJ!OgrIXWhMX1`3N=qNCSVW$Tqok|J6=6#lKjN70U9y@;bA1OU z6U&N6=GQ1+TaVgaDzdZ{*_~uxxjZ8yb@b-w`f;;_D^*vRFR9Pvwe-bUDYu_$3(chO zlGlAVkW0zlu1Rx@E z*^@B>ZZoqpL_^0>Pki{q0W!y>2%kzM6eQ$3oAWGvZAr$Ns#lgUT{ZpOa9QJoEYfaP zzT?%Xy2x9S8CkouI-1X3V-Ex$q<+$mz4s8?9V*Nltc*bNd*<)7@lBOB?`%k^u*^3J zDV4#bH%abkergV6#VRXRLz3KODRL$%fK8W<4LU+;nTuXe!ELI@+1bIFt8&%(u?5?W zb8Vsac12IiWUDLL-yk%-fxOe%SI*Go$2Uk@)l^Q|+bZh9}Qco<@=cVJmRk)5P^1$WXN##4y2gZHHUD5qY zTVA%KQz1ApB`J2oLMm(u0bgx`e`DS+#76{i@Uryv;ts4TE|aOPQycOSISN zM~VAX(gcl{c3ok2m5F<5=PRz@yR!Y{ z|B22Umq=9^eYKI?*o1s#DRVq>qii}YcbMl-Q^{WM$s97#`ITckgjupn*BLrAy$eq2 z4D5dU1dT5yv;0u45mp*H6tmO5XuipI#@m!jI3glM+Qi%o6fpj@uh)8V8u`L`1Ky9& znj=c5XD?J;DzKYShO#};pGN&HyS1wXumMr+OTDPED&pf<#YNeo@L&f?GATewzhdTl zB1AJ4Hl?OvrL*itC0d}jrqR=EXU#j{KNilgl|S9EQR|VyXxvq**lh~F{B_tQ z?vXUk-=F5Q-91#qM(U)~EC**^XJ{$iT<~16vcQzPf&6T1wB|`JS)$F;cwG#G^hcB9 zH3K!un1zZdN<8!*j*+STTKPBV9KUx~2SF zxk}QHsViA2S%odjw@Y-Qp0;k1Pw5TjTv(fsr{9LSM>?s8pj(5c$X;Vn-d&g%c&^J% zvMcyxXsOLW4=N{BP^g~1Pz<4osDIuaG+H<#X9F6O_T2OsEslS!Gh- zG9h)AQ9`_l3)ik7q~U7ie}rdXU+G3-nHNq|#737O@P>%jt*SmC!(_>22c*u#^Fkdr zT#{^m$^7DN$lgFdOIcueO81I|G=bXZTb(=9?@0ma#j+$NM6W{ z@*lyTqON-!q56@#oJ#OmqF6n@>XpffjVwK_e}Ua95Na7wtZk2aRmKMMcg2rHH@!mk zHtL9at+ZQ+w|o)1)^8i@>Ck`%kVa~R(-`avv0F8)>bi9`v%a*_%tvbroDFrtwKh!q zD}B7VUhSE%Sie$nFS1JYK&A{{D?80aIBv)rOo01*attNhS%j&GBZ^N|S8^xQvr8@3 z$4Gg>40DxWtQC7`LGpLuY8DnArax`2&2X=WsRvtZ>#7KZs@W?hUU9Z>m>q8aRmflwSdp z1qDTB{8;+x+(0}z;ZAlW9u(DPpz*|za`g?o+3&P`2bkgUo4E`2Ip+~JB3KsNu#-MX z2GpFS?!(6NvE&c_ox(fh&NQAqhbT{&XelAKL`~A4Bf5mlQqKeL{7BgW@XO;dlSIsS 
zt|Yb+nDoHz{nD*OPR)F7h~#(~VD9mI7Otjmr{>tcQBUJNEM2JCk>UE8q&k>Y8OVu# zJ!Er<4i7Q?k|=fF2HFUdbolPIidyi%vBg;oeJK4S-OHO-(8PtO9JeL0f5+kGET(fr zwl0V6?@+6IQ(JtKrKP0J{V^Ru{_9i*))J-M()wqb&7fbkvnmFfQOYWqtoVYJvP;Ro zbKXd2#~v|#Vq3x=>P9l+pm|D`4)VDzok0b=`%#Zc*l8GkfM{X^>x=X)_@e4gT0-nq zI$OOh)0lrvxgvRX4v?>m`C=la!tgQLGWL34y7F%(!eyuXJ%u)Ks^fgVBF9}y6hGZJ`;OcQS$h zF2zR9*=q}Xh;epZMZ%PD$0>{@ZoFP4yvg^yGfI>u5S$+>?k}u5^jf?}bgcF&lqP8P)?&f1b$qq6If1 z1#_s|XVQg#Q>*t=!cSyXRg>sGIm&iNe2U0ZjfR?uc~}ogJ~)$h6*htRh;E1){O!35 zwd2R0X7aWwdDqhT%jLgXF~Jg9`rdlsOzH5-o}%gO{+v+pScXvCfX2|CXb;ISYGK+W zcpN#zk={9l=DR*<$y(6qVI(oqt@!dtrLsc#Z!~|D`lU z@JsX6(ofV)wOsmMJXp?1DxnG7`;<({I>skV3j339JSQOtFy2qkbSYVRA=vRxT-f5m z)7pOTuI8Jw*A%Z16c{s2J%uIOFWgX3y$V3X#fRl9lGi}j+0UV*X#yGz#E@-0U;^PGy-@2cYO$gM71BXF_yGE5b^84ognB7ZF^$`?l}B}toy!(&0Ag(Q(MNLQQSFyjbK79qgyQ;k)1)QMZ*ng z!am{=>iUG!&}4b9U{tb%F}r2L{=~wYL75c?ZD*HfvCWeY4B(~IWmJp!+bcHa4iZc+ z>ZTbl>}#J(NQGa_sr>1p$J#IP|A_A>G(q#B=WK%OVTaxway>q4&I!NM&RI_m_1QO> zm)As9>iJQ%xNWvzZds~ohu}~C1^kros?{Tl6g3z|#2UqR<)6SoP$4(Xb+Y6HK|E+j z{CUKs8YGs#>qG;>6MSk`BO1}Taye2j4zlh<-a?q-Z*--k8WEv7_;~6`^at`Vd^;9{ zb@7~!J;Ybvdkas1$6UA$JHyDaXRsK4+ISb(AGpyOmukX+OZlW2FW zPikLOh>>CU(D!&3&v}k>p}PC8q#Ie@(O0saT-`EBq9fWHmPzgr86{cpJYs}d1M9&# z=}Dv)U?tfI3|vzJkY8YYnAky*eD|D-4a9HVUI6u#FF8LOijn>}vKczT?W*TV#f(k9JCfpy17}rMz^ocMO>iScZdh| zRu$JBhEf!5h0CEUvJZw`5;y4z)=P4OWyO_n3jHj(8#0*M5PA~XMYMX(Mtwo_^_`+B z>w+`R;^F4F1My=ga1sx4^{FDo5j@o;E+QF3Q-TTWlp4lOnw%lrqb(bUIWkt~!q1++;{FrQr4 zz>$Q;2%+&;Va$_{ly-_ zEt2ifP@D%N$)0!^ax}C9_rSV(oWXbEVONfTCg??LG4PZ`?ca;Pl02=+!|iZn{w~}b zex}#slaVY&i+4f?ipOIw(DEb;b{BIEjlynWUpx+C_wmY0pYUDa=g9|nB-p$6C;m6! 
zS9ij$f%SRwFgqBmTaO|5dpZkCz;B9{Vd412#1~iuel6s;OLLBWUpv9vE z8v_=1Y{v#@#va>>UQ@+3rJzcmu40S=Pt=WGoM1+I ztXU=wGy3Fp173)7<}{0id+afR4%?d6(oew0mB}$n;G0r9palL*pLI<} z1`|_GwxJQbukNiye$-%9r3hW#Bd;?uzwn7p3*WFGVgzuR!YjON+IqM>BJB%`cP|R)x9U|#f z8b+xkR%y$9p8eVI{lFe#N!X1 zr^UGUuKSbY+6z(3S(W^>&n@)8XdO>?|`oH0ib@ze;p6m*aPdji%GU$gGuErc-cI_^Yt z3!dZn%ze z313Y~eopeuhPR2$I!}tuheR>i^~K2fviR3}b<_{5Pk> z_sb?b7_kMMcSaL-g3-o0`Z=kNprP1ca*;b9Jp$@l9+LmFHaG1e|1-a@s3Zc6|JbjB zVqLm!J^oI8nmLF2DcvMRSexuYx;GZUu`%<}ugrqLNvM_l<;I~K!Sy5G$p;1N8m^OF z?Hy(Jh#}VPwjsdB^i9LyW_=z#3;U&kp@~?MGA^wz`c0M*JqJ~?RKP}bF162f7t#*? z9G*tLDr?&9U{Z^+OaBHR^P;V;U}{dBsud3~N0J!UpwAX*v61SBDJ5vG!a4E}I-hIx zcS73ec$d#eEHUojHfm<=gk6)!2^E3G8y{3y# z`h&Wu=0!lrZCP)>Nk|4W(8USqL#){kQDmdEHj?zO8(f%746OjBMlh;)u6!9jKKD94 z0Q1Q{!QY5x8_p*x(Z1?W;j<96{6F7`$RwuN`2f6^@H@6n`8=~=PbWo4*30V7vXlJv z`MafI0?ue=zYFikmN2`;i_woXAIi;-2o*et_#5lt(I51~^Nv<3-iWR@ zCCjtKcx9^eHMAga9%qJb7^birB@Fk1=?`y5u2OfA@tOYQA>>c&eIgHi8`E}_u>|c8=dk>qYJH!~Vm23r_jK#qxshwC+hL~J}eTZF5 zOu}7*7J*s#F84NkBN%%qNxqeyy?dVY5oIWw$EA}$Z6jGnIk0vE{g(J2<3*h#v=Ten zNGwlR5Y^yqj2x7M>c9*55dgW@V$Xr!ffh%apX)A_bfs*3iGe-Ey|C&SDchm$pk0_| zx(Bt0J^;bwB370`2c1o2pHzJ;xnW7ZjakU6pN? 
zE6gCp2-O6tLl#Z)$wuz7_-~?!bx9S1YPuu3JAQ=n3fPCeB!0Mk#U_9~d&8us&5o)# zF3~u+;0N5=Lp;WT+J2+1|$u9`y#1d&^%5<=bEsy+xJJALH?N}PwXRpxgvU2`g-4?b4$zpx~1igcyVb95D5?0gULAr{rIlP##zmj26SR+i)pXG|p( zn(cJo{JZo%Qeqn_Nh7A2(F_uQr2i|f3cH}n37v^vkvVyPKwdC&ojs5)#Lm4d^>AiL z%{^^OR#DMewHxnicC0c=uvWcF&J*gXR%w;!GsI_~LkVeznPrkEF@IB1L=XrO-?2gN z!GMpSXl~INMdPdEG+NP>LT?o;)|h`O@}RBCyRsI?yeyaYklYZ*v99pR)O&OVawt05 zL6Wou4j^#suKP+b5Oi(YsBJ|qRlZiYA>Z zfK5v6OHaV`Q7t5mj|eCu2I58TqwpHAvf-e%A4yk4sE3jx^L8lv5!J?_^3{Zge3CQ; z`~bt*;obLwt*rL>Gm7<1nIl~(td{Na=m7=##?$=wS`fzj}=?s>&yySd{!2f!&wqOlcSld;k6DA z>}bDW+?5o&CONp=mo+=oLhH=J_sV25YH646Ftn;#rSZB&bT5`t=SmXj%gT9~6R7#J zj`$9umZQR2z;b4ruN=Egjdv+P8KP%ZzM7YJxWHBEW5djgKlTmO7ZpF^G{jEXl+arIG8^GjhGkOC&L7a>#KeksYGcXHJd-l205S69L+vGU zSUNcSEjY*aG%Xdm(kpc3X>&+eJwJ9Rcqcy{;)-W+d~Y*0kly6%?cfHNm#0W%>L}caUl<3d%KI$fDg9J;s8>&}A9NVVI6+PhJmjyun$wRrBlG~Ai z^iM?WpGUsIrn-58clgJet`--ewTLqA5=B`(^j*Y@HCHrN@h8@(S_17xC(FM|bi60h zgRm@V3u{J<5eMl^bg%z9G8235wj9u4R#k~v2463nVfcuQvwYQULgLj@^;4vr2~o1h z3#7l?6Wx*ZSvn0pl!&u^F{g-W)O+ls-xJ~wUf?znOa>8^8%)=TfAd=m>%c8DtJQ*h z)hzWA5KaHBxD94X?#pVxfXufX2f8NIF%anI@K&dTX?|)#4ZPj@;irJ1{Hkdc>zWs& z&t;6JshU^xZN)W}hMr8GS1h9vp#8D|WJktZ_C2{N{)vMmIT7BQ{7$&}ZE!FHzg_=_ zXMp2ne#YYp&VEBbMeb(E(WJe&6%SLW|fcd`eS=W`Y;A-B2YVtj{()Wx{g^uc;jr!-V*jp`$g-F3LqLo)*7$sQ{& z3W~WJd2Q+(hLhT3swtK&2+1N$bfZr%yoG$={22XCj4wQ9oM#)J?W#-8-liF;mYFh` zH_Ac!Vzjr+pt;W9!TG2bBtK=Q%X6b?YNB*fa0c-ov(I}0?n@;*uSGdxS;1`M+Jfcg z23@lKiAtmvTV=GT;*EJZERcohpJ$!q251f@WzZeUim1utQCVrwG;oUD>$MPjL%nmV zL8lTfc{*cssjF$e4k}Dg_Ejh60`k4$ytPC!QZ~nQ!I7TPuAh|nA5EyW5g*A!MgPEF zAd$=W^uaWAl+yyFod_?+bH8VDIgfHqWi{&!7LxZy_Qi+`rV=KdL>MoAr!Ekk&B#!6 zKwIKm1}fi&%L zC|LAR^+mEHU8?vGc8$9%#gUY-Wz03S)K^LQV;@|1g0H}{aGX5_9-EC=*TFH`nWo=x zEoU?ILOS7I+J6vF;X{=RayG3>u19_2#&Pelze4L6H|(tMTJj;D@46Cb0iFNIb^w27 zS(UB89h`z`8?Iu5_1<_IHc1nSdkfAh3-G|yeX@6WW^5?uj~9o|q|f26eLj=vV7u!a zun08g*=-Z(D$@_k8+xG1Zp@|9nHjoD@*(m|y_fXnw)9(VPc$RyUb-fy>l$8D}1)NTU;yYo*62 zb9B8!RlF4Zj=DsS@EK013E-0FP(Zrd{^YDxPc>||^imefI}DAA@nnp4uY>g4q1rE< 
zleJR*gT0(2kvcK5s50g>?HN3T3ZSNYdlG+>Bb*;#_lfa2Tuwjz58Zw9G3_^*#1N={ zPmI#~D!)TLR33`%nZ4vn*@DC{2ZNX%S;zP>CxW(;FXsw2M z7Bw493#8)>WAxSdDQ$o43ejHWZ&h3R8Tot#p0Jn`$#zEM((kw#fj%V6jP$$$=2Fv~ zve8e3i>2K9#-5<*YG$n|tj*BboPq7sJTQ>LEy^QWds?*YvFc{rU2e2uZn!VaN>u^l z$o^~(k0}X#(AMp<^QE^*+0pv=6duonBHevtbNyUzLL^v3G(ax;RJi8tv#Ii~Rw8i(^Skx_j;~Wpb&yf4bPTyJFo&5-9P!eVNkpXc6UQ7f-r!d-M)61yXLpus zsiWDUvUSKL<5}(x|B7x2do)#}ddw8Z+2rMPcDR|#qY4AA(Dmd#&wIpc;;r*Sd;_se zKPdmYc87ef?YkN$^_E}CX31}3xS~6+QF~J6pMpB(tM*vC!(HwdwwIete+?K#uc1mk zmk~TN%}IkDCtS1_^UI7T>1W$PeE<<+sn?Ey&KU=&erCzEiHe8Gk*YzmS20@IIccBJ z7WOc^%6~O=jwU@2q8s_t=`YNSh|qw1ef9&+Zp$?<16?eRz$vl7a83I=bCq_Anx8aY zc~ChhI#`x0J04FWbu#WjGt@RyGzGcU_IiAW?H0ue#0u3?xL4pBSt3&7wT3x_LC)pm zED)ywmE(lO)0Aq`m{`$3b7 zJP9ACc!o9y@T4oSGS91Y6JF5XC7^H^&mgj;D|NY!#zfF1nk6U#qY8>b|!tb z@H-QM{>n|KvqeL!uGE7Jk!c|{JMoc&A~LNI zrVz^&{YpQ`K2cW-f%H1^+Ww6@EbN|ri7iP_HST8A2~Qj(Nq$tdY8cfTk|txRo_=C> z1nKV)Mz#^HP6j-OaFu;5HLG5bX9~wF!{OI(cg508`^98 zqU)3Oz;Zy{lHzM%lw|B%ja8l)en-(JO$}_5&Se>&9&|mu*Ug=Xq5AIZiEVQ%kt$33 zSs`3rAT&3N8y)Q6@61OQROg%=VpylHiTSN@QZ5SnqR5in40y_&=d|7v>8(tRYcBXh zMeZmWqlXkPq4dsJikoRR<=ms#V;#Fh0D`U+uKF666&)zibJE?4JV+65Thml zUg(F&zabYr#B4Q|=k$W4fsFOuHCyBd232hp9gvJD^%jrgeJIEi?@1eIdk+O9^sppJ zR!1$>|Ag0sj8o?#DSmV1?&v`eZ*~Gc-{~DW4D4f$)Okso@MMSD-T}px@FmZAYw}OP z3sQq^BVal{+-yhwjV#eGLt27sRBzCH--ogWEYD*;W5o|RIdVL~Q)XhV1{;DMtSClb zic5-%(Zg9#-UswhiZy2=dLpjcbRKiW^XoFPpx}k7mDmGclWequ?jFpH18beW6VAjS zI=$wR!;#jb{4LQ#^sML+&}1ISn*}x{-?0ior`SKHUijOHIBgUDDd>Pw3r707NtM9c z{S#dY_B#CmpNS<@Q_Xt%6S}(GnVu&aShS8x&Ulndk>`_pWVe&cV&@yfh>q||?cW3% zG)D22aPqk#?L^FSm(tUT1x_{~BHXE-)pb&DL|Ar&+a_FC*u<{N=x+bXY)_h*y@?Kv z2{Bxza>E{LDDqoivLc>L^;ygD92q;`sXU_CsT=-|h$6>UyDBch)-q3dtRSLrrSwnw zGCR!GCx%##v+2=Rg9j57He1t`4h=Xhe@+2!hp(7Crbu&_IwpjJKxK^`GNktCRkC#UWA5xu^ z#`*=xvRTY?1EZn$I3|aa)X*J^u?`H`J0x&W8Ty~Ll9aQLQVINCjR3+#+mguOrjw!`Ba+J=3 zcaJYLE7{ve`())XPj*k>-DbKMhYKdt6HR(y4CTYU5H*l0=(+e8 z(UyEm;!gAkn+O*I7mrWqU%2g7&$P!H)4ARm+f=s>&&WKe?7u6NH$Wa!c$6O_Z8lC2 
z?BV9JRidFxh4`&Fot~9EAG+npx;YK6Ay#{QK&tSk*Wag&vpAjkkZv*_IoLmQynbY@ zR~DqH$v@4zrCO?Y6D*Q{VEPEn(t43z^n=Bc=0OScme3xOi{t_iUt}|wdG&v(bl%t2 zC+UCN)cc-h?9T34y)f$^X!vsqST(lb6!Y7A5R?nBki@rgBAQW6pS63-4d^5H-!u&^r?v!8NstUnE*8 z{~7;AZ0Ayf$4k1>z1$RV7AU%SIdxmz@Z%TL8Y*Wt-Ord;@=y7?Oe){mdOquJ4o_Lh zUub%bUlV9_X}qz*7piG-W#X;UH$h{dGWwot8$1Hs=s257HDfIY(jM+3ClGBAp<*NYt#j>3<0t zz^1#XC4g9XV=}Zb9X@*rGGvZER0sXYy0R-=63CA!S}r*v>}hO;*NbQ_9KHedh2#h% zNl&(;^WeLox6mi(CJ!7d!%kjXE4m>3disypPt<N##h97_VjQ#N zPlm>0)%t$WMXZGxC^2DoM5S;b_C0A1G7}#Y`WNy8U*{2zUd0nG#R`PX>606UOPN7? zw~IQ{?p3a$8mcmv7Kc#1v^mgtGKIPXeI~96t0ZTM&507Y8Z?F!B1$mdBNP?j4HtNP zMm6ntl)y(((<~9rmQSy+3tvcEZJ$IXT#ROsn9q(R?T|0sA!wGYrDO?L;AP~xknP9} zVu*(~Iu8Hd&gWe+rnf}$YxPSS`2vIHSy`ztKy@x>plFPuPQ?>_lGPLa#f}7Jegf3W zoQS_IIYG4tbMSuRm-_*v0W3SG<8884A4%db$S&C(BbaF-OKgIV`ugla!e-5Fg+gM@F4G*;7bQ zuZ&YbriKL;&RlSfxtTXtAJjFKgQ_=aDJpP)a(c@mQ27Q3f zbAOC&#HrIS;jYx9gLUvcDtK2p97bL)R>B+^Y$=C#6VDX+@Mi+Sk0G0gQ9L1%2M)&1 zK>L8G;JGLd3~|4JKEcnn_Lq#8pWFXP5+F0xzL7Zm8-=eWXSs#uNpKf-fP53|O#j9V za4Gd6ONtC7pT|*-Wd7Shm(W4rmHQD?3HqL#0P%EXd%r^aHHWHyO1h~|7W9#$i>ykXEyty!NW8KNG>wo9TEd*KX8i!3btDZGFi7K6Zb^tr$p$Sl(CwgD*x;+7ra z1CElKiBMC1+7;)jJV%eFy7b1&uype@#S>IYDkxjV&&4(Xl50gmsJo4Q0I zRt$`Ck-X-l{{C6w5d;wfNlq9}f1M0Vmzn5Pv*$Ry=9+VA3tMf?Zd&uAVfv`YY zlUAsQuIn}=Ya|O){UR<%EYkaaOW^MGU6)~wbMfZ@3XN$TT$ct-u5Boyps5w>&8MKn z#oZL^q4l{f_)jP)dyk+mRA@M!G#UD+eilwkT;!vDS4f%}!dVVaC&r&RM;%UcSP`gE z8H;L%lNp&`3zfuUp2obG@D(hRE5UPN2i^rxVnNn#uuw7}?k)ZbZVc{)SED}edAI-{ ze0(S=7Yy4|LjEQEKSgI59Yxl)QQTbz8Qk6788q(h?rE3uy5;R2cVZ-jKnU*c?l8E$ zxWiyyeLwq8_gcNGYTY{LdG;Q-Nkyr(v;u}6)I3|H{+`^}hulXgi!YIh)Ohy>>Fl?mj%X{&r1J4S7KiB!o@I)o+u}5k<|L{v$f%Uo3xyZ_2ry^B9kEwlxmNyV)iH1$Nsk)AYf% z8IEPYLKv}`!8Exh5HfRD(F&`Yu7 z&O5|i%xI%jIasuLLB?uqt)X4q3`{)rg>=GZFc-WQVB^5GWNf+1X4V*bE<_ zmm^J*yA*`pNc;nK;$6iaYgb~CZ(lN#)a?Ale@5aP)Z8!R>V~=OFVrn>2iqUrZk91q zv9a_oIv%Tpwu%0ZnUa>^6@D@?P)wm@h20_Z$(}y%$gbqh9lm@wAgjN~jRb4U&FnPr zckVdGpZsq8O)JS8)Fx^>d0cCyT9XH}`-5=uP{Jm1H+eQpMNTE}`kW;i$uG?%+*)2! 
zH;Ikrc9wo)vf0nB4Ris!)L^5IF#<>u-2~4x3Xn{DWX&QkQB4Uw$yL;luz|#BP~@|T zhyYWY?y(IfWo;^R&N!@O7X4U%$nl=;sZ;CTQwxPD#1p`C-_!%hN9?K0o}@2R96yyf zP0K?Q#A#TpPe1%Qh}t@vo#}|HZek*A(xMC+u_*1cs2YXF}@dX@LY z{Do@oy2uU&gYCPKFfz?Z?z9>&z*YW*?hW985C zWcpqCy4+3L*OTqMoz_&cp8S3t<~MucgT4ugb8m<5ci9J!1%BQ+-0Oclfd4T52HmB zNa_b_JpDc9BN$G13;s$@r{;_MZZDv$4bxw=7L;t#C7b_qE$82xd=0n7e$H-cA3H%m z5-O(O38m6w)DeDc$_cQUZH{R{Rx#^>hZ0+;*IxNzZm^&_M{mm+Q`|;3*zv`&nRnO@ z=tHDvLy71(!U@0FT{Wm$ARR#qSi)e@EZG0U#Q29zPEp!#e z3$OET*$4AMIsJu8>{CY^ah}PtHmGmYDop8MV#ZTfhc}~ndUhY zy8-TRn5|cp{g=N3931c&ni#qq4i?sK2U7hn>bRO0@O#G zCDQqt02EJW4tf-0GVrUM)SIfk^Lh)^+qbcVR1 zTI&aRlJhU~8Th(kx$!oVPHoh`LR&!(bwkkxnI|8CWu}I))%emFg8od532vm~$)4W9 zGlTr*LhVrF|0zoOsL zGBP|k6nrL+c`qgfu&AQeM$uREuUoQd+~#TWpeG9Xh6L(7Ia!BMQcXF3m>MSu=K6sD zC7)sl&=j3WeFsc%XJ7!?-lND_Agg?=^^Nc~&)@v7Fxqm$xR3YaN9eC{8vKaxhAmPD z@Uxifq9cG~1}2>mnIv&hB@`rL9@9V{YOq%!u@~f&ZnH|vZE~)eQ;gfpON?HIc(zoZ zpznft32I@B@*B5@&&Yhi_TXkDZl?*hW#nGbQ*a}&lyp#sJZ}=S!S0eCqMKlh%gx-+ z=4(7{cw>Q>MY`XnV&sd^#c)v3#Chue%h=D92ul*S(LFdz#Q%VSc^B}6970EWj>UI? 
zJ4J-GW!_}R3DZ5-HG{vQ#sO0ob(_UqjFaDH-Y-vL-x<~EJhMo@G`^6k68eOH1|!(7 z0fUImB7ahcuLa)7bPWUV*tE7>b}8`2A-i6^?CycU{+{8DG7 zWGfqOi%t1PTg|g$r-9doHldrzQ2wz<3VP1Ga@S!7YJADV0z@LuxtM!G`rXpg)n3kW z-|SLl4`Qh0wyIS9(UhuLpZ(s@8tRm+7dFFJqZe^;==Pu!%mTdDYa=y=+*4eeZ&S{3 z73Vxwxtkt4$E%ari?(8o7H?tcr!}kI8n;5nvU}=7kzPrcg+6F@)F$={_9#d~?E6^rblj!IfW)t&z4l{o%(t;H@IV(d1&Bb{ySk1t1#nDTL0@y6hepU?Eu zbta}Irf`pl4UuKcadKkdCF%lsz$=aX2x9Z^bS=Us9n*qJ*mXSkuv zGSorn1wPn%K_?|Rv!4*6ZELu~FO#ZudTwFbEIx=`9(S5u%lL*D(C&14z(SBt<#~=J z3cw9llk1{2+_b|n#(aWnXH7EA#Fv|%7)sU04RQLO5>MSg!I3(gd%};2y~A8(kAz*P zZZKIQ7w$7X!Gj=%g3r#it~}>YW1#)9{S8xR9caCeelVHM*HxsUo$*rkKw-Q7K}stQ z(Y1)V$)xZTLf=!#Y@BEzq38zpccQtnyZx*S&$sJu*$?LSrl(k5IUNXM3bOZ84$$wn z6lES1;!X3Dc5{F0pGKF_zXeCgS@4r6r z_Pj z(bgUNS2Dsp6L(7uFr?ro;uZ^gh{$j^ZW!4%U_HH&-0!&&_yfkau;@GJtZ$jWhkQ); z%gG}5A-^2aWQ}UF)sM`{USxvFk`$eO7r8CANMv{43=?f|phG}6dI|9NEGGX0Tdf_6 zB3LisbpCMWIJo7y$}ELX*^knPm2E8BY4@yg#z)l4WQ%?@<%~JS+bH+2dF(){i$6;p z1kXJulO(uqepGl(hw!evV!=V?xPth4NN-=wotB@s%wb<FZnfz zI^8sGNOT9TkZlukm>ETX^_vHdQ$O5a5Gd8v=vO$y-j4k{_kY%f_zb7fEY-}jDU8ih z!5nUAmG-~@bTJ8xaD=}QHHe$cxd*?aH#0YUIp9yr-D8RF)M!JCLNS|1AJ5(CY=_=( z`q^%%p4z5a8YRokT}-yrD#JJf8y_x2=!_A+*u8vi(0zI?yVa+eyiY%M`yXCU&DU)! zcvTQY>2j^PjYzF?xU0AFkoA)t%N}hyZuyqt(%&>uaiKyR!?=j{Y?yF7@H2ImQ~R_e zwTzn^g$Ge?#+0h>(nIXKijndz#EFtsiePAcL7}o)(LLvfdQhg$0cq-zXInc$K{1T6 z3K<;wS?7uV=kL#n#8b~c)N9bgV67Ugbh7=*SE>y7%i>ryqs_`cpyA|ma>iuu8wFa*Z~gnNgMjwZ+>O@ld74n)e3w zmc_fm;9nUXZ10gPiBHWr=z(aJ!5_O2I#C#mfAxFFwj*bFwx)UlweI8w6*`+aYywrn z^@?3&J1U^$0r3kRUhtjxqB@uxL41vR?vi8c(i{iq&@}g^+B%EqU7y zX9fdzkI%ptT;i`(^rP2yCT(ESbROLbunmj7h{#N6WL`Xudt`N295d5vqA zFg>%}K7t>RTx>qc4T^ng(6N&uh6)PCFDRGQ(t^)zDxW&<9!XrKMlqYp_E|jf@kI|! 
z=QMBgJ{f&w=mZAp=$&Oat0owS7+wj?@CUvwX$zv&xI~ ziOH$fe2;rzuY_!oUCdh$ui#+)NoD@7byJ@z4}^@e!Np_YX&GDdTf#SzN4Z)f+hWh!2BI|)(@dg$CTORQ!N2+Xvk%FY z9^s;KDHjB6T#7tI=*oxaaCMt97>$yK6?vdL(!b}8!}=v@oztOJHAG zdXD%;nb^_352Qx_&-@9*hSeK{-P@qX4t3DbE1cv6RDe_zw}2bYhZtV6&2?5m&o;==blKGQKRr- z)lE8uc1cB$ppjP>zvS_(&iT7JXUa~Ojctlgv(+;1qmoPynWdqj`W19lfRQ^Q&QF}Q z$ToAkM~1TTL|ODDRu5+_1oh@z^%;ZWJj=h`fipG5~OIq7kE zoT)0QkJI09Ear|iPWL*(7$X=WePm-Mqe*XhEgiB4oca&z&8w z7@2*4|8Hf0tk0Hm6|88rAxphU`N#ED^Fn<_&_i>y1@KPT4zI{OkM=;GN58`e4DpK~ zS`ia(R@LmSlEC zSgcs2F1N#teqOkPV6PQqKS9$@W=NCJ8++1Z-!ZR-^@Y zv~9PlTzm?>)T{tUHOI8KL0oz<49K5R_2>xF={E;EPfoi;XGR+iAH}nW=^Z;NB_DL* z8;{CzggM216f618RuAQK(b?6Y&Sh??&uG`t`_n$c9jLr0Z)6W({U%~661tF?p>V7| zoSx-r^KQ0E7FpKT9Fe{^#TAZ|*BX>&P8q7JqW)6%<8P|8nsZ$D)NhcTSr};O_Mx(hpS6fgH&x#%^xnGXE|srRlGGVCc{(>`rXRbnj~Re z%5`WwdpV*QzE5ZSY(+~zr!#Kp2^GEeduO_pShuLMR~LNQa8TMI*OI$LcHTKue^((` zZ{zz^UZ#9`rKY`JlYCNpln)Qz2Wyx>@5ktT;C5v@GV_}kAnL(8BOTx%~A zB7j}TW~W!~6n462#%#S>hh$IL)Ux=QXihpnmeI;E%p++H26`n&n2W{Dc9RcgCx8jDtF z?`hvyPD97xesmgq7+I|@M2@2|X$R1|7!lPGdyQLt4-;*Py0eIKEHe1uRMiNia$7(3 zXmmmKRZVYnXTcF|8*GtT1GUGRsP*vQ_+-^)WENhY)&&g_n~#Np%eL4#~i9WfNRPzDf;6Zjjp}=V6^l z!Z#mZPP&~m%g*SWdsF2d!L89{H4(1lCQ`j4Z8jX$lLfq1xWqK+0!`zYDL6tv>YV*irog%cHAKSXx*|qRX*3!x4?2Wxb)^bwYJn-$VCR(@r4p z0PQ;Ny&Q(DY+Ukw_yv6-Vi-D;^7i?Rq2%?WmC_Ztdv?lXMXt5=nTq!IgQc``j#cTn zsfsa=6UM1?4R6r~%{^U{tUolCXOe1QEgKL%8#zFW22ZR5=zmx*c~{b~T`F5rXxd20 zbMr!qH!4oLg6v(D-|ZfpRz1rSjf~YGMw#T4_K2=9u@dgZoec{`0+{aJ9%w%BJ7|#{ ztrE83vQFhYYx3lw#TN_DE0}_T)+NfVIibuk)l2(c$flWU8JV4=%{Ejez)-U=J2V~s z!rbvHKo?!)4Hu{S?`(?Rf0Tx!_@rK3b^F7d95wh}#5qEF7*^^w@O^ zX`@VTa3N8uOBEfFW=-FmI<&hs+RzClpdy07TEkBjJZ3|OrQrBXEH0u1KaV?o7mN4B zg(DOc4L#f02f7aH>mS0t$f7bCd<41bnup9mWx9jN0jx8gj)q`0@-p-%zB>5=wh4b7 zF$)hP7~kPUSMu*erP|r#uI)>;$H>~c0Z;^4SaKYCL|Q}(eG175QAk@7!{#HKNwusF zO(JEiBJ_&!ncL} z=#OX@q#M0jx)-@gVM#3UtOmm_QT}&s0?ZWy{BYi@#7v%FjpHzFD7aL2|w0&D8 zq4Brs@>4ax%sR78TV~|x%}}sD2kHn95^Az%!7sT(2^?~gxfyyDEu?;WlUN2Ax$Ctm zr7*5xi+XfkMMb^(nd@3^mB#ECZ@j0SZ>PweL;;YFyAZ!G1)14({zA2_|H` 
zYDy#@Wc{ppM>?gWiU*Mq@&R@&IaWD<2V@s@YqXO1qxmNDCtE?cl75rDkTc;s$uZbw zpGV|!qH1q4ab6y~WfxJcI9csW;Hu??Rw7<)wzVhLX`XO%iB-^SgeN>B(ldlC-~wWfjz7bo(M+!gyDzD{moO5(}q(Un~*p9 z+ENJZrK4TL#k5AUp`F<1UrU}t_1sMLIrJhsEYpCEXS&B{V@B#%XbkoaeDyw#dz0Ur zQjjUO-!;#WSnK>^(W+z4a0DWIjZHca$PLA_Ycc>J#LZZf{H$x{CW0bQR5I7J72%3t-x`11_svR1pjBEUU{~4WBFSZJr1} z$)_0~_=oEwOe3T1YFR5J#L_M$6Dcy}MGp{@B(nq0p^KR39v@L2Y_Gct@2MMGRs`>> z`k7M#?=2U-q43G#BI+c3C0_>hfIqt;B^AgJTS)SFB*f$&`3@=4g$2AsUa-JJip~bN zo5yeqGS1d5W3jADqOIznFg!oJhVUI1_(!v zg-OV7*a06ISwfs?!kM443$@ium|{`sK)PAk?3yS#4tp5nltGgX;;CTlTj5i13twQL2oiCH zu!($+H{n0Y9Q>O~L#`uMW)za`347d4;wi;Ur$Eggv8+*NiLAsR?gH^Pq{py)>oV3wy<;UDR=*m(Rpl@l^pWG0;T zULulqI&S<)7FyPqttVHSF6G=Jo*4a2p#-ge#0(*Z>m=}5{I<|Zwi7SqDpDa_!>)?S z#KY+4!7K6Ql*ua#9|soJjwF}4`j!}oe;r+2^9Z%|kKr`Y&-|S#!5fY5p;SCg|5>WU zrwPAP;_(iA&uBq(XN(WhVyEaeo{d;7NU4q@+ZG%xxHEl?|j``|sk&Cf0JQuJ5{mocBH0U+3y<#`nuIg2O z5CJOYT4&<_l@Qzy947m2=uCu?|%xdVTZf_ zKv8hLevBb9<61>G{lu&bq6hVZ>FI^5?OROj>Uas4$6V=Pc?$;#N3z-U?{OWct5En7kIBH9+JB%AMgR-b>UYo z2po3=v;Lr5k1|OB22W`%2!e)E4PdU+1m1zaQ}%#WphHYgas&B2IDza#e()Yc7|0JB z=5iH$Oo1P}Nz6p;WVUc!#Od29HkQn!?lapp$EX2JZ%GJoZ(hR62x?-I&cbJNO$)fEan)YlKE_+$XkFJscW{WEv>MBVX`RJ+KdhX z4zp5mm^@`%no&=7)%Qq1hy?y=gn%Dsj|D{I)988+1j_{_MJVfC)ZjSE1mu4(_|jgv zv#7IFpi=^yK(MV*zFtH>eNA^Kb{SX4PbWI*%;Elc0(Z|p3p>TEaDR`@1P2R_uwLcY z>=T$(B^KRPdRn13h@&RvwTEVd5zZsBHslH$n>L8ZHV4J^!gm__hpoZ8^Y{FUF$r_r zU4z~h$(wVm1sQ+x@0%ZHeY4#*jgYp`DGa3S3GmQ8Qe4oU5~5TGr0Lva&7qVntO`0F z(}U@bJP3B9_G3T2?gEUQQg+h9%f{pZ(;a!K)nFW_94pM$=c#n$KwWqB2hDcAP@62l zxmnQL1dCSC1U5408MOjG?6m{*CbLRI%&dBT?oi`1&3wx&!!qr1?xXGiGzot% ztc3qkH*gP;UfD&g4jq_0lUat%hWp3jENttv7{yNH^QWEx{X&{VT=%pk*$)`m!;}J5_ z>28=wrWvKWTydfwCX6FBD9`OARZ51%yd$h*g$tDOC!&M|HS6wJ@O;@+$)&) z0tOenG4^It?5D);)i3=A;VWaMUhzEh7cz+(Ocy9tGuP>H8ToWEwKHK4MN{69U4aI? z4qQp9fz$IEfdaRDl>yR++dOo?#7tDCpb~6A&UfaEp(AV|S0nq%gs{8PQ|Xb+h4?0_ z1N}B)Ci$K26F?H5sb%6d`31;wFB|$=A}vRC7Sj)*rO?`#L;l4%4WqS6cATz3Bpy5# zR-}1R4czUx{lLaz;mIUmCi|ZwVCtTS3|E8PoR#|1&ei6Bbj$6DoQbcuGWa%bq1mY! 
z!JIU1kdQQ?-X@+j#Ypgj?Q{} z@k_%b;ag#E+QEC}Ekkazr(L1S1ZI_8oB5jhpQR*eA1E+hjh2wfaRVOo(bkj7a69K;( zx)!;UjYB>M?xc(GAzrP)Yce|btMj|8x0SRXmJ_;2E35bdu9_vPP*`V-R3DOm)hBD0 zWtasNx|DE%t3V`?lbFlc$iOIi2!6}+DL~1hoJMC0)ppBD+g0^8VT`3*vy*Hz}qu3gA3&LG=T(u;!H9q<_gk9QKm`)r?>mgFRf5e?&&XWtnYv`3E9Uue8 z$$vfb$Pn<4eVffe7aD>s2p!CZH1dE}p`&UyIpOr9exbyNo{ezhKr^ev%qZH=(uudPWL2N4GTb31@7*6CJ;#>)-JB& z>`v25$3Fa^VV`Z6W{Ym8rAGRSZ!#67|IOYtI1?PqU%LFrx0IC64W`KZY?+T}jG}kB zUBbJF$qvHutY9J&WZIiskEsldYmQo})7rO6_VAh3`KiBIqv=RINAEX8MSP%o2`_>~ zC<2FwOqKS`B)3$onQCkGEYxPc(-r15W!KV6T>q1%plZiL`3%*2t6AAf($m~d^)fZc zz-ykzt=0{NK85>m`_aAuBN!B4<++gZBTbe?1(f=aOP4&M@f5*Rgjuz8}r_3Yr^{ONu&bg$H0%M)M zHKDM^hH68VUCnQxh^%NM0c(>V>&3ZWY%l%>`oFM9HXmQ%ucu+6*s~{4fYzo~c~wvm z7w-B5&mr487Qh#wG;1v)RrE3sMf+!BhCS%{MG@OVWwffRjN?ntUN)7Gg5_pt4@V_=VFj%6Xpm;N$t1#o(a622TZBq&3)ms5?`AREAUS26$n;Hv zbT{bSgcQDj9vwBArK#N^BWaon_5Bw(sFv=xiEW@!7@5;k{2iSmO_z==x9%1Y3=ed7fTg@2h>F8)0 z33syk8BQrGO(*rTES+J6ZbVA7@Rq+8r{S77TZEFyV?%@LCz=zfJ>l(z2-etwBkNm{}Jy;_2c#BEwlClOYvq&1oE^XQu@$YREQW2D;8q&L-k&51QIc?&507dWw9grlP&_8%X#3B;_Pk zx14`egv8A;QGGLQt#yhvAVF?g20e}P*2g2}5D(sodHT&_?hu1Kl++^f6T72ygvy;b zQFukY9v+uRYSt-dxstVOvPpXgv^v#cNr98&?-+R`FY=OZFZv?*CEo&H@B5tzA+~!Y zfpcI2bE0Gc)EB>2@Bumi&B(2SGZY?93(`NU*2W+QQ?8mzP(FT^;Vc#z>8|UGuL~Bq zY_W-Rkp7o+7dL}RAe=r~oQ__@?iGy2rfL7lxrqt#495}de&$Wc!*{h9ks7$t4Phfm@gqWXlCY&C+lSKj{anA#%*g4 zvMPCsX&%`eyIZd%&qws<#gj`=85;qH`wpP~2V&iyiN>V{aHOac?#TyHa|3@fSEIjWrCm1vV7(Go8z(Sj(f{dW^zSemcau&JjAw$VXWn1Ib1L5L zC}9H62)=NT;RRHZdryB^Wc%mo4rQ;l8HB3Ti)NO$Cioh*acp!y;TOw=9^+yeS3m=O zk8bvk7N-JN-Q0thY_qlee2r z8au~%>0jzsM@|#Q3AW(xEW_RL`%V2}XL{op&em#c%qJ_P%?xKvtznVtt}8?lpZnZDc#ZpAx&!$LA5yhR zK1}OYC7W1?0SBeQIQTZIocx+C?0&xCJSx=#b! 
zL@af`LYcupETM9bN~RrO_EsIFSXewtGe5h1zE(RUt*`4YG&$joJpvAh{%-z&n8J1# z3eo3*IYJoj=Yxwh#PjZ_#e9?}TCm}^_JAh3tQ!<3&n{X4x64|amk)1Gjd8U?QW89D z26TCJhIu+RE^M9tEj})AJ%5Cl;}gJaB!k=!i}@%NIbZPy?WJ~=&PFq3b%p8Z=}cqp z32bJ{R&m>27{A$i3?CEq*rXPzQ+M_2iKxI9{9LlD&tzscdCh&dxZ~Lff2&wb$W^;a zR0JbEUa*UM%K;PT4$Qp)X1dw_J`n2Tqg?v=Od$xE5NQ0n{FuB;s1^M z3+(q^N{4Iyagww9*UQ;dD%bmwQSe4=dK3~nKa1mPydWvYW_y^5oLxw z^zh)fLLPO_U&=Zu&g&3WLoIjnB>n}zwBco#4pHX9Hu4xMV0(N6@K~bdnWq{1HIEKdW*M}nnhGRo(1Ge0 z**KUl@Rz?q0?fUXThZ>|jhaT!suHz<7?gS)o{vw83`cw8$9-2}KZ%G_m&870_P&jo z3kZJ8&g_*$Rb{DkII$ydm%JTu(paH*LR=;rRbqognXK7O+)W(?Z6j_(d_&5K6TWk? z0%FUFQ>jn5|LxhI(SlpC`9;=LHmBmfWE}H4ccN@8v(&ImF@$yyuxdEpTMWVHlITDHtBohXz~x2YRA?zKgU0H7c&mqT9o|Ax^I5w zh?4v^p5k}QyXz04p2~5;cIj#Le6A@;rS)Oz!dt*$RIT?ZG@UFtRFX^=Ih$)zujX&9 zy`QllXJ7H)tVqZIY-R~->%w)Bm73k)V~RrtL4vEU3U?Egn*VV#!zMx>X{&c9v<>;< zKz{Pd4fdvnR9)$x>UZfciu?-~Wp&LvY1uEC>zcs?$$V^kp;-!v*&}>mABKMlpoEXmHE3^Z#pg+n7^LV%Er1TYZQw4 zw!bnt)dJI?xaXSry73`1pr!0e&of8>NZ7+AXKkIod24F%rbQdtr$4HhkSoc2Td~O4 zGy6~R8uGTRWge{hplI*dpD{)}=MIXkSHID7!K<|&xi+5eNLP?^Vu`YKYTBLx<)`#V zjq_BmvfvFS^=-+ey#AW=^7+O!+Iz}0l84)=OO%1gN^L`Ge>5LH8gU7Gjvn&4Pb|hS z91D^ANUC?8SIA|t4M16{m|6Z&wOe^6$DzKh&d|RRx91;-Xvhyj6tQq6{5~Zd?S`gA zL}TgL37-@AC1T={6zO60r5)epy)+l@BubK;k-6Ej7Pd#m?eaWJ9Qdf5XMC?oRXx;0 znHM#z&^JCCn$9+cT!YWh0bX)6k^Hg)$*`0St53>uD|%hVN_yvSbd^i%b13}}`C5_j z=C62aU7&icGMa{FkeaFb{&6ANOZ;EKU*S|{l;?T04Vcl4W>D3|8NF?YTt!H&U9QjN*o_ zHB{EhTE1w1_Qwt1ZGWWNQow8EtqMD1trhy58u?|_-*)fRjp|BsyXdppX}Z%voyA#E zna4pS2ITDDrM;LM)btYaOFv#41Kr39C`REh$p-s5_@TTFe-vSq3q=CR617H_fOXX7 zCA|^nzsJLs_;K{MPn2l1{YA zTHZqK;CAd>=rQ~qE`u*2?(w!sU)}-H=+R)#)Q-cu(f6Rg@pwoH565?BS0U4h?gI3J_2-nn)QfWQd#H-siJ~1k!uU-SMD?Rg+Gub_>kX*^k+lyl0@D+gA=k)zp|{aL zq~7}tK8l>*>?aS@HE(>P&zBgUNzc~sH4=gb$5vvjkgf0 zl53UR)r=shiuH?&g&)&rL;TU6)H<(xY#w=di&FMi4p%)??&b6^YOgTZ``NB4t1K`0 zhpPRi?byHSdj^ZLzqX?;HGML)koSmH!b)~}upTK9NqvvdqoCDhi}Xy<>B@DoU-{n( zmMCWD4z>Bw-phxN2NrbucBX3NApVMguD_)u3F@p1y`#5ZC7Q3G!iqMf@tlAanXaIz5K1f zX81mn=dlfK4KnKLq$fAFD&uAMD?jIy%72#KH&!Y;6kVsDtNP@fgQlrRIWJ50YUW!X 
zCNI?b7(Ye+2W9Dc2Hb|tV#8}8vK8cPUx<~by4BCcwx(|{hp{7BOLD`odlKFx!G6i^ z)06O7%7IWTT%(T2PQ)*04GCw6x$x<*aYQlt$@_`8-J7_LK!;>+*l0q7q?x5pP@8<6 z>kfKe@xj;;>!vCLFR^IND$QH$fc8@sz$e2w34`z&bZzK%Vifkkdp}W5z+3mhhm;>` z?jeKJ154H+N{y#83OT2Z&~HM=LfPaF)DA=H=U5lSAet}CXi2;bACB!0^}rkOr4K8QRYvtl%evfb}*v=ogs6Ux!i93!jAaB0nk{kS*lZ z^oM91xlQa=k0c!-2J99o_YT7ir2nSX8a;2W$kmSG9_P={9^mF$av)EZ<`zS@nTzN) zF&8&P5si$b)6z~Mcd6#sP;>+M6Z{B^1?#*n;R8uW9icvJDk!_CfsMYo3EC-!)}}k! zW4b+z6ABSz$Q0-&KSlOOXc|t@A1Ai3X`xs;gvo**Q&7$smhGIiO zx9VN$_W5M-BK5}HUe1;pUuU7dO!L}45|CQXQmOTarkG|*lAzoA!^vT=T?mb$5kK~G zAd0r9T^@VUCa|LNmg;G#M`5g*EpoR{)J(`x=P(L1Y&;Cc; z6knBg$sbPU%N|>9l1U0Xcbc57T90Ltz12UIbBQ0?o#}{F=r5&n*XEd9NgpD-ZosMIJHh~p7vMUc5IuIv1fbl zwK-NYv6{3=?KZW2ic{ORjc*z0Y&sJ9(T6gYV#rsU>J5{d8&xHd1<; z3dfVv)Kmsh9@~M!#Z}#ls-m8+JWrM*KPwNB`Dl5;Z_J}ps6eYWGoa^T0D zBvyfr$W7u1=&S8byaXe1yNf&X^wg;$Gb%VHj9dkh#5MUA$X7(-`*@)&8^0jBh!5gV zxI*hA{Il39SVH)*l`u*SW&5cgib;}Xk`6=>GcZL;Jfc@buOUa#{{$~3A+V}u1>V9w zvSd3xO60q@$D^!$EDLd&r7N`$C(P}k_IRz(LRE@i;~QmvC3>(wlW8K0`4V-PSWCYS zG86B?!)0eMm*;$O18kFft{WC7QX9lO?^llQWEk#eql^deQI;1yTFuE`di}uZQ-o^}h1NfPk)6)>Yi!FEFG~{Au?Yk7? 
z@P^j1%xm}@GnKdvH}koXvG@r#DsT?rM=zTHzkY9D?5*EvA@l5VXv(76Igt>P!yrSbJ!mNJH7kxcz!mII)oFiZ%zLYLn5o=D# z<;!jfd9o|{oA{&3srGKXpW49e;^t|0BY)UD{Wom}^V&2&S4_jhmQ*Kw7`-3E(F=(Y z!KcARim&-sXs2pcn$9Pw&*m-VR%vUjhq*@jXxhZa8{5IRnIooC8aXo&&XZKqo6)3{ zm-Iw@N=$cfl58XrZl%=O>Z#may}tN9H_uS-xysgS4 zLm%Yaq`TlFQ^LSH!W_K?M3Vmnn?Q5WscId25Pn#cz)nG`+_B7hG{fAC8HRNsztGFE z)28L(Uu0C2m_M1F(+0p|^Klm#MqY}Jq#jcR!J*VD5K-BMIZ7@qxXLUhS?3e_1bLWW zPlu3Si5=hyHPXlf6P2dq!62$Ad!a}pxtKhhS_e8s{Suuup}`}m2;eH0(Vy9B-xQi< zlN@1m2hmbK3RE#?@lY^|VGOgVYs@4?8s(s$X9ZEwbY)Tw^*=f$Dw%3bw+mWD-UKH~ zFVhPx5Av$PLrX8)EdHhZ{acIVMlL`iqzWt zb6=CgEok~;;*+p8{wi^ldl>N#;bVIQ)D!7+e8C(w4?Sqt0@=jads<+yumU^v=tu`0x=ZjV+#PHoPn^&85DmK<>_InWUSn?H?UOpue+VjSEIp2V6toiPz~Z7p%V+3?r-AuCe8N&MoI=)$&F2%S z2|2@^#OCQHvk&kNvh!>+{8?r%W*TuS(L$TZ9g#P|L8>yyB4#CD{Q^j>;lIz@LB-!#L3NREp5PTimz z1%9OFfsgrv1V-3mU(V+X<2f}~#fw%gb{pT<)QCCBnN^4AU+jCy9(p<(pC$kgb26Tx zJ~A^S=8MnB2GMl9n;z)pgogHFTYtX2t&(lU^|aPvhuLwK0>c4jshL%l(4?Tv*$58t z>8X{VI~NysghJTZ@KW*?6CSXJoK1(gzw@P@3`-ri%Jq(3#%^(vsEj#f|E7oOdp4Cq zMt8A17E>rO=G2rrYAt^!_660F%Lsc$%9-Z@1Bm}bw?-NtU(nConajza4mz_+Z#?{# zq1?&34z$~uA@2e9*|V}TsBczfGD|HoOJi=5mApKxE7_QZ{8teQy2NpcA6Fu#1h`2> zndDA(d_ildj+vcTqPb2l^^BB$1sdn>%xG%4JtD~>-UVEaeoxL35<)YGTK0?oJfa)D zBVXqwr1Kpn_fC=je%kp#xrW^Bn4$g;dS)|ge`>Z{UKz$nSD6z`hKxo04ftSU11N9+>L6<({hpwg%23{S{Vge`6ej^R`42Y`ktc2OU;D zG{+%5b9?e{P$KOImxq0f|H95A;D{4UQ}SEj3veAkZii?!?q=EHsD>Le0!!~}XU2nyT@dVm*> zn+_IC5_;Hvfe@W{oB0cXbS{AaocuNaKj6)#*e9Sm?8+{W7d863B&?8(dqc#+=1&)Gif!IH9tGTs8A9OU-65-sIQJTxMEq z2|b#A9_|MQ(ggvlL=)K}TXTDmbsQUKJ#SIt-z}l$jiwyou5e$|i&qI9rK`C?{M<}E z^McbQ|4pA@*T*~s+n9G@+0-g#R6tjX%!`9+>&XZa5$F4LR%&9;QX z0BW@5f^W9?KU|PEQ|IMRdxj{qT$HP8&MfAS!!Ip>-fI1kFcNU$o8~+!o8KD}NlMr) zOOF$PK5cJMxLf*+`&aB0d?KIa{ZG{tE_Hv@bl2IP2Xqq@eeHFI^z19v5>r*`RC5*l zDsC6Q3G)yCz}_G>24*u|!T+p}3zmtQm3{f?>a)aC??5epR=ED?M`z$9S`PT~bc>jpyfl3Qe$_L|@XWNz)y*_n zv&-=pWR}sk4)E(NlVvEHlG0fS#~#O?<1AvoESb4NwGW(5PXe{((tMvuM>Ad}l!@(e zFM?x@)y~OCfclhuG&)^Xve=y*3jPCk1x?;s zvn0*H+*aIPRv&praWPH=yxC_ 
zu#0HW=)_O*-X-@@Bi$#-uZYgMnHr}@?Hj0U<#Fpqs#J2*e1p1_F+k`B+KEXR7=*_p zG4DZ1SURl+F9J4*4e>c5=W---n)vJdF^#pvDe?zoqSObMi~%oTZrC zo_$0(!wpEA!FOT3304+n+eEjZ-!ksdi9o}&@%NB_XdA8Y46`@JAXgt-TjPI@_SW9& zAGU6m!7{03pm|KzZqZOMHFYUh%`b}I%r@cTqJqTiu2Fnv|HC-^dyskbYS71h&fO9n z<~-=EHvszvhfKB2y3O8BI@Y|;x-&DMe{D%lnaqtgw~E`sIQR{bX>>y_F8B%MWScMD zL{6ZOlb76F-cR_0Q|FzfPqZhx?aDH1vh#HA0CT3JbB2$1+2WJ^*?%poW3MoSg+~zu zz{d{`_NRukx}~wi1Nttrw!EQiFEO}ef?_@NrckC@rc21*p{Z0h^S04dOX6Ix;XwL% z`v=pT#Gm5sH!fz8`ESe;mc;EMUIy%9bRdr2QTjuL;`53}X$C^w3$nDQwa&bQdbMJf z=YPh(bLyORrmN}G?PK9JiG<}e>WjW77;$%4A8s`130O`40)^mU$$Q;QETU+P-eVf? z(;1#uspG|; zjDOI?!dXxwl2XXDzPC^4bH4L8)466XJeCE8!-& zD(WP6f`UR}W)Fx80N^k^mDo}=TZ9Bf_zH1mQX zjs$vK##UQfdTjCm^EG-xTsgmso*!AuZl+^H2zo26^e+(EXdLD$c*Z?7jmZz@yxQKL zt=t^>J=YBOPR>*N85T`HYvtMANpHmS`jOb@{4@rR?97g5CI|nbr_yizyHerw3RGK= zZOJ$G%sXqouW9d@XdW-S>D(t6vY*>S`CVyutu6URNgae+TyX4Q?jTziA!By4ErL~I z!YXs=O7b~<5DD`AcFfd&&dat>QayE_u}zZhaE4p^WSzC$wX{gRXz>Z36FUnT{PURU z+!F3)cs{d`y(*qXBoGH=*K;&aMN z%V^7igcmhd*uF`MF`I&DsF3uR_A)%(pCMO49iodkEo)4YX8|hKSulD=H;qBujuq|L=5LI>|J*ya12Vx=NJzG37sn zfyCroYyKRfE^DM`9@!Z7DIuK;w)T;Q$*<5NyJ&VbYe%GL#VV_KT^;6nI5?j_wdIEc~G zN&bC67kZfiDx1K3S060y%$$}-`hGCWa`tjD!ip% zgwGNi?hk{m(VyuHOY5jAdY|4~S|iL=-6}Hk2W3j1nqQQC%d6#%q&r-ITbMk|zJomz z_s80rO^n(lXqX>iwd^~lIw+22nVCyxQOOFkdidXK85$jb+_2KbCHku&s=+Pcf#1jz{7NRX4#U5k!nO8I zxt{zIn=7laC(oLjS}!sOgA%iBC(L7FBP>0IK9M7M3*Rzy8ry|y6gW?$hJIV(BW+B> zV|^6qSwXuHRVT@$4IR}NWQ%LGTBYK9QFDDe^-g=B;h=6dRcA6APUzjR2I?=7p$vj0 z_P{OJmXP0M74h{*Uzt>XcSofnQPpr=qAFQ4d)ZaZSlzG!R@cJN-TGet#Q2iLjhCP! 
zx-#f8g5_*MK4TLTHsP)DQz4(o@nq_uuDL#a()N1ULBozUd5S-#AypIAeo(ZpwYC*( zvDE6@Beg`DaS(c3yA_&+UCC*RDDbBV!I+P@9`cGfO&&hbI_DL7eCsi302a4uuY51| zse)BL!!>z8(+PiTUaXr+q~c=@GUB}Eps9+SkR1;{BlF|mV}r@NA&&_e)puX3Y!%aO z^JB>u`fc4$nU_9M_DFG?c6pnq|DhxJk6H=c8EdKE3a+ZJnwkMi)-G5I#>JPSd#NiS zcSJM0VNa8+$<|pLpXEHZ#Md^Lt~P5+|5CIS1owQEk}u-oG>5qjNSVGrdqlO$NHPCq zUW4A!7veskf#70DD!zs~x2s#`c+Z0MpL3eK%9ppy{p5IA+*>ZUhdY(ZTh{GNt!B1m z5Zp<(O7JREjBWS^8P_0`&5rAXT&GV5SK%|ku=>v#WkrM6e#(~nu2+x9jmRSlqhwb+ z6_NPhvf=U&FfA!XQ zeOVPOc~$h(XOn4tFKvE`&)z2#s?KqJGmg{VakP;=F)X%DNV{p;D5S-(NC(yz6oX;l z*p`PG{Z}Yf{>pm2>|BMvT0 z1oQQ10p80g0{_E?(=9eV%=q7$4|P4Vp4P1><0ZRl0z5BeBP$yURz+#aMtq9uR{^dY zsZICBJ{de&|KX}ydPX^*s0lv4ukq;M`C9XTl@(^f?Wxrs5a#2JrmVk#WvX|N(SL;fILhcmrXug5X{bi^8GeJ6XMRVw3Jvh*h_F3i$|1C?W*t{2FpAL-wK zFlfE;449!D3e|(289R_I)Tg)rteiR?vWQ@)qOB>?`Q~w}8^~pXx$>@ZIe*hvqBirx zZF$-p4gv{!fA+8`)Hs!GrDzU~W&G3cAd_i2wif#z*b#h+=tjNTjOPYAZ>;PtD|Eo+ zpA?_%?eh+)URrlsE@-Y=vdO)=W9E*An}+@TZrN$mZZ19T8GL|k8gmWZLazvJhc5?n zHcBM*zVzCmvW9sXW&bGRJPF=5syJ7Yd7LK9kwDDU1=v#czz}4~kghZZ35qm7IGlrH zdZ3FKE+`A10-WnLl91Bl%NI+FivKA|kiRRy-O}{8TCCM3l#iX0s>MJM;gb zZMEw?Cp7u`HBO(Tg>jiplDr2hG0%&NL!8{ez(9et2lTnTf z*oe4p#4-G9@O!Eyd1y1x4bq-p>C@-v7gW4AY&IPCH8Wl?^|8g6-a-gShI_%sOjaZo z?WUNER%7YuKd|3;ee5N+7K%4h2>$ge!T;RIT5d1ms% z3{*EL03T{p!Ik)Jc{1up*wf=Mh@2H0huT+lsaB<>Is-M) z^G<(`3OBRHL6n-fVPYu3kN~}*DrGDpqt>P!LVr@*W1HYGwKI4d@s8TL_J(Q}Kfh+Q z`Wbhs_>{Jc3w2M?w`4EyPYoJ29M3S`W6tZ_LUBy6R0Thz52wBpr&68Bhjy*$7WW``AuMH&rZz$S znB~#sB13}=Vu|J;dQ~&UUT=LRp<+Fa3#MseTubd2#eLPoY|tm#E+cZoU28v0rb%L! 
zNk&88`JE|45ii#~>JiqBi444p7lO*V!HS5YW96GvJqnO~pnjP@%r-`A@LZw$>At#1 z=%)d9j8Xq>`pfz#=P#(pT%J4{8O6s(^+C6?JpzOAap2yHDT;q8ww9h#zAU}ytyS+X zdTo(vXBRZ5G`d}R{Y*KAsqQ(-a^pTnX4Xb%u~kU?0bdr@MovWI*_#1pvFmh?nqWnr z<<628%8Sc7d77y`z7|gizM>WdaRRdv3DLuDDGQYV$ak(3E@&*J7aVFr=eOth^U78 zrY|BBku|Z4sIGXg;6my;S+dpwD`c-~CL&#xZAwNUN7eJ(=g~OrEMY79LO+LCf;o&6 z4Sx6xXp$@l|Ah=ot0Yci!(#4`r9_)xm{O7NS06LI(DqyAfOhF>iYj5JVS+0QnQtuR zrN|ek2|fg^hcoppF$Vcp3h+d1Oj=`N9$p=Tk%Nh^!AVprYW}K5MivUJyk#=MV+tEW zYNWU02|O41z_mfzpeNC{$WyFX`y2fiSLCk5_7byFv+=!T=a_!PcJgZQd~!YIshe(C zNTgScF-8!ZeQHxS`IlV}1(N~HPIxToKn^1P$kUoHXb-BDl z(uix9y#wvVjz}Jc&0`itDe+i3KBys)4dlzVYEQX)6y4Lea&C4<7{VP(gfe5A?GpJv zlf;^2^g%}R3*`*hBEVUjk(K;kNzv$8wlH!x_J|o2n1a6t8!I1alYAEnhv>@lZaFvT zA9)_~?F?UBckoT7hK|dIX;6FH1w})6u;pTADKcNUoS2JBxa$$WFr2v;Z~-@iR~0R^ z14@;?XkBK}EBkZ(+Jam*#jqn!bOIT7djfQOOvjx2WR=im`{axm_>HA5VIJCAG&c6e z`mw%%Xz^ZkK-pAntEx|VD|K_r@7ogfSjm6%GXp5xf^0Vu`NdkTiS+=f0&+RE>9gP( zTXg(#t{E% z8__3-kNRdv2XdIPsfHzUpbnCJax>B;rJibz4UOJTWfKE}TTo}I-Zh=E9U8L4i+9nr z_9o&ceXZp(zTemrI0!#e9`u`tf}5*F!8gL@oF_YAjZ^xNIPQ#&B;Sz@gPu|{O1eyr z=9uOeeMcWcv)vc540ygd7dwQEqvqkg(1E6_I43qAi}5db;~YOii+@fw6W@rpQQt^F zUJg1=wWYRJ?nkC#UkbC3)40SHgQns4gqG+f^#YF^ZGvWM8mSxxSwzLx)iXVX647WfHGI<_JnTFwEq6CI3uQ3pNRAji7WZ4@xJ z3cSo(ga?AdNz?Fepd_jlQ3>QhW5@+yW?49tWA2gP3jRkx?Csze{B`CXV&W%Y-H{)h zOWz5#vCrfSuz~E{%$cGUs4y{I^i8~s3>EVmlLJ{|KL91T>9V8Pn*e$3Ix7Q5*n(*s zekyjDb|YHzEA2(Jg|J&T7+t~JGZteZTtY%I-jE#;Q6tXdz63lY7J>ss2Tf_-ukP(o ze@~G4GxW$+1De9H;}sl_G_z08IFSk~p8E%#VLq8Y4}BtZjIYNExTNrx_zY%cKni{h zd@J~6>Q*$<`O9>>pp&o=TAufkl))1{TTIvBT`pYx4-({9DA|fUv2{pmkJ`=W;wrEy zyd%6bc8d-7e=2^X|M}(^Kb9*Tsiuu3d$?1OvZx*L7xc{+X|%vbZ@Kav($w`JrvNE+ zj7e>Q&a~#lt`&_iRbh#knSJ3;;sfcZ3XGLxPbt9J_L7Uvv&;?|!yjX!lr4xq^gDHt z@g`lW>#e*)2OBnJ^K?6CR`ODC1Gy3PFW83X1bIL?^!;wx}L4YQ5|3DPNPBUp+}h?0OQ#M~eS7*B2P(>`DUbq?)GSE=iMC5}W)Ci&!3>A}IWnhxVNNfUg zq+><;nSc5Q;ui={h$Sb0_=t<70wf2PlC>bs*9JH6Z5%p$BUj50!C!N8iLD~3*LN6Q?6-Kda3tj>?%*S1zv2I~&%(|UO&G}k z9x<72>u!p*_YE_z$D;BgKn+&np%6W`!BwhrU^^T;Q;xhnJftqS 
z7jO@?bGiLoi0(B}#%?x1rrm5Olb_nlFi?JO0sR9Roj#XF(JKkfX+PW^Q3Z<0ae-zq zox0>*#Dy8`wlVBE<8D^U2ASUD>C6GRwXuR3iwsc}&|A@Y;;eZ#mX+oK*Kl_{4-n#5 zL^v2iz7JGWH$hkTRdyy~v|M5SMaMA%nRxUgHj%!Gl^g7I9KJwV4PN7~a-IPj;Y)o6 zW{~sZ+fd)hPZ2Gt-PH2HIEn_5u1F@Bj5H_HZ^;T!PrJ!i$Q`;DHBcW2mQyhb3e2Z0 z*-xq0)SlE1)N1OT=+4lAA>kY~3nT=*6$w-x$4xOq@;CpSp2WPN7J}!@DVP8xl|*(7Qd$7S9iAF$_=L&-X}SIlS9z$n5Nlk@5A{-;HAQv>TxkSm_U ze}jH@0(Vf?Y)a!B%4l7n&ZpX1dP=*JyM?zIS4cH~JSmr)%vHpU6s^gqnG>^f|GJjuon@(*>JBYl_a_g&d@&cu(q=l4srfm0ie0=dPUNWMljKwC%)Z z>zV{Dp*Optl8CvyK4c-$mCar1#6QuvbGfrg&P4Oy_5-=A!4(@O{~tQXIzgGQe_?*D z@l#F}>UCJoI^Jq*mijNJhVtW^uo>w1@Fz?T{$Ic^8Wy|UN%qaM3;Y9{S#guvVNF%N zLdIFfXrAg`nH%XIDPs5shWFWTxYJPU)Di44WNaMAoW`=l3+abMe!x?@1$Ers!A7WK zxjL&v^Nn;`!gUpJfAbW>IPDjqxAB-9@SUNo>?vGlq-~0i9fY2UYs1XIz2QN0HW?al z2~>hs*1gsg{e3pw5^of3I_4--G~_2Ng;gi3q#;Ju|@Po zVrTeYK(wL<90D_dzeQn*gfp1I=450dK0=V7t&H#a9Q304JC})-NcXWAo}RgxSw@UZ zPNeS=e`5bfk039Fod7hoHeeHVPn_JgG0X8g;Ecc#0q8BhlGtU~%B>{3tIo6QNmK47 z<~(^Z<1o{R8k6Ls11NWlQ(Wzz!bD38hz$s(WMD79Q>dY5Q80g+UWD-67dlKY=l-G> zDNEQc;w&bgnN1H*KTI3x-if=x5xP_K9nhC<9lDL8>4yG?$(P^*H&qvW+EwtG`IYbDm^|7c(J`X4(j)*(5!O-q0NLhGa#7OO}Qy=RkX*KE5ZF&Zk8B z$U|I<;Ctjarhci4SWoLf6YgjpkG5w&dTtsdY+v^@m6w_19TSIgQGW(m?8RsT_5&A)AW1es|s5FdVClf6LmeMz=dal5;Ti%Pj;clnwgeaWZ z>bCj{`!21Ys*SCK?nmxO3u62$!z8?c1|{Y41|%fLz_q}vVbhq6#KC|Ww1oP_YTX6u zDa2vtcg;mO(~+W6>(<(i8d@mFTSptKB`WheC@}qsFbQdt_=x)tJsH!AHR9EwKbbM4 zHefXP31%^iT(~|CuXemM6hp!GY~u~>TI+SFn-hjmcX1++4&&_K;W*P=8-~o)Y_ohr2g#*o6V@#Ime3J8_zF&i3UAAol=cCc>7Yt=r>6k?bxLD)~M z$$BjImq(>ma=ppT30;}@)QqUD^fl^aXm@Y|#QN{24uhxSUUrhQi=={VYQJ7|u27#; z5_3N2CY>P+1XDBbaOXjM%4XIL(D-0R0&1c<(qZ6ih#f2hpZsS~3+S#yioG{m13k1J zWL3JYmTY#WGF&*yrIzCvLX97S7F|ec9V@1RwaI5dh)Ac z?tmX$dH7fAC7Ty?oBYUFmNq2Y&^E+vJ?J(Vrdo1b-P9w@Z5&6WANl3>)U03JRGVMw zOXjtuDzP)Y#k?@4Bgo?~gwLdSE-fgGEMUGb*-Y%CR}qg2ze&!(`+d7)E%ZDokB0cTuqdA);*njIsr{-9n87t#InIFRgqssY@XnklCPPCQy*D;f*SNQFMt8yMX zm%mI2>)U!YYMJt_J4hQXZRH%NpObmpKE${<<(hQ}loU^xCm}-AEItZ57;;8r-F)}2 zr1yhS*fZZ&^#aqQJeTI7Zlp(}1Im7`L__!7P)De7edcsq43v-(YLO#<$F~uj=%dKT 
z;_5maQq24&%lwP!P@q5`<=5)I8J~L{27m1|H)1SP%ynu^4<)=^4gZs2vGNF(TyI{B zUXRP?ALEN7?V=s*K!}6^)MS4zcn0c_8+ofus|{~GrOd2{VEx9RV2Gfo@?r#TXa2r11J&301@463Qf2-#?k6_1T zx%RVoPWCzLUBWNzkEJb9my{%gl7X=(w}$){v6t;Z)dgRtogl$~I_N~Vfev}zlMQqi z-3`f1)hK62@{}~uKA8GD>#TJa6)RFB4Ainjjz2)Xia9EJ7Uo2ZWIUiKIG%0=UMxLG z70^>mWuEEuZtXr-1ifGRKZlY&oSS7c)0;EzSys{&sS|~#v^`-TKaIv?db1TY9A3wC zq~V~)z)P!^hEoISR3qzN&L?U%I1liCih=g~+_ zujm|hAhR;OHC@I82jz$ftLIBZ$2QFvGTp;0-_$PW5=)T0v0ZAe%?Y(~!lU%N<`u%A z=1nK`^9g6vA~HGV1YlY3F? zF3FTVQR)io6`!Pyd=1onvrc#;w1H_eT$}Y}i5DCLjK5;LSgT=Egx)+EYaCp|T_!yK z9T*GfZ5&(rMlo7*rFgFLkK$;-GWDt4guGwcx~yTI3{ji4(RszlCnnhwAfGrXev9mn z7$Ov4UxQ8DB65^}d!{`|G-Q8^n-E} z-q^~JAu;im`RJeU+oHYhVQ>h$pFHBBF2v`D=kpTcRPa!CE*0+Ih8A6O;+^6NlbZ*ThaDi^3 z-Cy_siBmGZ479J*>3t(w{1V-ItSUX**$j_Mo?+Wc^o^fwL5U~Pjf6OId)PxRgc5=_ zF#aHU=^>CrkJY>`>?!iSYw~M|KDkxii9}hJ)^(U{okly7NMn-3)|q?}hnt^LsZlBX zDe7KWCVLoU1kDslJNK7{QV-}@^^1Z;>b-nZ-W$+MvfpC?OEODb{Xll=Ci_7^C$6y4 zV1?*eNCKxKPw)%D&(JsQJbGN||az+bZXV`xEmlW4F`E z)Tdmxk7I;{86g*{)l|NxkXwLH?i(4M>>B^_Q+O+cT?`RJk;4WdFH{n5A>)|ZhhNbT0v}*& zK*zP6QpOaItR9&jQE;+|&obvHIuA)O&sT1yOyjbn<;qaU2<=48DC-$Xy}p52m!dYF z<$Ff`2RoU=fiYM($X?wh`DdlRYFv8nGIfD3E4(<<{#-J*AcMhWqw`YXKgt%O3vs9V zp+ldOq}yNx$y1Gx;EH?(&tS^~ZexwXrj>suKV4B>k(l;t+3)-ZSpzG?8%jy5lFi_| z^kZQMs6cT_bi}CDRj!ZOV|6)pNzw@82=l|pzR*W5Ex>^81s}GqRy9qNuP#+XnR!*0 zG#_*B797(_rH}2O^v@K(=o3buz7Oq$M(h4njzav5_Kb4$2OJT53V({e6{in(h`h~5 z<*y_uE3YY6NH154oi;_5Z=SZhYM0HSJFo2y%!VR;oT=8NHdZNu;S}gp`X^M1{D_s} zQtVIgKvF~e+IUsgP_?M8jr@zIb-7jfS^FsOfcmE1Z5^jQVq8GQ=(n408>@`lVMGo= z2hrx~5y(BPB(@#)k!TX!i)=-vu78?4$2efcRGGpwzVw1(ITYd@soDa^nvZD?BNDQU z?h2Y=uo*sMNwQL?FTOOb9*H9+#O%TtvSDy%Vhy=(?T?&I*!>!nv^gGBGF&dj_qtar z%ZLF&j`|E?!SCuC5D)c)VH!DIddx(VblM>J26-Sxj`bwp2Dc+3sLpGeWoI$QWp>F! 
zW=2t*OvyBMep2kFk8|hM&1gHO(gxE@byM`^v|sK{<7cowH4aVy!O@q|TIx~IGW;2( zTQxc>!1A(kd(JzHUjZYnF?V%LP&5~2vBOoFd=Bzjvxl>3?&y24Yb1q6m^qYUgznMj zqr0J_Mbgf9+(=!h8=6_@o?U(=XS(x1{>j|ej+r)}JlS@LUamZCT?7l73FedP3SFhJ zG{<6W#+^v6fmG~*C@FG~-W8aNw*kXe%*gCiAS?6B-kBef*FIP7oox9ed*tp5dMIO^ zO`%@u&omG*@_J*&SV#Ih~X`u}a^h;v9QHYFevO9cCtWn}EE z4lYW_j;d_tDvwQ+V-0FdR69`lG_HWM0BeeS4e-G!%at&&585SZ}l^S23f4TAoHSI zjiqUu@R_hGrjXc=^$OZUEhNsbp*69Rh1GMk8>QQd{q=1XGu%xL$toBB+qg;d8n1!= z){oaKV9v-&_o8i}eQ9m6wP@p*$>MylPtZE@Iyqt029;fTu*#|Fs&*9a)UMX%It%qf z_5HZ5hC0I?j5dukS#(C|1UxnOBO*ucrs}c&m@E1={x8ujsFKu^-nuJ_Cx*!t|EZQ4 z6AC(uGhvIPtL`soB^#$-1D`=djFHGgtpaL}{><$QpTPf0xs8?(U8Bu-0{K^vm+VZw ztZgippx*LcN`I_ZKBb<3t+g%Jd>1_cB0mA&jSMybVwy&73M6()BH`iW(3B9gC+Ufv zhJ7Gk2Jyr`YUc9(vfEA5lacSm{cTsE3BOLgsY=b2Ivy5oljYf0xWW}jZCU&w)lutE z!l*f5es1`!s~6TNb{aPD@~j!AHQdm|r|@d#e&kZrL+1rN5{V*nDg?=+y#2mUvi6?$ zj@63!?*803)e>h2_DADy$MiM2CDt`^U|4LvnK|6FP-vdmSiEZ)ACZraWRe2X@fN^R zwok8KA3mYkR1m4ACx((WgJHlHa0{fr83(KLK)HXD&O>bGg2+Gdtn12l(B$;II0Y8!rWJ-~h z%HGHk6sUh_J75ys1IbHVV%(H+n!sU4w3>8dOM)V)IWKBYM?TWDvR{yBLMAbIGl+ zYxwV|bYcYAE~tx0PUu|ORx=O@E;z1rBfp%d^^MRCyxS0uMG=L@P1tW^4AdI0P%eaJ z#O$oAVmJ3v5`)E(nNgkbBjl5yCgdP0y?n0f3pLEwN<&ji9Q}12DVf;bq$w63Y&}r&K<|8DKI+u7Hy+WOfBGWS)M9P@9T_5Saw#5^FDT5 zRH&W8|0i2Ye8k<#Nyei@Gfg|?UV_(s&W<3v7*8hsBDcV+qvEL;OdI$C_>l|B|3>CY z{^#q9oR@kXFq)xg#o_2DRU&=>6Excl^>{yhbH!D00-l}aBc#x-q-?Su+CB0oc>_-j zyh4?bWu+=;wc>hSA)KMwZf}G1(NwYHk&8MxR*aerGxXcAxu&M_Uw8xPs>ty;iqs`u zBl0j1SxsgTGXr;1lgPKlXG~*tjJE~UPJhXo54|&v6tm}tOhz2 z4jPd;03U!ACk7I&@kf!t#7DAAU^RK2npHH+a0l+`2FB~~Q_C>Z6=Vl+LI+R+nGDxq zi?t<40sfb4IBLO9WymmrU=mK@c5-TDBf>^L2(*z5#TRJw`-pPqEyE?^k>E4F7A;Jj zO)p6;bQ5|^)@#z>N91Q|SL6vbKAl59QCSIH@V_W;gpp`3_Hp`??Ws%or*%iz#*SI~ zFU&#imvIEc5MI-CW`X!rE?}CgyTd{BQOPGHhNjb;sGOb{AB^$fVfbm>4Y&Xc;Q|Bl zn(6+wB-%>#OGL&bW*~%H_()@s(A7A|w2qHZzJLyLww$N%MfPy&D&!6GIW8Y-!wd=^ zjd!P)1T-MJ0jVcOD>(D5e!BgRNcxxlyS+1d)Y#2>R)5hn&LSx4p#bx&?2B+3|21Vh z0=T-^BWN`n9cIQhG7bC%d;?hRs?rY4+hUH@WqZDYBz>)W7BbUt(241?jhF3v<=LiJ z)~{KWa1+bKBAR!sw 
zRpPj0p3_3SlU55hvWKEQHIZDXN`d>48qHSCezIEMLTV+i7-i|-slTC<34n@2$44Bd z>Tou2KJ|yZk{^OER#+T8@O+hwpNK!u#E}h%k-8C3Yr6D^;~snfdI0HJ z@`*vB`Ev+S&elUulg+TcSSsmug|OS?L(4YYNJZ+J;yZgZ@>}9$w6j_Vz=N@%^8+_Bw`F6Amdt^S z1;j39S>jeQjd4deAm1^wLk8%TcY;s>S9!KmT@aaPBy2?M2~|LCbh*&Qq{e3Q0rhX} zAy<-o%>tM&vlKJ)Y%hqj8JSS|qBq<0#1n@mzfOR=U^CUBk44 zOr?9#%kYrmYc4H(w(z-i8lowvtNRaa5^Aluk8TMJ$PQsjUyoEJ-oeu@ei(j`Z!JkB z^4I}I9r%YTw_m~|S}A*$Z*2(28*-aXA3%{CZmqViXTLc(-4Aw^D_j17)j-3u>ayWz z>!clQE&OTpeC7sqs$eyjqc?lMavh9knFZ`6%Xst!TWn)o1e@fzU^&8ea`n}XV;;E$ zc|5ZlevwHt^RTQW4MPxnqB-#lbZy>eHIwd3ps9E=cHk&b3rKItUJeD{0(4 z4*+N@T;Ehf8xV(P3#~&R%G%IYJTs$^Mu{7VSLhHmCt60Ypf>~*lmn{EeWnn&C2@n| zkRA|2dBx|3jS6C2O&zI)cn9@sYBSL~52P-V%`$pWU#Z54Wpq#Kr`Q>crr-LWkZO^c zC;pD(dK@N~6HVNIlDmin_G#oPvY)X5`HbACs-c=vV{?C;maLpm$3@ zQP-JKV)6~4A*MUAnO=ppA)e6JT~EkH^k-WgvOm*C|AdTX1}kgCy*D%G3b~0%OS>w% z!wd;tsyE}1+C<}$OL&SO5r$Hm@L&8>(_EFj>(#m?`f0OIk?vgL$BW5daz=!w`l99N{-xzk_>wPJJ4ZrKnH*X`Fd5&w( z5YvSkMOOk6MrDs9_VB7y@eaq$jW?6i+5M7y5@(*XPq1ADGw}1+ul(-f9PrFg9Y=c{ z2%I2JIk8p6 zZNVeh^Fq${5^taX(3p--4oTEY#nq~0J@JkHCYf6NlQ%y3s`#l&<3f_k^NVswpYeu)`tY$?cU)m97C2=!h852*bq<=H@=*Jw#N4vWdN4e|J z6gZhv!!n?feS^T3(JX`(>2S6cwp#Il*^2Ma>BOjr8)>;rck)Yuj{cVplWwKgh+T*Q zTqNqlo$OO=Hq?Uk;Po61wmZJl-0A<5U|KN=Mz&M5W}1@+vt{%d%A9(hE~7ig|DaX$ zL8+V`$EfH#teU)m4PrY`qrfA~Nvgm;j(b#5<@5j79Q<+QTfP%*+~YJaUbZbNyWTOlPj0tsQ-m{baaDyV-x#Fg=3ZD4R{a zXMLHssC}$DWja75% zP-+wZG*3>s`0UKK)Ew??@*AorH#N>o)nZpl+KEZnrub^QAh-q!rxOBafZB9x|3~wB z>Zrewu7Yy=x+zuEc<+#01y$QKC1VhIU6`EQncT%sh&@j(;YLU8C;jYrES4TpcpQ8~ zy(~cO*Qs^+<4tp^oZw^aJgRx1Tmg_5{IhZda-sKgx{P#td`U_&Tj(4+hm>$zqCj#I zo6E!&^jCkvK8J4V^4+h3MaIDnE)Z`%Y0mJsx0$q8z5fC|6>iT~*OTo1!a`_UT35ad zElg<7%_X$bacm_WMUM`hG;~E@i=F-q@MFMYnP*?*pJlsZVtqY;cG}L~dM>?aa)0kW znLUYrgN#ev&V9wU#NXnYlf9)Q*l@ap3JVIB5y;Je!PeUK-k)UuWZUMO23$98^^SDy z)o47!-5cc7g>i6I)_y(`-I|)jDe<%Mm27~#E*&B=J#G-s0tpT&yw5KIE;;}8&2eQ~ ztG%`0w^lZkStk6*s;0jSacLcB649E$2xVH<|Jo~P215Zq!%&#JrCT-_<@?BhG zRz*({SBPVH*W3Jlu+k0(_ab>_Ngy4~&^__}z#>)qy&|zvmhDj!-LiWNBgn>SEBF@V 
zx1^cuH|lxp7Ul_kRgy#BVfH$ohGvWRY)jxJUS*o(pGTx=ANXby@02GzKS?eR7LHOK zv(orQ)VkCk97RVY?q|h$!`SJJnlVOwq}9w@V1E#$Dy><8q2kSSs_zjk(R}bOqWy}8 zo?QA^?rPC%QzvsF--el-nk%OJiNsl=*XCf1SQBJEMU{!9t8R{}z&UP=CD!ldh8w2% zBDo>zU*3jXd-)IHEc-L(5?{qW%(%f7uqRWtvoyOoVHZQN3t|l7gq1UD1~r$BvdFS|Usv3D3`Z~+H^Jlzy*&Vnwo`vZ%*tx>3+k+B+iMue+*tVLN+4-lb5D zvsc!^z+3Q8TDtEYj3w3a*28ATT7+(7T}ewWn%?QSInQKFu{ z`%0y%pet}PFDldpY>^c&-a2wpYkX6XV~M*xYJ6gB6kkj(m$YXu(>0FfbEjGsTl$m^ zw>b@DakwKw6DSxHKf?7Z_joYE{W0^AUk|@aUEwXo8Ya4gv&6vI{(N(4vZO1k zWG2~1ly$Q|GRKv415o{zqQS1_>T?C7!BvW0Aqg}ocVxf{KhD(ns?fsJ>7M6!T;gzH zBH1>!0ar+!m-J@7F_2AK+R!!7vDdtX{1Ee;>q@6e28i^B%~n-O6G$8oD;fX?bhQf-k-4frLvD1bY;9l- zwj%q1?-jl&W4d=Hu{$MGSVx{r2=M)=t1*k%#q@)yS4@BAp(Vd~7+P)EB&HU4ZQJ}1 z9-~YO{!KK=JL~^O9L#R-ixO3oUe88qO!7gYEA=^{1Gk9Y9y6Zp#u%ak^k%lTStwGF zO+;kyJd)Ck4qYQZDTrVzN|{&NpGuv{%Jgob$EV%#^rn|3kKz|FZR6K*-NgFIZIQI| zENT=T!A>?67yiXe(Ot-Y&FIt#q0vl{d}YAN{FOVxcYwK&Ip5ohd7U;yC}$fdmGd3h zLGeS`DmFUWEgC?wBcD)@Sfz1EVLRTgy_uiNr>W9{%lN6Xt^Nmmn;eO6822ROfTx(- zCOQt9aZKV9u7s1sh1j87SafarANF+Qa;lK!3`+}WZ;|G8=(M+w5({?s?9F@VS9l__ zS??y{d`1ILTOl!J7hlL}XJ^dWWwPr!BUF#hy%txUiuD_K4as#@VejXRlBe4loB(Y=j zP0cEG_^Lj-_qw`^b{HcJpGv-&7n+y*-PRwrTxzXD@A%>tiDRz$rWbAx)LwB3iA3h6 zBX}e1acl;8gB-T;vPx@GuAHs;VZkeE>lfJ87u!su9P52uEZ3adNYsvi%fMRB0Z@^t zGx!uSDJH;wW6|k@up<0ttc2)vbb1^yIx+p)k7FiwP{!5ob6?SaE;*nW zE6$u>R|n~Y&<@>ZTBKYU|Dm<$Cd*^m;MivWM(g!%S8F;=mI;leC#JPT=g=)OM{-9w|KkQrrKT# zceFZZ7rs1qzgxm3rBowg=OX$6&d{5d_s#BA%9l^eTUq>9QKfQp;Z5%qO=&@0N}~TS zG|*jWo)}25Hn9;tSd#^8_bko113u%ArZhpCvd^P6_#md~vf)`5%6;>*@>1q^FF31s zQudeU56!HSNMfVDvhXh0!SpQOZQgDh6l|^T3&?ycvyXr!p0UZ};cD)f^c!}Y$ypMa zwR)*xZh2mhMTY#5ipp{WzexRao&k^2ryx^iuqb3Rq9e?PqdqTi1Qv@-}Rc)o5-`#&X4jZNL zKd$@o#aI`(e%fk6hW?CsN^K#2t?gs@M?Ga(6VqW`A7zY7W=nKC=_3yV*yhZ~7MVxE$B$QxBTn!g?U#FYJL z=ZMl8<2KjE;1Ej}cV}_dhK2HxeU7*AD@P-j3YF?!yB}i<^By7~{yKFJ_Jiyi^Nhr) z$tz&h4>*0{N$o0RaEaQGh1T=$HN8SFGD%hfTLv$3G{=?pFy}I&k8Y4Vg1D5I1Aiuw zR2No4wG>zALzP#G*mLsmf-jm}@=MVg{S0cTSPAWOwN zxRt6AlhIDPNy;0n8{Ig@OH|XfmL69`2#x2DR!jM^f|t5%zNZH@4&weM6U-mD9Pp`a zGyBo99 
zawu9kH~9Bt#<~e_ox}h#osX9e$KEkpS5C8!$$eI_#K9^grP07;^{GIWYmB}vI}Kc9 zvLYPR+q%O360UaqtGkP$V4b`xcm^Dv8YIVHV`6B!137cK%UWBVTK>gWuIo^I!ZFV9 z&L06>H7{Ulxe!|nY;+HIys-U*{&N-Ux*(UJetFZeOfv);L!O4`ojTOGzJ)0(EjnxU%7pb%5J|*40$P9?Y6M z1l@w)OIeIxAj4zK5Do@AEB3RDB7Hecch}Bwj#otJIuR+Q(+v!`C`84{Dxsk;Mgin5*xQhXKVZg}wN`g5iwJ48+e(Z=Lql>=DM6Ud7wmRVw9X9uYdvn?7OboD zIs5nr$eVyA-pLtTpr1l~;$$?QQ%ZZ_A%-qVQr#=B3Y^vg^CP&Bp>f$$yt|1k;as0B zZ;A$37TIO_N3;lVDX5XP1!wp^reA?Jd6(@O9FQf$3sIf0%78f!Foe{e`{8=@4>PC==9m?@?w4Us~{HDI*b zQToye=!S)2T-y!5`8@Eac{#onin7JKcEgp9x8~<)eb)l@CXuc_E{7*-BllDKlLD@Z zX+t+BpD!F}Pc)iKavdkl^8)`mXIRg39bJ3vPq1S5IA>kwK4=fv#@r5>0S!{uL9Zf{ zvw!0NmYsZ^Y(b>QG@w>fkqeyG61$@4n0<=lvtR3Y|dQwqmeuR?RY-|g$*G#Uo%NH#Lj zH3n5Wwt`*JvQ2U=WK#9R+%yv@zXIEtp_z+O71J~+9G^ztjc!Ob zq1P4{8Mb;m2WFeDi6yy6OO5au?__Tvh~EOxls{_?b2Z?{>iU7-xy7=t&`0)n##?b( zFDG#^c9z*J^%Lbxt-?Eoun^>1Z~8N+WKLL=fp~O_4fKz7K6J3YKgfj)^JgrC(PpTGox%i+a(6EcFYDk#JkP(02fHbPH-s z<>G!mqkaSS^^MN`4Gr`J)5jr$`1=WQSSxOnWG{Z7DGIstx_Q?GmodL=KY7r+wPXWy z-@3o3)G^zBq<}Tz!0C`d^~!ZVke=JlebqNCO%31l)Q#VU9^r0F40tJXcHU(;Pb7}z zAx)HneSac5H7I=qwd<>pLM+^L2WW=Zw$(Lm!S4d2mA{G8U|P1CJPLc05$XiCH2OGw zi99-Yirb`KnExC~(8YKj!sCpss36kMycu?(oos22_E;OoSHnZRwQHp^ndk~hvhIlP zx9`dQDG8n*y_MEcy-L@(Rv5>I9N^z(xe#z)wazEgpbz%#kQ!+LT(u8Ihq^u(x?-_V zJH=nP3mKP{LoCBoN!Q6sgfn^xU5`Rb!hqkl6M>Je*$yM$AIx{QC)&Huy0*Ea;3n=g z+bARo`kQf0p7ZG?0Ip@un_wf z+zD?2$3m@T=G-KX*`U=5-$=2^)0$5d5^Z` zkGJ)pK6oUXqxn6Rcj91-mP8^GaYS`zw4-v1J z9k5|-*sz36xR04D?L)j_I{9B%)&;~ay-n?x%)1 z(fQf`UQd7_U%65Nb@k59sSj@#?xmhXCA=%%h8^b`Nb(4=^5jsb3>1GRxmu+USW10@yBKNhx5GV)P~4c!54QMgSaY57=D+zUep<9 zWM%%z_%-Ejb`vp7^A5X06c`$VlZlV!q1IGVYm3sAlMeugJVfcilFUWa7x++8GF^<_ zi=NDkrJ59!qnp)ty?3#3I)Slc2aFTZt$2d@ifb(X*ygm@30$mN-y}P@4$0n-yCG9% zH%gB7Olm;4!4E|bq)$_d{GLc1V|x#U>@v@w!%?dhMvAfi_HyTV>^`v1JP$8-J=FFl zlA)HeG_n;kC1VwN0fQ6wQVWQR==QXh+7Vn1xomTU&Tz8*EBP-n3fKf|&?c^IARN8# zZf4qy?Syt|^6-2_m$!$|V;wR^lC$tLiEgStX^yT>H>T?c`nX%T)Z8AZ8MusC0ylR5 z3(bOS;7*QkM4SUMUPSMrf_fQt0o$F|8$XFZNk2*)Cc{Mk*Wc7gu`{@Zj`fwgt{`@H 
z3itsXhmVDtW3|Aqa0l#=?HAGo=M5rJgqWe)j`bma=F0GJa%1{1Vj`89aD<#e{gy5k zT^@_Q?VN{5C4JoWOQb{`caJB3x|TpGl*GCpHd6ET%MqNqr`(MO=)O5YY$>fww#&Eq53+Q6MIc4r;tT^)y9cP#77Ql~Lo8kge!%oRQ zf&IZYNPUJ+V9v&GC8Uf;vYCuwCUIW@iLbRdd0XrS;P$TTo@+pNcXQ8Zvl*HwlxhR8 zoPQ&~k5IfM>npm7Ta-E;JIB6_AAmn($4O#{rwqxO9A`tKbIAEeaI1T{OCMNcKLjrF z&od2hpYZuKO6a}UDZ7lc^~kdtpwoqkDY=-1Zx!dk1@=MIXClb#p`SSL;taF~I8t~4 z%yl&`h_{u3e}>K&!$h`8f?5gX`_JUPhIja4GRGj#JX@11u}(s}*cyB+2Su$XCNppR zc6y~emg_?=Qz?kP^b#%SeoC)21nh6<4dw;L74&}F8Z}E_1&-ylqHDkh>D9~-q*-D^ zMvV`WQp^f5Ro#gEp&8BGp?d1B;kBtqBM080T$T>DAJj_Qdc#HPIuND$hi>8ekW0}q z(312>x&%#6I7~kvYD=BWIBKnDEuk|uq8AX`%@fh9WL+!hx;X}9TGSVaOutBDIzJ$-@tAvJ}4vsGZfsR981^ zXJQ?ALqC%243#KB(uc(7sL7vLYjNffz;7qarrwcrrFpcKp2OWoE1V0-x>$eLf3OqV z;*JA0;^UyjmLYgG{8FbNW};&hM~TxIk$scQ!jGl3Am5P96IxO%6(>!h+tVT=SL6`J z;+1FzWFpi@BybFNxUp2Mxw$3w3VWqh;{twM{*35BEY0pq93-8oEUBhu#aB@c=~mK4 z)Jpm~(;PP9Pq8`3AVLeCN1hYU>|@X}GROD`n?#=1q+<`LNwOMTpjKz)5hLlal&8d9 z+7;i2^w8%eWz-<1Hx-2DF%0q)mN7-He55~9X={c&U{)B;qw|<5bsi=W>DfK8ugtQ{ zHTVW*VM?0lxG9TUPqt2nVru(0K z(0&kF5t?qIU`?=|mPLB_&nt3}2fl(FGg|3QNuP#UJ++d?Hz;xD#2 zyMli$_VpX_yt*y4hwrXy3-sU)7#o?7acWC#T_-NgUSCnn?ss;`-o>)+VX0r(WF#s6 z0y_kENG`HXsR~BV<|Evzh}OIJ+enL zm!S)(16gV)Ve@nx#y< z>$NAYYKC!GRgj;-QwHJNSeW#r<|pW@t5&~=ndpv$x-?o%@GT!KYNQh{UBc@UM z8Zw+2opg`Hn7r7X7W0aiS%baz8M1T9*01dHBZ_uK|ws@Q9=VQM- zpgIhnExwIsi&UKGtQ+`qep_01Vm9A2(NC=A@?v(7joI~49Z3!I8(fYShdS9WqR)f< z%yUGJ%>-RB=J(H1{efNbrRCvxe{W7!OPug%Q}y^`AvNI%5zo(vnM&;D!Xs~zf3SnG zd;U4fhVK7-F7>}al6ST4Z%dZvfw4lr*wf2GsDgsp?#x@lZ*anynS4{XFJ%Y^BUK{e z@-@C+GJ-Ww_t5j+Gupx6CvR8%YKO+-F!eK2!U@YN-4&s?eW3C#&jS1MT5!+6yo^U& zHrz7#0(%oZ8)p!C&7@>B^MxLY{N>RZ|8%_&E}L`g79qk)o0juC?5lNc`JuoC<)7Rd zuzoJb4Th#>IN5DTelo&FU@zk)G5?a|Bx9KX4Z-5|c}^;`2J4va6`xZjNtc;{WLWHT`V-}sOr%fJKiwnP zqwXX}1q(nq z6&g-mO>L(yiyqho%&?fFlpt!hXHyVU4HVOp*v8f&bZfSwu{rge?XTG*@cqTtBvX~bb@LYr7mD=%QW-*w zdKneYe~~Yy>hqO3R`Mx7E5k~j=boiZA`f$D!eDYg*EISDd5|rM+({l~yV<)_1N=p% zn^bMzQ{64{mT#tN1G&{(BHK&)JCoINv2Nqy|oUI-SeJtG|l#VHact4ZVQ#}-$^t0 z6=Xt~Z)=;jX=I<63(Qt( 
zQ`7;b0~2l^?*%~0Jjk=seL$}e?m#taozNK>Az#bqpz@rJ+&ZjSWaHPt_owV(6~z05 z-OL%PXNsjPekc2%~YTZjLZeVuDS1T#9b z22z>QgSkviO_<5dqC3X)rT5TJqejwWm^IeM!cu&Lu^s=Bh}3HM7~+A_$5oN0yr*1! z@_lw&)=at5jpEzlr{p5h^MS@+rB5=oqfgUw#CXdC>I`$w{DJ?M9%X3GPo*J^iL0dd zDHe0}>5q9$SrgMQE1J2@q@?X)QW+-c3w?>%9G^j_GIyd4)LrI#M`FeZUn_} zsI){pU(X{4iw=%lD^t==9c{c>_)aSV~w_n&u0W~cbzaYsv zOizYh+Z$*m21^{*6x#x$om=x{zJEb~)-msV$eteU$wktVF<}6f6#qB(i%5z7m%T)lmOL#WQ(Ku5o;*i|eTNXyCfLy{g#11d_`K`c&|yoX_4-$mdMK(*m2G z_9y=p_a;?wFNrJhgV|S9_h>)!f>uWLq~A014Ozj#&^OHk|5vdhwc2+cZ7+-RR$?7< zdU;5^O~z3{PlTlgc{$lIsWX>LHHbUJYUn!AqZl*&F7g6RG3WIOfoN=&`mKKeURQC( z*BsxQhj`u)w(MNt2H8EMHGhFToAQdgLV=0%*;{n~IJx-WuSy@%KNuj=Lw96r>r(ta z5>VInS;;5zm)=||HMhzWMeWO)D@>w$r)Tq%Xf7p^lZpxVW^5)iBlaBwFiWKYx`=rd zIY<0eAF56E-C-7~>UmEv=Vk9bd)Zn!)xtWqN9G=W1v@dVm|MlhCl|3hS$TqWyQlWSVDx@We{v(Sbj3n> zy7tS$XR46~TM4agYx?a!qJLpcWoDX=+PA}qbv^LYcGIy0lxlCd)B@<6N08@}#qW{o8eK#5p)`wXW$a{*m!8U5 z%ghbll`m&B+-~($W;%9Ncbe(p+-8cVznKnNPtq4uC4hupl${Trq1DNmNF?1i+KyLK z_ZGHJKj{5bQa^j2cXA*?cEH24GgJ=YC`#!%2z3G0xQU-;+-aT3;mYlfOYHWn`(O_9 zJn0mCpXn-Q1~cjS1yj=N<$owj$Zi_i?As)3A2icvl-B|skipu3?{7zfafDZ6SYx^1 z2~(VL82BAoU0gr8tfXOL$g;ka6Fu77=V{XBmhLLZ&stgh)LT1mM^Q&=vJx#A4^?Z2 zhtllz4Oat@p0NPF3i$y?m}gsNU)MVRbRq~(WS>ZTV>_9aWsA~u<+-8PS(0WTu)FXF7QL*`_qK7Zc`bd3*I%~hEN1#c-8*_c@C9uD3fwlou=4_oSMF+aeQ*!VN z$nWSKlt9>)nzTpdedf2**QjO|HZ_gY;-1YGy#Xc5Y>UjCJKXV~4X}I=t86AslKT~y zlyeN32S=uK#1pZO(fg>5#M8wK)f(;jxhCy9Lu9_+K$UwWgVET=0kPg`*w!$+#G} ziHBQaTve{__C{`*c|33${zLT{T!LK6J_j>cD0v}fAcjSsCF99Mdlkn^Stpbq*@C2`f@4YRZ0+CeYMS=4n zJw2-tRG%K5l!nzJt)~HI*9ug zPR%+w@1n19o~CRY(@j~YWD)XRlUU?(cG4FXB$(@(E`*(>%*u3+iV( zr#gj<72gL|V;cA6)#B`Q47)auh#u~Y1NLj}4SkeC+& zH(e5&*A(u~0BBV&*bmyW2cef>YjQiHFBTV_NfG4G`4xIvyRq<#af_j!Z>jl-X%qd! 
z*3jAtndj(dhaE4R>A*imJy;5kRCyFa{Am`1!ueF+=73Aoj zSr2;$niTf6l$fOkaM)pQ;!3r@0W|JjhBDW2=)K|&G!faGH5sYK3X|sJlL={b7P*uB zQPxvk2>cG6)Xj9o3I_~dz~!XTOt_yy>DC%}nC-g*M-2KC&fn-N#boz9>{jLlWCGD5 zNrqh^$A~NBQE4T4%3tuCU@y%hWFtRPe;Nhw-(p*Eu)Bq|1NO*z&AuHk)17rD5VPeD zcP-*o<~VpKnVSsEl5dPu=X`37F?eXcssH(>^A-*B7I;fC{E zyc04nVLzCCHX~c4ISYidBl1qWzB6aj-#|wgZNfbip=+d_iB5D;{w&#B|2jPgk4EIg}CYqaj(oH7o6Tz;&=-9~zbN_SYbCu~6AQ`(mArbw9IWO6P@1akNcKF>`QjQWAZZy;Grmdu zdNE&IA<4z{%mn{a*|@SqZkSS00^)}>-lAphZhBk6UfVuXMCguTptV=vtMa7%r@vE9 zH|GQIz|?u}L&Ajku86=TNLu5)7_#h({c+Zbd=?m$m*dHG(#q!KC)XuS2s-cXW9VX+ zL5s~rhHl6pdn<((z2Wp`wZIoat&$X^4dun@K?&KQG|{?7zB|;wK3#QCXymx5y+a&w znhmGj`&_@w2W+5wjcuJi2hIbQ%U_~Hz`0@qv=LmM)RlOT@zNjEUD8&((_E}w90*yh zh7Q~odyHujUIKKs)&_SuAKDGpd0;hgTDKA^2S>@%5CK|}*&9R9&WX1P1V^PGDVY4H zsFkt8?DjV?S6gLlg7v075zDk+2S&P90q0!pEgM~D!1vk*?z7MZF_nA^*_5#jeTS7L zE+ra^*@w4OXKF-&R(~59D>7k+Im_u^76Pn~4zsOt%bZIcN1?suGtMVSn6|IGKB|!o zgvVlAGonQ|O|3*7zLcCTeNG;xJfUix47x0QGu(r>QoK0}DT6Ott56f5wLipWnsz(8 z;cYd4f+_gpJOi|dSf9QZc}Ch2YVd*7Fljj{<{yKDw5^E*-mc$ARFVx$TH-TgwVWd* zj_&qeMme%(ir$1Ga&vOHkp1c*-kj<&ECgq znfDQ+*Y)&@X-UH!Pl@ZMSta~y-EX}q4A%n=2~Q| zr`Cc;aU0^UpgtCs)WP>Mar_!pY4Iacp-m~832O~u1*?H6rjq=+<_v4ypjtcIj`|PF zb--(1|EyV{*@LFcfu8c`;$qS1T;Hf_9AF9y?zr}5)IpfZo8odxC3oAKQs46>e()77NB{+iM*rOF>qf-8)6iEBylvk7274P6gkoxgIDdV zwBEmUUlv;6~a@%p_b2%7w*M&nJESC0vuPrLl%PL@+M;4p^fRQ@D=FDgpcGz zyi!_5U8b4^B-XR$_S_MBk+mTa1d{Bvq0Y`vj#u{AV)y)(5r>9>N7a+zv(VPu4Jd#t zPoIK!!pakN5+?~(>Zd?T<=br;;6T_MTSwqM9xGD6{Ow5=Hed{NJ37gN=d2*wcXp-wAX!%zHhk-9YCMkUc!I7svQ@RE!IoU zHRx*nT9CpPD7Qd~xHrd!3@5B_5tE-isR$-1-xL z;MCe{h#X5f@PfFhD{=ipCMx#3w~;5a*T74u38_0#AGJDu9TxRG{Cns_?afe zC({k4Aw0m^o2d^BwU1@$nqgo(Q%76u8cF|@-*fk(zh%9HyU_oo4nSMdui|@(nfJSr zbmAy|h7IUX314xaScxIr|5!kwf&IO$jQ5*T9F=@aP0+cTE0KK#E4fZtgP>JxMGA*h zu|47{u_9)1)K5ae{6YWHM+A;zO^qx3w?LKor|+X}p{=X0zOkcYkhhcCbxq6$+Lffql>(*erp@DtB z|34J~ob#3Bb#PtwPRzjEFNKTAjp5(CHa3WUV1GpE@qNq^-+s6vCz8!Y5@Z|kXtbfK zFIa=_)snWinBDNuuoRzenW3VHKkOTGdy@5BUD95X|3UK;EYugQrF1A$pPcJWa?e+= 
zOnazMZNd7(9-Y?p2?2}|w)$wcrJw!=mTYgPyp1OSzj7*w6!3EzMP|b_3BxEi_F3AK zE+;<_s+NTS`K?}FNZYs%@Ec-PG7if@EYrF9|B zplR{%$zS*tX*+rh6~klBhNjab>M~f4!Y9Dfwqw9usJUaW?Yu~yoIx<)PK8adqd%fwJ;9Uu={;_^9PJ8polK)X2=ig16@ zmcUD4xBMsa4f&S+2bPJsQ*-fZ{9`;%_9PwBS`eKnQzmQ@i@$oUt>whUlZ72Mr+(%4Wl>$gIpA=wR|gN;I~f^2Dtt2GH##F=QFN zlw4%_MgijQ$SrEIbBALmb;>%~DWg6b8oR#H-Bc^w0eW_x0~T2&nT?PYVn*y3Hk_`G zn~ZOzZ$!N!`_TUp!z^R?&d@qrFYYqnbTsA)EUkfk+#KB}mz=AmTo3+YcZyGnLe`cM ziwtG^Cl5i7G52DRDqOIbf=%RC~htP;_>oGb@<-907cCdDE% z;~UY_@aYmzB#KO@Tf=v>=THRMWS9vKLn+ff+j?w@wV%;~eX@ttZ}Ba_57{EZ2HG=! zk;9=cDHQn*4a8lcju34n4zZUpk30xXFqI-R;0cz6t}tYrt;)Iy?ci8xC`R8pm#ar( zr@)o6y7&rcO(svU$WC!$P=TF|TS>(ccO({i67_^Ig8S@q;1=%t4siw=`t9^u_QF5G zZucqk11F~FVpdQh~@dhr?^~Aj&-0AM- zIPCrirI{DOO<`77jkHJBD=X06=;hos*bwYn27pf^{z%RxlE}$%JxLuUmkgp@G>dL= zrXgC;>Dqygu`}*4te(jZO~Q_8tKbZrSFA+b#EhIvXg={X{V=wP-1R^9-YcrAtBo2q zi6$D2#&lyev1^PPYhsU;-VSj3;q=~n+kKxy6-5LQPlu6Pz1Di>nltzecnHz^*FXd4G_P*>PxQ27tx*i!F;6p{ zfPHkKmSu3Ul5KUsge2cqhy?QY*iRuDoDq)K$OHNWU05l2cDgwhU#M7TIfaiG=h{BUKjEFRdt&$4e>kkzDVoVyiS4Ew0j^_( zewU#yu^6w(@UNKM8e2J^HLlf^aX;d4AMLFceM%Vd3W%zk}<@!RN|s=cPz=;5-F=Ce`Lg(=o= zB7I4vA3Z|B+-Bbr9vk|~amAe;gp*GCmA;QaE#B&x55;3=!FW`}{mB-FS_KrN1&tQR zX#CO5vR!g1x>fl=v;ZyD&gITP4;m20Y4nZxd`KSlo83EL3Wfs>UMI2B$O1qC7Yoy^ zLGWFP%HTp~%U7rk2v7B~ECbo7IV_AuI`xHN>F7tMScV++vEm_tXsRR9|2g^wjPmNm zq)4>$H|R6jP|NR-O<~Zxpew3=)n<69cD=L@-lQ)P+=OqM#)bWWthOZ52_(xtF_=I< zb~gJTL_sLf>m2$PId6XpM5^@WZ@_OgcAW#v(P7FwXsjWcT&Hd@#R-gWnCM=Zb{{~YvVc%)Y!=_oS8*6AFtJ79d{JYcYB0l;kfQPBZ@V(yaogYi~1 ze+x9p{uO5sRPAV_Plf{kVXz3k2aWWH$r;t^b&}NZSlmfv+epVG+ETOoSPS0A7X2adekuo$Zi)m42(e z+%Zo*#}VtCB+GGn1D}W(&ey;V?rESB>|i0F2+E?~gC@WT1%~<%j$aR)iT>slh;Xst zria!uphL&E9fAg{zO(OwHcGP`(eQAg(&-DkxgP?fVS?ES97C*BI+%()p^St?sLsy- z&q1$w4T7Iw!NyYSaCEbF9;pwkR}l73(SAvq{R;X@;B=H@UxfYagt4EQLjfObb!Z(p z0b>WnfX^_C-zew+7Eaz7Ov2&~9E;MuP5sbnar5PeY9CXS?hK&=KJG6Aso*jf*LN;71uyjUB9$bst*BeXn`gN19wS_> z$#Wf*gebmr0dkGF$u(6K&HoC&plxDr#ne`iQ35L82d}?gy|ZMi#1zMQ}$vNjx&C@ 
zNf&}sUY=MR(qook9x|r>Lu|dGNp%HVuO1_Vu-~vECNJc?N~nE_JlANI575y%n=~ICZ-@{kqti`Wxa-ix zmIBtdsIRSsnu1Cl*C~t8Nbp;~0<-~MM z3tVPfpy~v3>>jdP(5DWX=psaSviM&@=>UT>61oS{7<1v}5G8ahycMPdO@SXE0ls&T z5Y&t84Ax;wwH1IjFhN-ev;!xkH^8N!OZX5>foAX)fe)c>wkH$-E9fq$1Q`|*2aiTd z1DjzB8swWy>S*qGCL@6usKJ~7JfMgJK0?+=E&~xrif|C{5V^{wfx+l6ljHntK?f4y<-<444kBbB*>fK&SA{9#6=uf$#L&qXc}J zIxezB#F6ih{7D)h?ugJS0{Ab&_p1Flec@wuO2!qp-MA$*$#vUuDv0NjJAU@Pk3R-& zo;7$h(yYsjxFh~nRTd$XcE|^X_bIgEk>P&o@%#{XtG1gH+~Z4~KuNoGx47u28!~4EIv)Ro+flw_zpwpo?WH zVGPEfSjUG9;G6B`f!FbcfS+#*b`!ebc>>EPQ&(!;(^Wdfb=OHvl$7mq>$VHGxn>y3 zc_Z*1QxkhB?zEhsZ@}l+?uNu+H=N@FOUN9a5Z`=E1LK}ouvKJ#vjM-P8>1M6XX~#? zqH&qAL~tITXx4MZ*ahosRtc7Edrbct(>ZEFe#h1VXrLJz2l@MMA-@#kJRhJ}(0)}B zR%P5RBe0F;72+gJWvS=CCKXw~gl)q!p99+iZX+1&yGDp39JIEJtpFuRvV!3-~2;L$C`ONp{;qkWuh8_C(Tq zmPT(!Mj~CI8%h7PMU>gd_h_O2ePlBBy^jo8gu$NV?MCdZj0Vp|4~wqC0oW*hC9K5! z!#J=3v$6hw%~%@kCG5nSL(afB)*G}DPQmW@3*epD8=t{&FFwTMDEu1#RJs_7ch3=C zg6iDg@VcOLE`k#Y-E^I0g3tq34b2;R?Q(^zfQPy$K||o5T_gQ&!!*3Zy9|!R9Uggb zJw9KF#iD|0*|ZHdF)kh+W0QIEQ==uySz*j5rsi|zfk;37w=`OWm+9x=m~fgkEZ~sa z;>h;7M!IU8@mz?XL0&0pHhd!)EEUB}ktK7Llb!>%>VOniuQSKzr0Z)4_w>aDsE7PQbelX_k{LCi%oFBBjZ@#_(Ic1ZJUGiE z1cnqwZ}>*@VCp7!r&UXN;v(!sfWoyJ*zU6rkB6st^07zgCi$o+yDDFNKC(_zFSrr$ zM%T(MjhJgV$<7W}nl3X!+?5uBI??^1eJmx%MRigFmf%(3CZ7iUTX?sp8H+}b%YKVo zseK~;Dq^F4uV8lgHDe@qNcbExjs2M$vi?Xv?mBOK5W3T~-qB2;1 zjKg>c2vbjBmmyj3Ys?0F2foBcAdCFJM03#z-gn5I>RXQ+$bGC!6zuW=PVr^<0Jt%1 zC%y?<#X5*jgl;gtz;?nB)R~w+GAUS!-a#4zL39I3^D9T^q7S??k&77MQH^ZFUXh7Z zTj94nA-)8$a+$NUxqW%6zgvo=v*zY$DxU z_aJl}^m>WR!bKijWEk$dXhmYZWH{qM>}lETuwm{DMVw$Hn5P~jnPC=bs}*C^ zJ_fhuD^Z_0OaGW1W4mj<67n@5w%7PPgr9;5+eh)gVOJ!K67Jyrw1F*|EG%~U%dScY z03y38T$4;wq%Ki``g;8*qUEMnra<<0Hnz1e_>%L65lw ztab$@u95q>N*Bcxur$eNu6U2G!ZBMu({O|g^~f|`RP^h9usjz0X)LyX#9C?fbp8;G zIR}Abedj@q@TjdgL#cbdX$WVC!4`3nx5M-S;w$nrJMAAzpITn(waN^eS}|1XVZSV3 z8$WUeF?%hYz@A{5BN>|LyB6ZXKtejrX_d#WW$|o&;lFWL*%v~+!XF)Dto4!?4nh~C zINKb0~PM^`UMmT z7DAv!CHWaTp`D``3r~^#sQCys^A79p!MhlR<`XbMk=dJ(l|CrA1pzmI7;=!zOS#P$ 
zifi$oILq*#fzS9%?1p)XSb^ngCd;iDU%FIn$Hw!%)RTVKjIYcx)D<*fr=v@Kz5wSV z&o_Anzlnk(gXwD{Yta%m8gbXr#H)(r{|KAzT>u85!8;}|7(Hr!3CeJ%&yy1|@6m>{R$Tmkh#az{n zh?!w(U8VaVE!ouKk_C3!cHtwvSwI5X9&39(-)N3Y(yC!gj4aU!Uj|PIV@3sX?qo`n9hFs2sqn0Q= z!!l4e5@~Cu~?S^KZB1IpfyQ{t`ziTYf|1OwsNiw;ZN%mChWy(FE z)j7o{AHEC85_St#2$N#PqDvBXI9-a#=Rz+PYn3;wXVsr-@^uF7Lp@c#$Z*Z{89> z$kM_5)jrZ5OM1h40ZV*#lJlZ}%V2Js;)|F=g1xE*uB+lU4G*l49n@i#7Uc;;u@==l zFm=hahS8QA{1Ijk`zMTUo7?#-WuvnfT<5bFo(%hMUcpHsZ#6CC_35AD4$)7>7{DNv znZGj^D~c?Inyu<*wqKw1_OI3BBvf&>2nx50M~9ZvJP0V$XITG z6-J*44%$%XSCTb$*kqJfI#6|_dXm#2*{9nGn7DgP!$1lBvegCADIYio!E1e@AQ3Vv zc0Y45U<`l3c?FzAO#B+~cYBqX0+t%*%PvCGR9cl5N)?~cK7l_A`_qWQHoBK}ID9b( zb! zu5@LK#%l&#gE`TL6g-SJ*0Kz%58Pny!M^tn1ec?DWD~6}b_C|dDu{UmOyR~xpEvIl za-v$brP48x1@d6UsfcLdbd5V)!!9wbAU)xO%=cVl0-oB#Nv9nGc!2gtjHB`6*C5AP zfpG!*|jl+Nq}B6sKk1S@fDJUaPI&5Y32o zDlAUQ5@@u8N@kh0LMt|ZD%;3$MZHmM6dsAdyKGG15^>5v&E3iO(yQ@+?qsxOrHO&o2p zrc`Y;EZ6fTyG^G}u_+%QQSxz` z$@U}4Bf20XLxUJxs#N`IQ>kRQX^7=4cZ#LYK8((_H#^q`HJ#cGPr=BIgib0;1Zh4 z)&O}0jddP|Lw&|V-AGtucNpYcfvNcCo$UZy{43yX87Wf%7j=V`$)H2tsA-2r3Dfmg zp={3Y<}cv+)Tg$kWM)&AlY#hpmq85VM8qVH45>lx@J=AD&W}V3kb|ZZ(hbNt%?-s# zWAH=#2fCwp80(m60(K|#q*a1B1DYILun}HQz-?$2{+jhB zW+M2Q8y>yb_M7m7s9?i-i84~I+9q#}z$KOHAHtJ)Z*&TG74x~V)72XKrS&`fP{1z^ zFYJO>0qB9gz%;C_aT9@P?u3nvneH~y85q4-uWDfl6_CU&VNnLZ_4tNWDo41TD$ar^A^jU$Emh6Yoi6jps; zft0Jo4x2*zd036ZX}CoH4oI_f2eF{@jvVht=y+&jOtxxq*az+*>KA-(s76~W%CY^R z-zvSJ?=;$#KPeZP-5R#2*ScMw8#dqGV|qfX1%}zEK~o@(v(Wo7=_v9h`WyKJ;XyoI zNl2nWvu2o_Y8|SZqxw{L!4RTpSC~y(^bXMq%PG_EoJRXJ>jyNN(`i2!_!ztfE>zg>gO7i@={}}DP6eT@{#2fXQ|C@|C0K~ z@xaLp+zXn(MDHi?9k?s}sraaN4st{$*IjqKRJ<}|nflcmO;qhr-5ASo*jLl#>KywGXj$M2U@I)~u7+nIv3Q@L+VK^5L%h?`Vp}dN zb;=EVWi;@ks#`+^PD`Hae*|OsRO2~lHA`j5hMt7x+r@BBz#(89Lh<&2YLT~?FFzCZ z0_>t{7_dB+*1;9}1&T8Gy0T84fczl&SZ6|ld6CAoNHnw4G9GCQoo>5_yb2II%g|rF z?vainBBY1c;R;mtauH zR$C0V(*KDw5dG6@J9rn}4SVrsMJ={x3d!w&X}!c6aYWN1MG3FSC+rZoe0fJ-Ef|J2IPw7SV0^hVdCnrk1ex zI7Oy#UX*#F`MhWunF_Q}`dW6+wp&>#7;vz(Ms@@6nUU<$f}PfH0;A#Wj{DvPs0BI` 
z?x(#FwgLG`&*krRtTOb8hngmuY_dg~^_DM`D%nEoAx*Y`Vc)F3!CvF!ndeg(;9T40 zzz}$x^M-dOIvif*I;n0GzJ`uyqa-!kGNi|)qH@(#KNy9By^jrBow!x84pI)(ZHkmdL&Z~~MAobuj| zN6mLl3H;^OSC*43fn&IRd}uN7jZ+)&5SkA*c_$$r$RPBR{IaGLn5~+kGg-PdJi`k8 z6kW3EL*-sWm$_3i$@JD5#jCW=u=_Hf+PTgVp={@7vX9dN?uX*N}8*S}YH0jUUzY(u6{p{d#A6hidA|~6`0^AF^?HmUE8lVFyu-Q8Z z#*qg2Bgtjk6T3+sWp6e8s+#PG)pTgGoI-iJegv>owAN?e26~s@_|;M=NuU#Rs_$?l}-=u)yd@7!-JJx zsyO$4iAp=!y^gQY?{u}Xx0@EaR?`wKU3d$n)*gs2@oR9_U=?0N!JDL`^Ah2$n1`nC zB-POmwSUN!Q7;s~tGi7#BZu)K^_~%9SSO4<;d80iEL!(U$`AHWT>gGOPAyLJ z>;cm;Ip%3DU=9X!mTXRj<*GG>@1vhYCc52F=GmKM8^wIoGnw?BldC()x722 zcA5=~nRMWq84Y;|1>4RBNZ|{>T<;mE65asE>ytz)9G@7!lsqzZnWE)oT7S#uDvsh? zYrJ-(sL(!1zmKbO)R-a|kAOd{^pFg&-LW{}cUS~|pWsI@R z_`Uj=hHdK6eJS^~I1Qnq4{dWysbM$l{g!Lw3Tl)6_mCiPk#kGH2V_L+M1y4(|u>i)HmxNs7=Nc<6c>mS!GTX@~x|^vM`zbGy77;G{)Q`C2+z2 zDijITdw-7vBJob6N^6{A@z#W!V)QCqy!oMuV~DecO5;q3txgba(K^O(s%*hdJ3Yzq z2XHDl4VVVa@lS+C!JEDM;mgPa$Ir@dtk=z-sF}9Yx;ff-dzW&NzSPky!HhM|Dt@ZD z0mx-vvzCEdX^-tCvb zvEnGhGboL>+dK-Mz@BFP2F|3h?30n{l%38gNQK`Sa6CHSYXS5YEhhu;enrXq6UumW zvHGm~8tN(kLN^sD3Hg%Bej`W6hn=4Uh)pxsAgyUv*D2|z;i0Cn!TXV6);|A8bds~c%Z;su5oe}#HTS$lXZua?QXg%fAsMey zI;P6!OJkissk{U>V2)P9*#fRIq|r?f!+epDAkr%WJpuPQWM+XV(Hv^o zEcMcDur?_^P{!Hrs7fTK9N%hb{3}jh!${6J05%<q zk>BkTjhS)}(;V{$$}QS(3qyUG)FV~vrc1uH4>73u>m6)UJv+l$YWuMOx3WP`O|PpI!1M;jxxCp8<)pXi(Ar!3*d9b&%qmD#~7v+J!)_B2Po zeKCyz&`8B#3Ai74W9mLy%Q0nuMDtT26`RvE9FQQ1*5W_cyDTEDPq zxmRsj_G>J_G0jP*9(UFO2PnSaDrlKs8JXEs@5Mqq(0OK_cD?1Eu0%iH_K9khVZi>A zbeAd1F;D2Qcsf^d2V3s~e#|Dj8)Q()j2lQ6bOG1}oBcweAjIu896pY2G7Zrzb_%sd z-Be(ta)x0DP$$`CJO*+EapqWPP}oB2Iw*%Z%ytj<2rYG_!4HG1z-nZ>-$&pjbd6^l ztU#|Cn$#S)T(eqBhaV_D*RMl<6*n4xLNxpw(=DW!Beawww;2?h7@ZdSwPOmZ4kVnt zXpL_Hh>$1V82lM|(@mwS$1kZr(lp|&vWvP>JX4fu*oXtXY!eG-umQ_-d;vY&dJP{P zBCywDcLPOE06XBj6r6>ndwN4nm|jPIF(aNTFKT9!qN3Bfsp0*?GQ)^)8n@lp=RU!z zHCMU=X{W4a*WutU(rer^;G**vyv^qVa1CGR`8{O9imeLZCVQHp6r^zDG)q8o?I@3e zW=YiIZ=f`JKQ9cPrRK883=7==?Gr>}+C&kf-`UptMWg3{k3DZ(r2=O?ACZnC zznGWU*NH#SRXe_u7N|^)8;U^bOXn`tb72#(QM;YH3KScHm=B=e&BLioc(k=Y=n{O> 
zvB%E`=>>B=-;fUHnCS~^vYe~Uvn44Dl$d>!`o44;c?)N;aF~;frU)wqtfpg32Ph|3 zFM}X~Jt`<0W;maaC#3>N;xzoar|I!<42D%am3F9tp`BJxevU(BP$ zN}I@%#>egPwy7Mxqs?BykT`ESe+&JE+~@8I`W*6xJbim%5WYZm26IrIF3#w*j8gGU z{nqs|tL1l_LnO0C+Vgo^ZTlTP>`ji5&gb+>rzbcrB#lg{@eC{mJ7K+V1Uwi?_e?<+ zpq*NOgTk>)xx#qeIYMeMQ=Mmoc1t6W$=zl992B#9>{4hZeYm3vx*swY7!2A=zV`ePHeoE)8Qo=Uw(NnyjI9&aOE$5|~ z2HZbzCR?bk;|$2!<-({R+Go4wQCN<0{G9)3U=|+kGZ);7&+s5K3$Py2&29r3fN>8g z;*}~sLG}sni_arBq-*%M(T^1=oHBHU=3_=83hBYnW#}RE7eOCl6Kzeto3V|6!jpl2 zi2R`c5jw($)t|xNiFEQmV4gHgJRL4qgz`P$w`w2GRD`BuFlHjvMqS7NI>wS4xDR#P z2Yel53dTH71-1z>>BfR{#BSA(V5aORnG71Os1vE7C^dsu2oc)n>|EGp$ftM0_s#5( zG(=;Y6BviwcYf|GLL`hJ^UQd+aTNOoR*k>KllTg7Q71(N^>;%?FNgYE-e*aZ+oO)Hx&c3c({_ zrBTQg0*}n0tVFQL>QB23aqMe?U&CXaYXYaiXTa6IE09b$&~qD^7r;_ou^rXx1BeCc!MV=+YK3)2?wZQR%u)~(;9Kl;@&vrgz-*;>U&M?%@7_gN(8n8jx zlo(J6#|2DbP79OR zf>9}J4S6b_Lwn>9pl3rmodWE$AU$9rcbdyV9MgH9gkrH|k7U@5?UF7x1-K>)Z<)hg zU-FJy%E(j1&DJjbA=70$hWF7t?U(V^khzXqczWP{;8Wb{cMzP3`+0{$tMPFjMtC)L zPj$t$D6B)i*foTIS9}ydB_1W1is#8Z!YXi^5@J5YshZ<7ay4(57c9c(m}>$y;j``D zK8g4W;Jn8voQepPudx(?T_(k1#7yy57$jTB2QiEC2l7s_UQI9!m`?YWdK)vCJ`5g% zLDrc8Tx_#L>=TLAf=M3tuuDjt@*6U%dzv%>ohfS(g`j^Z{dp%)mih)K3N`B-jF)JF zaRN0Otuh~=97j*tQUd0oZ=6;iC$<3c_8fwl$oV%BPL>~#)WJQfal#GAhZ-4g60$^B z%f6534X+qBq|EG3oq!BjGbj=0Ovh&dAEPP&^U|9Rv(i1qm>Of?uam=IjG`w&TnrV83G;EejGj;gBTgGH@h_0}G&8 zqzlIdIKulW!bSQ$P9hhu8IrF_ALgloCr*QX4Q~`M*g;R=9 z!=PYD9C#nVF_aMZh#-74v$O(z|`Uo>`$DD{AZ4IK**iyTn+xrp6t9s?oWe( zI4Fr40{#kn1>3-V@UuW0BtVM&5~1fP*V_cgW1o7MkZD+}HV zhO&NldLUL(kMsoT3nf$G(LX4?;39N;zyNp|ed;%se4ahMe}tD{TRnW?^VkK!8Jj=G z`$;-rrNQSR0-~PDzT>_uYqszVHAI$9v>561daGu-zsP>{;k(3=q^6m z<7ap~K3tj-IfOS}6dR!z&g3_TUzN-Z+ZxW1d$6{<`&40cs++4#4gJ}5&2TxWz=c|Z zNXLU;?8m)}@XMg52N%yqawLbsEy8Hwq3|J+ApWQBWchH;bN52!Mb>E7X-zrpqRXM* z5whR)lPNApgdek(`j5glI?j3L<8<)32Z+x?o=QG-eqzEo@U$@qQOO$--0ILzt}(UyMw)Sp7GAcPC;oN$yf`zO6-FdD3%KNxK@R7 zEAhpeCiWhDnEqo1fn6|qP+wrX%qbzmu}tf&z<$i-VEA=oa^R?UCdP)Bc~q17s7Mix ztx?DE^RXG)&DcGmSIM(nWLyUdf6HmT!LP+Zw}mnUUTM?%BZX0F7GsS 
z0PgWPfWAQY3%8+zbZ2>8=nz9{*gW)8Qy@!>j^vB_I+J6Lak{pcr*!{97bMN@%s=-$w|;63asN)NOMV+KY-dMw%hbEpez z_bG;_VpqN9!yE7+>uhhTfSoDAFEccS-Wn%ho_gfkFTG9KwuChI3$!xqcfN+HYbeB;|qi$ z@~W;(u28Df8m&%mFq+I3tIh6k0w4qohH*SjC9KR(Yack1H*Q!}-Xh04aEBc6z5Dkckb{2o`0*2R+|QmrfAQiaIri6Y-n@PL=Re~ob_C2PG6Gi< znG_0<9V{fWL(OlqY2nYZ8AFmrC_uqvxd4i*db4AAwXNyn% zT@dMH9kTpM4gmo~eqb0;NYT753I!jO&|+?u(9$lJhL)YF31~P`8NBCc8Kd(^F=zkb zLf*li0zpqtzVJv-p7_|`1(EJYa`0P8gMf?MY3WbLb7KnQOE{F`jxkR?#3L?+XmniaQ5yceQ zqtX!D&C-yK{gpu(r+2R}J6a!5cep0B`A{Wu&%ts|`@u49=YbM_*MVYT_kkkOfxinP z!*?!`qy&b`MZEdu|J$slXdv>!+ z_q1@zTU)pltbJaGB=iH$9PR89#@|%^xtG=ElYPhmp+HyHn-gYrX)!Cn{?K-#3aG-aa z`QWK!>w)(Lk?iv`ksk0nks0VgWKpO@c8G+?p<4dTVT3==VQzVp!%n}O!zsL#!>ziW zC8)oeA#S;xE^E8EL)m$LyQaH0)o|!Us`=2dR7=nM;=eeoC$dQnIsb!0F5Ueiml^*k zkCk>Wk5h2FAgtm>0l)5Qo@m#_9O<45S&EJ`ndTl}<>-%e zXPJ+YIQqW$HwXXUh^&D1L=MRzH&{gEg_?+b8uqe)vFS-MGv!_>JNsrCxA1bgpscS_ zQgyaM(sZ&yy7x%2eE@^UU#^Gnaw32RR66gM8Nl(Zcxk#_ITmmTiNRUT{4(;R(Y5ZnKY zLvCO&kxvm21tEH(hzdU~VMO06XKlGr$=QCXnv;LNfm3m+fm?U9M$p_-DQY`dD(Tu^ zC_U7fFF(>w*5iHg9}e06Yl(kxCi8eYr63Sry+Qt`g-BFTZSLfN6t0!7dJg4p3VgUIw-MC6bh@&YJCVGxff4%R#` zq1x}2GhEjz8C&{m=^1BshZG;%MX&CuV>cWmIqa_#v~`z>I=V_E-Cae}1D!?k_Xd&n zKRNiVA@cnLiDL2`q$EW7xQu4LT|o=KTun0$Q6L;<_k#@D0$hzMbM4ImmBFlFnk?XsfDD(>;Ij~=shDh(1Q_a^aLnHedf|E|} z^UXd?ayUrVp}U@4-?fw5w6B`q(pg3Bxhq6_JIW;;d&{Jq?+YT`XF8GXvw+C+`GY9- z@gqtD>CehT1h*?fY!`P@qI-MRZ9B5hFZSGTuL(9pJ1*tn-wv}<>*xcPlSWO_{_@;v7fB_2zOGQYLY zD*}UVRtL(?x2*?`^)8D!aM&YhUw1%ydq*g_|7R5ad+#6i?|pw0zq++iP}9;V+_|ek z^xhz{Jf;u@>t_>X9*c-dzh(ET{6qSiJru_;%?A#gSrOfF)GMLwU|?!%S7_$$PDV~k zJ3GInEv&G454Wg!53gibtDv-Tx1g-PMOgm6Aad7DBuZEROjLT#epco4$CU;T+KEeZ zln1UZa<-pe714UqdsFk#pu}B=s3}bc7&{vGv(g*8IGGLmxLFPRc-eKGyqww&er|P# zAn$!a6s{UiRIZ*#RD1q%t8V?8Q~f{F58j-o*n546wYh%{+IZF{vi?-i#=7Iw%{z}V z5_TS8ZLK}bNvb&La#FSYC-|Ghz!t@w_pUiaMMG)L!&OsxMGnRp+P?m1pTuWxdRpl3vz^;!~{HqEqaR1t&S1@=u0s z$~(c0dtVUMD@MJlT{ZsFfjRSfZcJO(et&LA^OGg~x(BNiRRbRS@*94ZvTGDa(UlM& z|1uTIy+nhvFVc~$iwrcgpNXaSv#_*27QUm8?RsAjwJS#5+OzPd!`CLv>%2d6b<4Ab 
z0d+6Q0O@CI`DKs1r1|%KmDzU#G#LXyx*fNJ4XL+6jVZUNrsSJc^R}BbbJ9(^W$O)w zCGiH+`o17Gt(in@5B!x#3tB;BQ2dCDP&Sc4QxX|W$J=yv#EZ1B_$S+W+aDy0v+pL! z3kMR_Ww#S_mA4WM)i)DNwKukycV69Mt-G|vUf-Vp)b}NT?~H#q{JUoV)Zw2x{P+C% z=Xv;l=y`}E-;+B6ekIZa|Mi~qB{D-;L?-#3%w*W#X0XCvWN_l2q;XRpq>8fdCd-Nj zl2m245_MHK6AiUD5=?bh6D;+Y5^N3q3C_mz32?)?1mvATZ2k|2<)m)Um&hVHWKk7F z7Txw|7R&uIi?jJjW?1TjbV2sr?b4##Da!I&NxJGATMav}Z8bMsO0+ihB|3JIe|uMN zqI=ir#PD~h|Wk{p73$i6$1$fnAPY`XQ&Y^LjFHaqS~PFTvrY+lyg3~}M@G)4K% zR9(%rWMloMBy;11t@h@#iAc+-Bv;Gvr10Ix-UCGZ+DT-ucMg#eu$=6>|BC~I?ETA# zT$+W*W#F%JSsS0`bGAJ!;AY;-6BP{P$jh#0Xsa*nFxFp4weC8T4DCL#4c~J#*}bPH z<^4cx`45L>WZ&JJ?7P#6T=Fa?k4EnQ8R)A5R_wDP&elgoy!5+8qP!c0@{-GWnyL%g zhWg$NNAs~ANb8|geD8tn_}=|H{z4o1?>pf?95Mrzk$rb>vhPkO^2l?Le5#2kpd+t~ zm@&^vISKbm_}g!mh_bJg$_o2RRF%C&hWexV_LlwGa9d{v+TNLtc64T7?e7C3;a_#| zn?q#!lN+1KH znOpW2I`_8ZA)T$+NavnxwBvn1a+pZ=y#K{vDcN`TB76T-viC0{ibC~7F&%tX#tgq- z5w`JWr6BQgwIt<2y)^G+v%KO^y}G`mTHn%KVcA<-26fgK!26m=F1zy3j&}u-_!o!S zWbgl996ZV1Kb7qL3yI=T-Rn}i^HBxUb*G9OeXT~gslQH~a;8n3ccevHvA3v6?D^`8n)xwPEJH$b6EW8gWT;$I)%CWcZrp6a_c*TNcxMzEF#-?F_G`Lp41;uh!RTJt1_zWK^0wl zvz7r~Y+^>AIYdu9x}TkXpq-b$Z?~|ly-8fNr%uwayGGX1SSjDLvqH7Ex1Nj zAhwYlGJR$dIX=IW`h&GZiC-X5N?|^$pb7`7X}ZhxH00cVO6;-2A=?giGc&vPh848$ z=9jfK39DP{#dW)PN}8H#WX*NeiruxHQ0-KlqK*AFL)y{r%pS z2U8zbh4Qc0h8i!l1!5;o`fNJX6P(=DMbB*C%PweZ<(0NJ3o3UvifUU(9*uR(+UN#uJiBuczi5@i9NFDfa4x2uCi{ksAzC(o~T9XjC?*VRKw zZYMeHX=mr{-oq_!A#2gxEUav95>+=eN@{Bxq&utX<#mFF)|SlP|(Vfnk-ctuUEf|91) zqO!Ucad}O%q_T3Ctg7NYKxBA~C-T-$BQ+3nhzg&DFRJ`k-Pq|zJ9l!4;_%hQ_I(%D zxZ6(pZrXh$crAK&4#fSMxg*}38 z`8~qq{6nIYcLhJ;h?#yGhK3pPg7+9sRz3yqNx*Pyi z_J?2<7ijL%bBxHMv#h9sGwkU6UQSGIZ`g*MQ{33BQ@o9tC;6K)-UCGW@{vUCsxR&~ zuAF@Q(xl(I@67Vr^LP=h@!1MN&4aay@&PYh>CFIh(e+?^{?$-t&J`M%b(s!jTw=iK z7n#V8i!5||KO0T$=U~ZwVR&*M_nkr1EdTgv{qnC*_Wdw*|LtkN?Ros``oMVC4dqEEg}H6-1l8Moe|n-Xs_%n3J{<}Ei_miQZN z%jO$w>$`%eTRQao=`r6Py!F$xy^m%tYD&XN5S=YXW^B2u<&v`T<|XVhr_??^-mrCsl$KIoBt=Chw%Tr9~Khn|Kt6T zLFE$Z3>A^avj4e*<9e0KjeWjNlK3Q1nfhppF8#q4L-zeGro4Ocmi#;Mw!&NS_9F7# 
zP(;2PiZ8~)Mg8&EJAsHKIV5>aC$-)rhoIF&MsOgJNev@1>Hj*bwGrtY{8bvsVY?{t z$u>pm!$fVy{RBhqy#!Oioh{a)n_FxpSL2pGr?MN zWs9Ti;ufgl{wZ^E}>`Ct1>zhiN2-6hq#vZRVmYTkU222~fqkE#Z~Do8ziZY}rzEG$FqF z$UEYHMF-WIVVUKaD>9uX?4OXN`)Wba=>6w=7OHv@Q6%62~~<;UJBktAL( zQEtCjs?Rx-Z!bBLg;efOi>U2LjjeA_-O#X?MBDb*`gg_u!XcaFkV|sNC(l6&LqtR| zO-Gb69M8)+`2BKz%&l@s{FMqt%K2(-&apB}$^Lw>YHv2avpF-Sp*cOKsW~mCsU>Yg zBZ-E00TD~qVLMreOj38B?Z1M^4e%j*|1_c~L`alSHE+ro{~vqr71d^S*@b$-vH`e3((&Ge>9ZZ8A7GKB~&}RKrNOy*@*WZYbE&(cX@;9+(M;htC2VC2a4?lm^J@&+3WYxh_RBhj3pZxB9 zjPmx~ezh�Zp|XL9G=?M@d^qXHlE5qo6J7UjeB|L%z*YD7IM(6*gN@{=gLKTx_SB zu+H~d-8~0)W0@BR?Sv<m?|E@CQ^{Z-!d?UC`iSKHiLR8SZqY4qUWipBl0e9~y9x z?K_9l^qwH=x=^j#(N8OG+ea^NL3MWhZoiu9-hjG_9%g-6FSov=C*)rN8Ad;%{K0Qf zX1NNgtv5iu!?tHl&L*R6&e*F*tmvn28S@SgT1W1?>@4p+=dS5GNy_Lz-dG!|w_E#t ziktS+OY8RemRIlft1L&)Y3UwTb#Wj2UjbRh^P$*cF;rTh9thMA(&VuIVT+^rjlEX5 zv%}kckK8c|-HSBz3^>a=&$}nLpC+cYqW%7sW0aicBecBwLp}vH2kC{C{fy$W1AZmN z2Lelr_A~z#kZb%sl$tF>9>@}Cv{^RQY`5`Nx0Utzp$&wiquc%V-7yaB9aQzS4W22i$`ycG^>%MEsYa6x^HeYv+X}sc=&~S;6Sa*S}szG^! 
z%5xM=`C0Fj(lgZ5;?uOW!c+A0{8PRe`jdYD3Me)D5_urspbW$}kK4?bT^?Az{rH`= z_WK`hA@)q{Vzk~j4{jW>3#%J;5!RyqhicUKP;r$QUwVa{P&D8nE4=I_&qqHu^q0I9 zITxwQtiOCznHT8)3aHxg2{f2|@}kw`hoK|uR-PDHvE#t_db^&*Kq7H!V1M>(|x&4_6za41{&t^bSnJ3o}y9Q)}0~co2eQdC9 z%$=>hhvTH(C2&*k5_u`3q!7(0IVAaxM`+R=&oI@9*S`Xq4c{K`-tyV$n?L?^@WIkm zy-(L}ZGXAdtPzatYrq0iJY$8=owg#kQa=^dgdB^5?XK#G<`Jwv@f7<g-W}NZTm#&0_ml$IF3oJ42g&Qe$+MN_LjUz`-_5MvZ)>_D^_`%u+m8bCRE_ zc_~xs(6p!OaP4GrM8@M}f$pJNsJpKgW!_awGViD(b+^>fbLPJq{!7p$ z19DydpZ6k{K!I#h5M;i7FT5jPY5e}b??w9mycdtu0v$RcJ9A7e&KyPW#cg#|<}i9M z<_d_>yQxC&X6D}-wt*gF33+Z9(BmnPOA3T+k1)`AMb4x#5?*MS%4ZsmW=g|Ld#VY| zm{jvK(e<X{a4NaJNpq=skTPZ8wa(TMYgHBkjO)Tnh(EiV{I*5qGO<`te-6wVolMjAAxzd)|z zYLq|Nf-(?hP>8&kLU-gr;8Fh|i3f!qqF05K_!qhU%Bf6t%Hs^4?tXfB&Ye_Y-c5A< zYijgsJV{-2NvSV7uPP`yos?gEGAXZQu0RaZkb*Sm9ajA(ckgV5dLW!o27&~|IA16x za-f7Pd{yQZ_pAv0deR4{+|T0aMl!;4htkFQS2ePtONyM5b4q>b=_GyWNwvQ8m^!!Y zh-U6UEYgtre>D_1Zbp0mrYHm9h-MEU4+2MrG6EaQ$q~~Pl$eQfM&kWqmS!Z6mob>l z&mGXl6`s?im7Gk@DLD2*)X zEs$2W>yvAmbG3CXIq7vR+1k3+?2OvhY+cP^49ds`V;R7NZK|3bpQ}XN?5fc(bSfoi-Z#dXG=! z`QyIIqx+Z{eI4Aq_9lL5Lv3VLRh6{1q(a?LQkvRWUYy!kS(IL1Rj93-Dd4aCne=JbjxtE>S*cDn* z*($0kZi=nbHz*r&YEv8YD^nYb%F>V?ZQWb}Int1c>Z}6fla|=7NBILosB$ugT8zVV zqdRuIjflU~<>hg`--~s2&{J~w5>2uDv|oD55vIQC0Jo%Q55F?EC#p81OV*Imk=mHi znA(_Ao7Rw5m0mYjAQ5TELK+HRXa8;1K?TZURHL5}^%$F{E$)tEokZ+VAKClTC?V*? 
zO%Kuj%T#&y8Ae+3QUBcPer8eWK2C*xPk2pcued&~C%z%MN8OOxo?f5Pq^+APphOz7 z&3}d>>))ZwdM)x{wxImMu9uA%^GEG&wzqb>s!5bhc9mUf)=NohLn zn^kitpr8!Zw*`CImDzhjYch5V>r#7T>(slGYE`>4<_xH|d;_^iL$M|5KR_NtHF`Jd zowiT4V0PZ^#5i0(;zT+-YU_98j!VeiA%di9z*E|Ko|fErl95q+)K6b=FrcUq>B!y3 zuE^ROQmO3|RHg2XtxWDqtePvJMjG-geu7eq-;f8g5_ur&UNt#w7;ken9_n*&K7Y%^ z>)1W3zw(rs`4*WfnH@lj*%{r4@IJ+~eBZ8xw{ zO#?(}{RK}|?OAF{%_+LJ9C?Ms$NaMMj|S%EqUSX0FeguUI4m#Yh)_RQK)dxTC^r28 zD$xC?wO;(H*>1`G4%^MwFKo3sd1sq@{{v%M?|n;F8`97;h>5JfN{Fw$-m#yR?CJC z?bvqe)}J;9A8g0(er)X1jx;phaR{rsi4oP@z(-eICC8T!c*=@V&0m1(ef(_+w}`{p3N{uAj~hF8Sl|*qWUv17X+kY^QtEGjp%n2^&VmLx-SJ zlszaMbqmpt;KQ=2wR1bzov&4wkO@GuF7w7j~rd=Z>B!Q!bR`rx>bg(v7Bgg7Zn7z|&1v_`7f|0QjOwFWV zVHpS3)-hmb69o=-5^%H^gR{K|TpWbp>L>t=V+3HG_~7mo4mjs9z|R?A|M?|EZT}IH z%$Gt2nqQQS*$LV1HjqWYf{yG3X`X(N?8AnnfG|)63qi$=1XXAZDEM)&lhNpzOh(V7&hh{C zOyaSSN%8_MdM4FWHYEFpLlR4bp2;Y5m}%@QWqABdqA=n0A;QmO(XvTtto(68y!>H; zOg@&7D8DOJ%12}>*<1h~(h!R@Xpn|1$90hFx&w0EtRV-7fo!5D=sf%&mBInFA0KHD zLlQUoRZ?jDOj5Y?rBWz=rjR5)NsLyE%j1;yWeLi=GP!a@rce$iCMkxbX>$TXkcN1q zA=P3jtQAW4!us)|wHQc5*L3Z>?TLan}* zn4unstPJEIi(Y#%ZqAU8Cm}C}4!It|kmVf?>4B2z zR8HJvN{IZChOc~(EJ_|tiqZ@x#iiU(DpRj1(ozQ!b5btG)}@|}t4li-SDQW;fWQ6= zkZ+%la@Mc2|7(yBvJLql=BOV9yPM)rSN zPT`K1`U*x1f@5yxgvhVzM9F`psM1a+X?4dGd07YLqc$A8F0pKk=jNJG;0A5hl&b>@8)nz@HO z2xlYY!`L7n#&xEWKzdq3_IXg_9W+ww8*;riDC$B*sQg5cSaVPxpV618%I?x?^{r|7 z`OT?C1hD0$MqhR zdks|In~ikd<(7b`Q*~VVq4EgLo}%cC&OCWeb52TLT~^ zSplUei&5^h9r-Y(FKgUwCmIPZ_gXyNZ+3W7FP)@tPaI@K?(bpByIOggrh1{Sx+*5O zv^=pOzcj5lr$k?(FVvUj=jW9aq8a@~`ohAw0MY9{hZMu_A@A?p{c@xM)&FQ-Otq`= zvpRRnhb;uVI~|@**Y|sRpS|kIIe5}X(sLj{*4)d{RCk2yN}43OdG*qQ%o=rZT6K15 zdWF7JSE?_~F3vB`Eh;FQ3lM`eq#_OZR*O*fe;M*&Q2p<;8TDZ7m~3=68S5aJ-`wNj zczFazIX+Bg_g$un+fMpPYY(y1rF%m%^gY7djE>j>O`Eb<*^*JJXw;V_*XT=AEAxxB zoIN+NgbOXJ(~N)j9Mi&b?6MRNh-kp?Z& z@H+cnigFN@$Rn(ET07O~`sY{&cI(hVjKzg}_AbYsU_JUC5QExoc}3J-rNx#0<)_L& z!%Wki;AX0i@^j^fA`4;<%8MlZsYQZ=`9+bt3kzdA=LX2ueFi$iZ=u+H0rEgl{crdC zt9qvuk6T>+7}@P?cyZXw=EQwlxBZWuy}KUbgPKM>!>eylqf0L{1jxZ 
z>S%b5?1)4kcQ_$0>PT{)=txc;|8U8ifW&p5LiUbtpu}uG>H}E_wRVf2H97t|*6FzJ z#%WWdQ==vh2gj`lJrfQ-E%&jkx)D-%GPCcY%IZhd2eROCtKE{}1Lhmg+}dh-_<@o0 zo(W5G8`|@(A9D?^8o`H`4|#}7u2Ez22I$hPOMVLNg}@}uxnOnDS)N95HatakR-BS> zHbE0}Mv*!fK(p>+DBAf2RG58@W?_8uyxHc5(O&cA=Z4mAIXbr0V()~JYsZv@M+3_K zSKW7Em7_TWCBtMv!Jt=U4%*+>U8TpT4fsjbmjmUhODsj=MUGPXSC}g9FOe$tVzhED zfOg%7CpN{JNlpFM@r0eLIt!n|X@~h0agsbfM*ef9k(O39$0&>>A4;4lq zK)u=fPg_ktA3C*a;faxze;j!9r(yTA?baUq$N;{%c&5(5S32z6`0!=ft7PM*tn&?v?pk$oykehF`kMkcZ&Qe-dj3Jq{UB?ePW+@ z(4!~37*P}649R1fujmooPw>dse=b1XCWEQ=&2OE(@x@1n?*II4@6__eZ8PgvH^8<{ zm9T4PKA4#4z}#8`mi8*Jb&`XFs}!8Eap39}^9t)8J>!mxoWbKIFNp-v3ldTI!h~ zcDq8s-iQZ|#$0eR=75U{8(d9-0c*+vH&Z6y%z^-KJ{N$t<`W3n^bN%9TnI|b<&c81 zz3G@;pvBujD#;a+JV~IWQ$Zf+3kmE%hz$*f=m-u(ig_T84gq08C5 z7NH5Bj7STgh|q?O3o^qV2(m-p#D6vXXN~?>tONazPaq6E4{_*uNJ7s;8p@kzV0S?V z-Wt-;^PoY`LlSx(6aju9V+TQeC>vtcoX1{ zGzd0+{dXTm3hLv`L>jU&J0Q#53Ni^Upd}MQ?L~zoKYviMf+3N|11UcY;=~aU9V3{D zl!>M!O7VnPEq*9Y6WW@K$G$f*RNVEJMvYpnW ztd=3wbgP*@vzHLN{qAhbLBO+XORAVC^lXaBWG!|RzC`oD8A7RV2BLYWu>WO#am z+Q%RHFu^lP+|cK$(1@prLeb-dXvui2JnDXwCi;#<7c(p_h#3@BL|+p%Mqdu^j5!~^ zJN9gNZ`_*z7Sa%jG`!Bd>(E@U*RvpA`yhE(GtlE4A(x0lnHUOaseUgtLBUgMPUu8( zsNkVWAh{=xj=B@4h`AM|jU5z~#9b9M#$Ar+j5{CE8+TgJlW<(vEj>C1Ab8cs5REjv z&YNdiEkPax@gL*L3TExQ+86=B|9eWk{yT7-v2DuUIRIRwAr5Is+4&>npp0|DYahgU6Y^npAs@yX_2jrb zFCh~jmwC~~ihKia>4Uk~bYa4aDY4O~Rmy~8^7O=m33_k3{#miEH<77D*_n?yEHB#y8 zd%c*+{YxJ%Jed(0b2wEd+n=ma?p9?dcP19ATjjMW&4~?&Mop8VQPZesQa8*22wC|t zNH=^5={x5`9;$H)(LBmxr$3PoV~FN)m_4f?*gdKyVMc4bNkg@C-%FK2+*8FNqC>f& z*u9x?^6qq%sx2i$-KZ%@txc{t z%ysLl3U}klDuVU>da~2425;Qe78>JBV*vMX1xM6Z6cO8%7cFnjPE4xPrKVJBbJI%G z%e2L5HQJK2T5V}+ZF*^HU0P{sed?Ql@Rc8d9BIhd`2)(`qrQ7PwEvI1(sI|$)0J*J zAJyW`Mw`esgKb{8i~A__#8&IbT!)ijN0@9ZC%=%06x-?h%{uPIdg@t>wnaXQRegqn#ZvLorAIMK?C0Cb{pCJ z>RyuD=_8lvo9J#9h4mU>=nO_d4{q6y)pD@+vd6xmB6EoN8TG zc8xYWt2X^jK*WlVK)K-y$lCQi(tzrJyCuknS@o*Qb=^dr+oroM_^m@dM6-)mvDjm0 z$h5vgGbo z+BX40q(Ox==bXzylg-Z zkguS;j}?>O9U{+c7bT~*#HXqnlQX3CS(!1lC0Vgmm08mADx{}6LseX(eG?!?8j?4B 
z4*FfF{R+}SPL0_c?SJI#)bRl4Sv%qpSUgAz%Np>H)SM4a zNIV^;h&d6d79EwRh8<4Rat`KaF#D@Bc)c}RetWG}*j$_bCLntGhmf}6Qz$e+^}p43 zXddYN=k<<1jop8e))I3dAebSP<=fpMmE5e zMO}_J)s9AIPW31Nr<#z0GI$+O597 zban006Qf)H-2ZsjuCC|iwhc3OZspINJqssrj9gUb>PD%&l-mrx;%0y-VUQISeVrF0 zz7`P|aWyJF?5ZrDcU2P~d?h;}aG*f?CP209J;>Yi0h)#J5!70KIMHnR+08>+emr%1 z&GP>7&6|3j?L;#O%}P%tXxO_s`w<>FdK|zyGpla@}gw1F^LM zc9~X!g>4Dgy5`I{;Iz}OWDTnORg(moe1gnKc7Oip3o-{`{j6wr!~fW1|wV zu*v~zhcvKrO$H}7#S8{7d*O}-o)J7^o{_zxro1TPr{3PeNgrCo6NXRN6MuThL=b~J z5zJse=K8(~C|zOz4ciQE_W$4!#v(8>%mP!BRFpkX zf{m>V>>c94**OL$_=@Nr@yY`YV)w#_zVs%9y!0V*Uoc4QX@3%HI*|M( zpzbGw@vhYdCvSZ`Z{OqjpL9+y{jLetEUH2tL;-AFs{_MLYB1WK2&TK@z|tfdY|JHK zZz%*Ps|awl2?wlg7`WSo0N$PpLf+`uEE~(?;5nveELo;{P1BBEc_x1 zmVB>)6+g*g&Ego?uv7$FR)oX$RUxqJ4-S~D4F>adOt4%Z1lE5Bfb9lDatODOBtP;lK;0ngW;9B~F;JR4>^2!e& zXzgbpKpNtZ28HDcNJ73?GSZ+#8l)sgh@)X4I)DgbwkHU}y&+ue1EH}D;K_Y~lk5ka zRDWP+_)oL5{hzY){U?G;{Kwf9e)rk6es|f;zN4&mzu5qf6(0Z#X%KDx4y3ylAunbH zXi&d-3d&-r-7P>#bOb5Y4dVPs5EbkN;xHNrL=51^_(7=5A9zUtz)cIB=41vw<>&(+ zvx@^Cuqy-aaq0p_InDmJSiOP6tiD+RFQkFJ=2MU$4RWO6wHK3yG^8UBA`NE&YN7)q zQrtl5M}jyO1)@TIKrHkHLA3uXzBKS|idgqKmCR92J#&QH8Z^w_ z&Abt`pLIR3KX^948-0i2A`Q`-zXipvg^-H8nAce?9cDY|aOTL9vPT{z78DE;$e7*` z8$zFnitwKnM+H6;NSTx2Nvw&`jNox@A^RS;l0Cv{3?Alo1`o3Pg0C_A*_VQjaW4j) zoE4y5uWpnTFD!Id)R_-8oH|HvQKj#wbDDNEWc*v>XW1%P6$HHa<=tx5t z(h!flIW^Mo+6T#TS_8S5Ey#y4LAe}Tl*@5_rJ)j^CkJ{zQF0j%W#NHiv5~=dB4wOg zf;8S>SYgO@ZbRrma8KA@tYcwkIq16Fqv1z+NBD<$ha(P#%nmSqe;@crgA{2<*@fm& zp#4R?GwKh*pgBtJMktqK{VJR6{49e)m`Dkr+*flLcT{0PHxnY+*CUl70})wa7eY(< zXF}>DPV%}1$9RVXhe8ht4ul;P?hQXE+{5n|?S2E`xAc7wto{V#o4$rL)Nh_+gYqCw zs4oVy5%S%4Kt93Zc^=tuBG=0u&1LnxoyMULs>6Z?q;cHy(aB+_B)W)W!UEx;h$``Z zeygM}e7~euuwT+G+%M@ww2Ssj+TH;8BMl;?L4h=AQNOv~7G*J<&^ZjQ=p07(?Jo-n zrcVmVcK3@sF(Y}tq#K#R^h+sx<{4!S@0d)%KNy!L+8doK>5VLl>XbA@w~2dWS|okZ z&5?c4jZuBk4T$Z}4MbpC+rTGSV`WvT>kG+s)wx>Mol zG+06>Udm(APwB!~M^Ysr`;(-C-bA&eQ<@pw5?>VC5LXjd8`}|A9orjM9k)BKDsFdd z<=^ZV%=)X|cjKwKB3XDLtk}kr!W)Sdma7Z;=+ux~0YP?u24_PkgbwH@;Zb 
z8#f!kL>i)y2KB}-A=?Cb5OxdEOpYaJHtMS9Wp3-yS)^O<)Dd@GZz9{BZ=w*6Rr%5P z6|-60dVXk2R+O+#E03y7ONlGhLW0c(#KWUm7@)~KW=p5ww*hIu{pAbjN8-hLu+dYWY$%2L(5AA zqQat>=$w2-e1<+BBa%E=ctN`!VcR`9YXg7TZ1r}eU zewgo}+;PEVwadc0jhID)yD`6A7_!=Y__C8_?@7E{^C60NRUad$u!|Xz-5MfHZxlr* z)y2ijs*~igm6=LOd5KC;)~FH{x2mEF+Z3^R9f@)J4*6_A=;C)lzVbuRZT=LBEWdr$4O`9^TqFo^#h}JUBf5aF|$f z0Bxr1lO=HXq{^AS`HB8L)e67P_Qas(jzmsfr<_~UDVq%t{4x)cSG*6o+de|GFi{VJ z?Pn8p_MhHvxBc?sjZHrtA2s@Y-y`ewZ4(YgRSz(Z1$T)=-7N||X^_E;zsd{|UkVX~ zofk=h&m_bIoYKV8Pw1uI$Eu{BM>?g{{oM(^-Q5X(owEayMe`tK#e0yy{R4C!$cHmE zHXq(^via!7VZ(1u-QKvU|KZM+-A^qxHN3DlE_v>1r+-Rt&zSI{s2=+G#f}983r0C1 zyj%Q;pqtSm-$7+0^@c9m<9cZ{{#r{k{$gJ=`S_j~&m*$~F$>=T?aFtdc;|a)9>;r= z^;Yw4bsK$hW@zR2hwpFvb@$|sHLWkrwpD|TSphgXWCPAE9cAfNlT=zf+EbT22x9W@ zaXH*O5urgN(cyl#6%jtSv=QF73L`vj*GAxmcMIKb^vw#$etH|SR=oq|#_vFl`P*YH zChwlVw&?StcUS({H?d)9`wPSMbzo{(2G$mOaB$E87OQzdAS$LjDe+HeK9P@o{RHDd zfuRq$O!gQ*IOtv^+y9=7!?>5iq20@2Q|^^>Jnz_fS#z95leo_l#prfA-RNncvn zL?E5}nC;7Y6y_f|E(-7)j}4%YD*~zG=>gv3nX>|E^9`VQl>yZ5G#Ksv^X-$v-+y>u z{I_qqp08fm44YO|!;U{o(K_USrD-bISgXL%K?)d`SioT;Uy*RamtF+^3o0q}Im3hd zEWj&xibV-}%JcSr%BM0W#WdPvJdHXjn-$3U-TEi<9GTW{P=Otv!&lQ zqx!!VHZL!Q9qV(zXlp8%?NWf1SpwKw#elP|1hDo(z@c6TqH`#Cx^lrA%LX4e<|{w< zz?ndt|4cBR@sdZNy^J7IUkYahO1?38-u{ijxicRc92)y#UhmY;pR~ZTZ);)Q!eZF? 
zdlqc{BL#MBNJQC#I56K43D!nJus7j@vsoBmEO_8<#Q}nKFpzDS;AI;KR6Bp5+xvo_ z104b#sSxbs4V+n-YW&dP^!|4Z4qbfTVE5B+-)(_KpHw0bq5xJc(80Q2lVQ_x8Ejh} z4LjF~z+@vIEH;ON%~mehZ)by(Aq!k~1Oawu0N{4{0l~-@$i{T=GNFODDHVLoDBx%2 z1^(uCQo(%_1@N1^fV9~IJhqU4g7Drt8%Orf zGw3<}jzP^;1B1fp_uk2bFW*mwA3l*H9g(o~y9ii0p9gDyV!?(*0kCDUFBtwxhh0l( zV6v17X1`Ow;&(5wUgiO|%gJEBoCHoQ2;j03515rWa9`zy%mp_ftey>UUHv`~SAPh8 ze|`a6lZzQ(SUvF)rx&i>BLM?c4MAU4!F+7{|XJ5Rqq37)rSy()*%F~g8;3A7_EZ{ z?f*w$w?mkR3GnEazz(tnW{4vM2%W(<))nZ97@(=K;GKa5irx(R(QTU2 z<~HTsjeX+XhkfLI2s0bthBSC04M8Yh9JcKT5F0OsDC9{+q1s!7>fQ*F355Ar0WZ)F zf_Y95ByfR%Xbkwt+<=kn4nA6UpyuJgyBJZ4oAIv0J*T$do>IGT6VyHKvj+kO(2451z`d9z~wkYa0CXJ zQEm_*#eshk9vJC(@X-^1R!W$mRTG}m8VQrM4#FeaZo)&K1NbqYqqtG(3Bqgu9({M9 zq3A-Z4Xa0E}PZ?F7 z6Z96(2lO7#dyIaM5#OWaVV@HogS0bVSE&~%SE-l&5%55@zaP>NzV`FKXFA z)&Clt5hzHOkU(>I5fg}+67%pA0wHNUJl69*Pf5AU&Y+Gki>S8(8fZiQz0~WzN2phP z&r>fjuK1ksy+S|XH^4aJf7y37!29?2fcg6eAVL~sNQ2twCp5=-31m2}Mj53|D6_T` zQb-oh6TR)9Bm`g{#&GfXBm(k?AeJ&5nnWGsWcgfUl`<{|*85)Y-|c(W@3`M7bj@Rd zm;4R|Ui3c@^jE-M=7qr70P52BAQ<@|k;n&8Z2ksPjTfL9slP&&^GcMTo=VtFNdq?7kJrzXE?$ zUiLX=H|H#?;|+ihs(CrcgNQ~NRH)B7-30Z6*e*tWT4-LZ+q##z_^nTJNXGYdp4KC& zzRrUxHhv%>!s~ogoX;td%I|1|Ht=A09&>+aMev@GW_Ay+kKM^V&29@h&29-j&1nif z#c2#X&1skgU?2@VP`A%Dj&b;VLoZ+ojgyA8<}*c zODQb;8D%)-m^6xUFjgM0H!_9QE6!$j3X8d|5p}#Kes@S6|7b{U#EFm^M3vw~NTuKe zZ#KXe)x4odLp;g{X||yKf0VP%bNCtcCjIuJ&~4@8LfrbhMZ|4G#U7@Y^JtEzb%FRJ zDIt{oDv@umJU*y1At|^eHiO#`T@YFmSrt|xX$vnE_lK8Aj=m!?im=Nbl;HljT~63|wUTIZwuIt%RPT@9r{j3{q(=C* zB}X$Gm5H1hd1^?7G&j5?zAU03wn?Cm-7CnAJrt1}dpJTLdxWo#IT}725P)i4KGGmx zjcV_$pF^(2H)uZV{8vRT3nxmjzuhf&UolvXUw^R?Yka)Y!{IaRARF-gjiwgKGy*`?!%a(_y zW~K_0wR&-4T1BKZwJkC}WnZLBvtJ@t_lp$C2Zgf%!KmhyARWnnd;nS7KS6y#pQHIK zUrkgve|x9a<-2PgPCuPJY5vE-1CBeodvLZ*tzK?b4ZdE*)hyrK@-S9LiG-&qOyDc? 
z)Iw=)wj?IIG)j`$93|52jf&Lni;PJ>AdXAz7tIE+k%lOwL4!2p7=DcAaeg{e=J5H$ zDyJ`oo1DJ9e8P0W@e8J_`i|QhwjOY^uIcr_mUPfP_02(yjCvkZT`lA#R>X$Km8yi1 z#X5le)ia8cbIgLI^>c^~vUK7?yA^1>chTN*#VY+bYEz&uvZci(k0~w zcccgd+VaGVmMRIYsY~Khe?aV4bwC_c)-OT`{|Vs#{0_(#y#uJUKF~je>ADS(xyKB9nc-Yw_>jut2GvJAnU7&kKoe5$fqdbUrOvq*) zjt}uYs1Bzd$Psv=LzGB;9YSL7A%SQ6Apy1VP{eG22-W_|MQ@`Xh_|8C^zEr?%eQa0 zn7(uF`mY}zxx4nuJ&(67XnktBwCaiNI{moQPVJbxjdGOi8he{Y6y6N*<_&UugRVsc zGOol1Q!XcQNq=RB;LeqYV$QUOx|}*1iamTh6yJ9&Y&IYY)&81AsQzE~7V1SYcvx?0 zaOLE$^Nx=!`FP)>wcm6+-@35=mC1_I8S4!>FC2HIJ$JWIJS92AJn_Z~9{ZAbkC>F8 zhoL_72NGYeF?j&to;JvBw20|)yMgI+YcJFB`U$4fxii7D0SVu~1?fK1tzx zTb-K>PTlY4D(N zo59(EkKQ^sw%~)_r^~)*dA07x8rb?<2^g);26MFkZ@ohXj;1JgU?l>AT{w6;ae?N_ z0zbEa85WNIGK4^RAtI5d6Ug{!wTIjD91qv&3bNCSE|SxW?tcW*zBPct?+vCJ7aII^ z>O+GgcRzo7_vFG4+h&%3RR`-Al)}~}Ik01O3K(yYgT=O3)Ng=hE|{WT2Mg5WV9f?k zJ0{Q^{K40W0YNTQ;JSLE;)n>*SRBaFpt=;)oSZi+knwb~JrlrxaeWd`5)`WridM;RR3Yka_QEfrkXQ^4&{Prz>=19>A6 zyf)#0w#f~AH)9}h3o6dGI?ck#?zav0ADm~+TDe%N+9DmE8;-Y~;){udL>WRTyaq!{0BBUb}=?I2jKKF;; zzof&eZz!<#I}h0KJqfn_K!EM@aj^48H!xm+1+$-A!Qv+uu>RQz>=rtJ<3f9IS!4^? 
zMK*x@#R^D^X9Mh3zlS`C4}h@dBkcZ4jDY6llL30^HYvpXWxP`)&pAU_u_<`On*lM&9Pk+y;Ff0zm~u;S zZL$QHZuGeial~>4bH;KSbJ=1Fd)4BJ`wjEQ?nD0wIIVmS@UJ}x^nEfAY2X;n2cGF- z;Mpt(j>}pICT)Ts+IH{{+y(Ry6QGIAfD&f`9!g6frdt7CZw2mU)_`ra223xa-+Bgn z-1<58ywy|p0jmkzHOq1QpyfmCkkvl{E@2}!mncEqgN!&%7 zNBAq&4+%G{#@q*OM%`}O{u6**{vME#2f;vI3$an~ zLAzV-!wxsyZ#(`I;EwW>uj_w*l&cLvo|I_Y_s9=g1kqN%LnNyI1*rB8_cR0^!x-2s z3kV9ge&ru&`;sBEe@@dlOnGNJPI{I&KJjRDd`#NoI8Hp}I7YnWFiN~`e~U2WIEcUH zd=)?HatVLe^`8Jd(m+M^e-O&m@{t!4wGCx+j2A*Y@*v{TJcwwt|1TsN0^i4ECWL7* z%?`Dh3X<4A@s~P2VrZNm`eZxbr-Ufi#Le zLmqWI=`rg5PXGyB-v`zIY~+K8P(~?k+qWPy{s|IMFGixnDv)9}Kooxa3o+IBDL>Hi zaY%^mL$=uA9y7uDPJqVcwr{rUO?oM2kXDbq=G}!I@H&FM=y?r$&hw7jNv}J&qm(=N zL*65V1Jn`XKLH*~-v!^-wKwuXB2hj_y6qcKnk;~1t6w46VFjpM*H6oE+n&UD89fyH zTioUIY;T8%9EaEmF4u!J*ee0q?iYPa@aO1tgwsBqgcGzw#ACF}#6vzK#QpSJq&~(l zxtB5Q(e3-sfY*|D|E_z(kOvX77Wp9CzD9Fe=R+#W|ED=Fdy$G+`$Ub~{6Oxp>rR}n z`H+NTdzCMAz7!IVJ;zDLonq;T$C(A>!-16^{Q)hW`~3HM?eRO~)#HEDvom1Ot37a# z(t>CXy77;IH>$gXUe~>^>;JW%K+?7^(R|YH(V00vztTA^d8)&#exSu|7}1ao2NiU) z%W*8bvl71R2|={`k?=%fe`u=5US5vZZceFpS8zSGoz+8YVI8M6vaZqUgRjwQ5jE^< zwCY&^D!M)k)mcJxKV+!STC@ENG?x?2L9|^ko#VXtQI6a4(H#8RK^<}HB@NZ=bYh_W z(fANdf3z6iCr%*u2$Lxt5jtv1c!5ttXceP2q}{iQchI+jH^3+h8DNwmO5OnY{5tRN zdQXfrB(6jI|Jy%9KFHUQYyZP!9%kWKzWb8leEg~bJ$~z%42szijlcbVB?r?hjUaTy z#dtPHE2#C7Gf5~q)fU$Vq-}PP;dM;F` z*E=26-r43~zRY+0?ok0|!JQ(vU#^$9tvFwd-Eu6~!*qWJ!@ehlg>6d;BQ_>Rdeum! 
zwDLIh|Do$I!>VAq_kVcyrn^JBySv$&X4Bo>ol=t0jiexo2qJ=k9oXHiw|d!nS=eHu z>%X{u&x_})czAq{&AVC549Iz|b7m6PE*(f2t{hEUSuvG9P<|kzuj~?XJ-x5&MjDd3 zyzJ&*fh5$LdGneRYiQSF$NLAkVU;7_w>92EuSb2wo~-keyuHy~?c#bLlM`#g=m%ED zyX{<_?myL?A3E7l8a>up6F=P4n!K{1FRj0BIHR{_LuP07bXLdGb6Fi#S2H`Ru4OE% zx}LtQ@n5;y2Ns6S}syIC`k7Dq%%OQ%Y}JcY0^bs?4^giR|WvT{#W)r*j(WE@wB^ zUCC;$y_VTrb1ma4X>GKRcX7FHYMlAK&g_ zIK45*cI$W)V{#EK>BgB{xgY&LF+a9ul*;5#&y9@;;WAKf)nn$R{_liD!QmQl03Kf9uLG6FE^WW5o$|qf1k&hMO}=hkCOMR}Saq zt=N>GJ#a8TYx#w|+}=yMi@Psp7j|9A`WujqHI!owt%_9W*JFY;HcTJJU74P2c4oTy zj}r5ln|cBVE?LQKJL9Y~am?Ft_&|tL|K4b?&K*fX&0Dh~Yd06fRcxq8E?(b|p1-au zJ8SJwZraF(yyP|e@)CzH=B2K>l#{XIa&}h#m8`!3c{u)3NeXl*QeaSz`fIH%_4R}^ z<^Enva`~Y$)3FD-e7kR3NN>FAs5x@p(`?{Wpnd1DNRQ@22>~_x(!$oj> zx|vR{-xa2ud9K28;JG2s)+ZK{>+ajD4c>M)?z-krYrYuHs5uwsQ+7HvxZp%?WcJaL zxYWbdNeKtrQls||q=)Zaml?c!S7zYOGnql#u4RO6x|R_!`FEg1oC1xgyZhu&|7(-? zla}P(K}~Z0Uoq;Dx2kNr-x=|4dTk@N=B0yj-%~gJwnu(ewf93EOYg*ZEWVZOmw7!q zB;{&RMBL@7n8=GQ2_fhEll;$&CHtJ(k>Yvcbc*NETgkrrZl(C|zWG<6LX-k+m_Jw{ zMShH{lIPp>$d#KM4B!L;I+QO-Fp z5;1|g_qQ8^Twm`Ea-KOK==khzh~vWt zp??FKQTq=Fler0ga&15A|BGzo@CO0P?jLe&n?aLz42(nv!Ca;jY*iY-LAwgv3`@Yr zEDwULGa-VW0&xy;km4K(Ij+G_;^vEg0D3^Drwaxkoqlcca{PJJ)8WSr54*2-y#5As z@uLUAOK$97Cg+Z^k|WpH$nGB^)G3f>9|H}(m7piq1tv00V5yAvFKCv4vtB-Up!WAO zO@&~K1c3_f}Lh2xag#Ur+y;%VcuXq zXAo{04zcDTkYwQx8CKqqZ|w$UHqKCIYmW^w8Vu6#VXnXOe4Iiq@1l|uM^S%YW+s#O zn8+~jV+KKjy#o|@>p@Mp0(2G?f}wOa`eSLBJ4gb1)p&4Gj{y(ONbu1Lhd`YW2-OXQ zD1ASOH}Hm3LpR7aa)Cl)2MmwXq0Ynx+D!k()vbh_KEOowpJ5`KZcxb3Lqd9hlc@!S z*=j(FdkHA;=VKk2pd+3PhLZ7ME*%5bvXNjX9|leeA;3@!0#9Xs@Ky0evEl_`Y90`+ z&VU3BCrHzGT%YmDv5JcFsL5eFC z6!_vnO&}Vyg~Gu=Bm_*wg1}PTA8Z%-f}MmnI7xZ}L&_aIq#58N;{t&)ju0wq4^eV7 zh?ld56nRT*Qv8jh2bsu@GlYy?C8Q6tAPr9mDfvt&*+8YH0vB^U2(w0kBzq{xaR!14 zw=Zb$c!MslCm8U#g9$$a%mrM)O27$df{tJ>WDm|lbZ`^41uqdB@Ds6wAW?G&7d3?_ zvA+S1+ARNh5Z>532r%G+Fbe^Qz}{XY=4vAXlprih4MLK&ATUQ4d`tAetJ(nETMfar z#|WHP8G++EBXF292D@ovpq(}b+dIfpBcRO~0sXZR*uOIbhYyB7on{UG1~_Z6{O3W; zdoe-!To8%(I7ef?IGP~~(f&#h8KDm0iP{j7r3V2;2H?BY5WJd=z`ffT7=y;(vepEg 
zHk*LMUK6lCWdihnkY^@fH)9MAuZ_X+ozYL{4@TdeXAQqQe_9Z5(Pa70g9yO$D53bb zeGK*&<7~wt&P5h2SY?O_SBJ<1Z3xTMgWv)~2&gmypGITw>NElO6{f%#H3ip=rr^BC z3>;6Ifx|6Ruzzj}4lhl?>9xt6%R7^At{;rQIL#V=cA7O=2w-4l&=)<3`7B0+F4upa zR1%(XPQ)|LiQdW(AF2T{aXJv0t`A|0jUc$(1pMnw!MDQJME^}}> zZVt{j&B5uZ`7h@gvmdT+%)T<-nSOHlX!^nVquG1sS@VSe4|SIRJcxN8Bxc?Z`p=UR zhcri7NbyvL~YrzN-_v;4`p zYB|SvWbw^y#^N*Mt@%gB2lKbCA1z+F%v#O-1@Ka1fgsfXk@Hy$^n)_c56ZR{gKP&G z$Z%JN^gsCa`43tD@Lg~H&1bvy7q7$CpS&(u z&wAar`r!WD>aE9n%Nh3%*3aE$Z5}f|(;of>@Kt4jP}Ki1*!xexd~puutQT8}L4mzA zsp8>i*1%Gy0a6ZvHuGvDJt83Y&K^jkMR1UG$moLAw`WV|LF%H`_f4 z-edPD=mh;i;0^jc|7Ui$d_Ou|^ZVp<$^Vn{xqweDXa52O;rP+0y_2!uoQv6<5(8eS zun>kS^dKtn4uUdIwI79nx}Wo7j6P(in!irZw|<#iPJ5QnVD}_;nf=4)6%O~KMjh{j zZ*;m9y4U$e$SLQmA-9|_g*d=FzbC&&43T=Wk2(@{s{%moU{?z2I^rp^N^LbU6{-e@FvpWS@HaBvL?XP4mb-s|^%s7+U z?Qt@BrRTAvG4I0(Q$7dc_xVi6pYz=vf8S?E+#BDm@vr@+5?=dlN_g%6Hy{GXPe#p| ziaZQ~BkLW4ZMn zhqBwf_h&Bm-IG4-zcXzzU|Z_0z^RlIfg4k92TrEE3|xN$h2>oAjd|FU8yKfx4F)qs`7%r_hd9QuMP58#Jqx|_5KHA56LQVH~#L;#(r#WwF$n)4xv&46N zX-(j0MQh0Fvfl8OrK=}(2Ui4}?&^)9O)X1to@mSQ zSld+OJ6vBCw6eA-w7g=!?_31h~_4W>9rt|ys`3~>2mfSVv zqPA($*KllIsP*cRSf~D>RFCe#9KW{y;^3yfs_@#L=IF|hlt+`+d7NI zJp1-rN^ITbq_Tdix54nnV5@IJ1wqxTD#FVL8={K`y5jPeuS&@7 zolMH;o=!^Zx|o!{>`r1<``!4Q)_ZZee*rRa9ZKi5H}>7T<;jm#8f0c%pWNA{NiLk1 zq#Qo2#kKR8x%h^I4oV|?J@i)W2(;+l5^3MMDS=TxneJURo*!5`RvK0?QX7@CrXx0E z^b)DE+I=rBwQFG@2gk1vC(t5=`d@~8TBA&!Y*r>$ z&vB5G*Tg8(m(@9@&Y6geoun(QJnE+1bI{+kbvlezzbnqAa(k+0$(9`d{LRH7SsSY( zQYTxZ6V~^~#jG1kh+MlPA#C(?LfD$S@sUIK;$jBx$HlI=AG;8ckJ*E2)ZHEEL9CD@ zZ^k9b{|<4GbC=l3!FwWP`yCaw^*0QKhA!L6_MUgqXglp=RDUAGy5eYzWAUM6x4ivX z-Wk({0m*wRL*jNdM?~)EiwfN~8WT9RE!J<-$yncw_hSOqKZprl`yeJ{^g;AOKoR!+ z>oI@OElz$6i;`R0*~rCXY~!DfvIKVLXVT486{dii!-89GOfAU;IZnbbR@J)a+3UVxi zpuyD%27--XzNi9hWs80}E9d_7(n$XqsGBqwZWuckXAuD}u zdgzXyNA2uC?V#IzJVvK|xIwpiKVxtC?v2AjKqn`8F~USHZ=sTt2bjt9S!S~777G~x z0m=%HVeJA{?k3O`tO65p^dF>i!CoO97%Iu&tq})-+UPy#g+iP`0HhdsLyidpmYCt; zcyl`RT3ExFr4>GOVgaWt&Eb)SIsCgIKL(k}wGD)v*~3H*;v8(ja}XojgEl8)dnh0S$!kNi 
zycV=A$jRM=>^n>)o6Zrk>KY;K_i%jlAQsONN;41iXaaWNC`bc_kPUbVS%SZ?DTIj_L5!$AB#Y`mrkExaAVmvua6gr7 zIf)+3Mf6}W3sQq#P{9jA(!LWC3lvHi))4@_OkN<&%m4{yXZ%@l03}vCP-mloHoFbz zvs;5Phb5SEnu9f`DcErtgEJSJ6x?W7aBD#jj|N2Ws6rgCD#R}c(9r{NLJz`Kn-$!! zm*j5B1>Q7%@L`C8kG}+XMaqDCvOKutD1ze>C9tbi2HQ3juU~bRlu%Q6=*$bV6#dMtk$W44se%b$ZU4VI{@>?(P5&1MO=fUK4`MzK;;q9DzD8USXe|Iic%N0EuOtM7$%1c! z0(fOAgIkdbxKycuW3w9AcdG+^P#tX7YJkmV4Y1m)0hXsUz~UP6NFB^yBVVwtUswyM z{xk*E@1~%(5a6i64DRSb%x5wD&<_ekofT>&0AY@zcnnSwfh0 z)WNk;1DrcG!Erzn>_;?#zEKlsyEVb~m?qdQ|99wY{Py$R<0kU$rOIEEy|`pZF7q%wpjp+A?S0Rbgi;9H~p)4NsY zhi9+uH}@gkFN|^B&#qf^XI=N{esnpd``-DQ&RfUFI*u54CXr>7H^wQ-YPhCB~|MicQykAGKKb zO?ZX=Oh}`_i=a-!XMrn>o&<~-J@VgR^uTwg(LJA|Mt8if8r}5z*XWwZJJZYV-_0+$ zfyEhjusrDjRto`sIDR;4?>HnCJ((Qbmjy;VP>6TE7ubu><+{my&J0j`pBAP5Dk(+# zd3>JUztLre4_*U}`PG0O7MBB#SX}VGY;o5Au?3zCvpVkk z-TJUE*dFi$+kO5(TL=hLU_$N91WDNM&B7WAFt4=)?^!Fe5cpX}7yVS?BK0=kN8v?I zxZ0D9B<=etIr{%3lo;QLtuebA(`s=ss>kYl=8JC{;nc`B*F=6FH}?Qr~Zy905<_WNQc9ri@;ble$r z+;LmfO~ujQc~Q zjlhcr2eF5>?$Wm^168h+MC+U@NHIE@n`?0-tHk<1Mm2ppt=WE8YPaL|rkkK5@&w_9AKF4y_0oT&=eIaZcve4sefa!)~l?T)+(`z<;3PMfkiTqZO78RHpi z+}EaU^jMQR?KzZk&U0nTBaaoyA3O(AK6vz}eEcgAiDRdr-pj|&GOVFi1vB3|EFYH{ zvpw&z;JVjq&40DiLGnzixAM`3P@VnNamKqU(k!=@=Fv75FL793SnV>lxP>v2*W)pi zJLEN(J>lJ-wace3^OR3d<~^UTjJL=;@6P{t@70;{ClHNer=i{}kfPxE0}3=KQ-5@5 zGrjFGWO+Pb%64hxM(IOx-}c%5%&{&xTN+++T&xwrkBb6)$krt zD^b35X;NQe|NZ`I1Lmv4Mm(oh(!>ugcT?Qe6`(!U9%($$oM^SSA%niUcCpjo(lW;K z%36=^@^!ByZDuV?c&#db@^`=1mbZWvQY0W z!Jc<5_M6)j$ZW3~<>@Ld%I$GgrVC>R+((CP#P+UqRoL3^r#0CVZnSn;yyZ}98hxNC z*QvLmgwa`B?b%k{;?q>s>tA0n99UhpDY&xqKydkzE5T(Y&x6W~Uj|kb&jeH!z4BiO zNWgW-!G3d@B*F6s1eVE@cLPe~(TFm+wn>h9dQz9`z?hZjjy2Bm8&~;gjtztw4)w)a z4s@l^dpfe6I$Dd|TAC}p8X6mYYwEiKDr<*=N~GtiNt(=c%aECsGUV>07`eDjnsRJPn`6%g3*o7CjXGxIQ$uVRu+o-MO&L+Q*^U zHP3@{m%a$ft9lu@5Rih}zW_DwQtZ38N|7&plH}=#2)VuubMJd3CYKHqB_MU6FF0}e zXlPvTwy@})6Jb$Z_d;WqJr9X%e-WI}`XXo{APdJYMcv&vum45Kt6?5;WfL1YHI3T) zxB%IDOo?UuumSJTew#&odt4OT@vWA|ts%xsH%D8QZcMT(oXB#{882{4AFJ?A9&PfA 
z8|ev*8eS6|Hnb%qWaY7tpus!A!Tm3S!}?wXMfAK3TnNY&B2bC6;@t!N0_4UzR&stD z_TCR+_WV2-nLIDYG<-&vd-(}V(T<~za*YQ))tBxM)L*hU(rodr1e@#~=? z+xSDd_E86xIEU`5bqmeeu?i=$hy<;n0a@$ot@1~Dh#`RF0 zq^mJTv6oZKBQ9oJ2cIvd`<<~?CUi}UeGhQrYV4ErP3UF{CeFzlv3xjF2d zT@YBxg})QH$nDjbKiI%Tj_+n7d+{EE4VPHS&@*n*_ezG^{z{XzZpN6q^rekZ{&OeE zjAvd7Nl$~+VxL55M?6l{4}O$s?DwF+%=3PgCF5?ZjnkbKwsyBCXtbOA={DD|)2y$& zpjlr0WM_5eoBcvSBRl!9oQYf=qma{EQUBu{Y(2wFMsKi^<)8RS#}^rn8P@_he~!G$1@lgnNWYTkoF&Zlm?Ju zt^`%~BGkFrU?P|bHlhjOBoPfBGU4Da9}J<2=sl=-LW&yWSFVQRuTm}g&qi(QpUZVD z=f-u+=l1BB{kWiG`t7c^@wZnCa%Yf`OX~?av4cXU4^hd+Gnj$6MkO8hnMmy~9!fbZ zVlD(l%phoTr(mx?4$Ot3fF_FB2k{_qm-GWaJns-9>ju$y#vxh34ziUn>!56oO#ow9 zu4)Kls`{`~O%G0~>B2v%3vp!(GZ>h|IDj6+mJ|O^e|O&^qz=zwECEhR9*8hypeK|B z${aDE#T^cYyup|^!0dyN4>$^Y07H}k-eMTKUt|vv61I>aX#ts1CU^kO0BU4(pi>qv zX_HfjjdE(RXF<+xCgkuQ3fXZ4GZ1Gn1A#e+M$ADh`InF!V5Ou0FEtj#m?N-`U{GfF z15FMu(C2aoQ*H)W@j3&Y&jFnH>EJG41HOXT02VTVXki0L5z)c_h%}%~R23S;l%PvY z0al4G#Ic=(>^VRo6Q|IFK@Xw@=b#Kdh-~yAl70~q1?-d%;G_D1D3b^74OdWLb^=uv zd(dK~gFc%Ln6O)cC5HvjI84Ej(+J$S^uU)(3qrWnA%;g8Qh5|0k5?9!@Jho{zJ=H~ zjULP~LWaA*Rt!a2ypIY@j(NW@P<0)Y@OV52aAhvEc6R67u-+JH2ZCCD?Gg9@_= zXfPXr4zmFmu;_v*i#AxXpde>KfzFCzomB<`StTKoRU8u7L?LZK@c0)qUUk9@_UMJ! zV=v3jgdONMoM7+D3-ts=|$Q1|E(nVlgD*=XW5}@BJ3A(E!L1&#LXiZ6i z=3eBOBxqbh?n{FDj0C8CLVhhmw_y>wAL9R^h@Zb>gZ_gZ;(%U^qb@5r8nJ_;H77W^ z@Pd=CAUH&b06kF*tg{z^MTrEMRZD_NizFEJNP)phDbQal1-hH1KxY?nSPHbyOM%uM ztm_3bi}lQ59Wc*)EfVO+{E0344-SYk`XDao2e}xsgR2!MFr0XS;UfqxVIts|AP)3Q z39u=W1gk13FmI9uvo2{cSs@KZBhp~FK^hEpAP1yD{|s_V8uXrGZ6C0v?^p{+{?r7? 
zA8H`=LlvZd2k7d|;DnljfjJ0w^kUrg*}((f)An-U15ZyO@CX(KM(iSRN|yxt0x6(X zNP|s-3|Mu@f3U?VESO9qCuM&bUz7Q1{6yx5!CUF?24AJV>VfoUZIJn> z334Ab{sbIUnb7)W22b=MywQvC)n&sd94GkG`M}>z2z&#@z$;n;+*72$B~Jz%%Vfd6 zRu1TGa$wsl|I2zve$IMK{)g3O`EQoH<-b}UmHTXYS?-hB1KE#euVp`&evx@=_)G4U zJ}A7<1;rOSp!7T7j6F>c)S13W0Qxb(+H4SF%n6}3e0UX+5Cr*&K|q8g_$En%XSVE5 zw-WguF4YR(oLUsVICLw1vL95OwI5adV7Ec>J$;A5Tlyh|*S6;sW^C@szqFo_dus7n z@v-SI<%g!Aavy!EdnTavJ7_yIfj8>RK?D{u%P0dk@u9;# zg}d}u%C~4=RBzaT`ZXKSylf3xmlgm#aD0E%-l3@fqj4_cQFkZeo=C!X-V^A8zhd3R zzD5U1&PK$@z70uLcomqh^uoVf`KfQc$`hY<)ra1Fs`tHCtKRh(SNq3po7yeLLAC3y z7u2u1K2X2t@>=7Z^H;6YPM~wr5p<6^g5J>u052Rr2(@=4YTpF(LsC(9r{h`g3{&o3 z>9zu2lAT50C-_Rt#74+IjY?K{6rQVmKXi%eouC@^TLG;a*Zq66uK2CgzT`WmeZgn5 z&ROqiozq^Ybx(NxqkGhAM)#1%SN;9&U^wj#M!S(+9)AM9a#RRI%^8cNq92lldove* zM)Qq0zvNl*z0GzIev$6E=y6J@%-zIz#hbC2s#l{5H7-R|YM%>l&^;5jOz&h!zy7h{ zVS~d#6NU!^cNpypIBK*v;F{5{fM-VA{Xdy(@dMM%zF@Y|56m_!00hWTArdubB9bYO z`WyFXp(e}s5(D=4CFb1Ei)aE5^WDU5X9vn$O^;T(kdmr?Ix$cCc>EH*BeB&62V+`{ z_CsQ-5D`vwk>?hd`s8?^G%@_EjENcwpbtf!D3y=FUz$dU^Vg=APB$zXw-YD zs5f(QPKuPU=c>;1sZy79rpko#QKdEStx{*v%Z0u&XYwMHk7p-p9LmVj-IrQuusgZJ zct=vb>DGjH^Ud+gEjPpsTdj}XU_BPI$7VG8w9RnTeVd`EcQz{{=WGWf!FEN|!axXq z|8c1IGVq*t0iN?NRiw;SsZ!t7YBB%YV8C{#-kke#wS&m%axa-9B_YcD3t}~Q=cVaw z%g!^}oVmnwLq@g5`m|=NvD9vxk(5=m)k)*@m5Dp-RwNv^TONPgt~dU*T~FM1yY9H3 zc3pA5=Ar)%grV+EK;4~8 zwkCZ;emJxy%`FK2=|h1e)O)$8H%qbST`fmu8x_dYc16lRJ<8OJT?U-T+iivRH!-Ak z)cGrIu8z>0s7TOXTbf}qT#|3Gvar;8U~vttFTd5UJGakqSdDr&@fJCK^IDM{8sChnA+Atf=~_}n?y zR+V$Xts?thMn(1~*NUt!E|pndohvi{1Y%Hkr=#91#2Tt3$Xuf&nOU}o+#L`imsZJ8 zjt}avPxo60Z0&KBm|W(iINBDhG1MHbKhThD(o>sdv21C9O>1R^T~m3zV_oSom+F#1 zMn%!Mduib=_mYCM9z}~ExfkVsbSuvL%qYqI;<^wJhwG4udb1e&?$wLP*A{W|v`2{C z7~&vj*GQ5>!=N5f(%P;)Q$S?R4NWgW-#u`ddf7gkT zkL|+b)&M)XG=lf;uM;QJW9lqhMof6ehv?$NgKlyI{r+k_z2Um;UGc`v9cdPIZMn8p z%_a6_jnz)Y^)0T8>-yYtYeqdYmu~e+uR89PT5;Dawfvn&dfBXdM(HQF%q4#UNw^Mq z*zc~G*Z;!gO&6ZKUx{xIj^VlY4Z>u{qzdzfaU-6QwYH*zYg}b}hJ01q2Sat5`eTi1 z`%=s+yR&VUbQakac2qj$wKchBwe+~BHLvkZZk+N;s6XNrS9jYhuJ(;*LiIOhMk-j^*1d 
z1eZ-YNH%TsP^_K^)F>a1&?_2CFv%NDx6E9VXPdgZ)IMQVt#i!aGS`R|E8Ro-Cp`lD z_IvvGT=VennsEd%4?N1;Z`~GDJ_Wy79G2t9A;~b#&#{Tn$11x0CAxX-> z0Zq2feWrX(d+6fTJKf~Uw)?3TZVl7UnTj<`-SydlRbZlahLIbLlaI@azKI6CO+ zyJmvnwR+mkW9SOQW918m$BNIcUVWckz5BlW0pxSyI&hIM?QA#)RB{GAh(nvHWIJlF zwdes1oE9Nnr&L&)PZ)AnAGHxKJ?tV?aL`9Fdw+;}+H|x|;@%{~nB7^X;X4a0gSS`N z_-$>WdrkE_xNRDDblI@S$!X$}qtp0vN5{2a9G%yEcXAp26DVd!Uy_~N?j<+}RC0Wh zN~Tf&ZaPRML+FDnyUIhFuF6xZujq0txn#+of5A~a^PHzl%9%i=_|uW~o~T%;QkArR#y^){gteZ0z^$wx#d6U`yNi%*J-xXIq;opxJKz6R2b*v+Y!z z1I!<+!~8$yAhu6a$U5`@mS1EgO%J$8^+OrTk_VbB`S(q^Gw#v_lkPCYWB>7!j<_AB z5PUOE#s5Z{hS&8xZMUmsdd^oG4eT%V8PP7RHL*Fj%f#yJc@wKsPmL{(e=;>Y0%m51 z!Tfiifr|TpkP9P(977Lc?`{g&cm(I*EEDO#cQI;S^ODlnGL*%yHCQrV8F3`Pvf+)J zaTbhx=`9-aGDyPzMU;&9^CWrqXIV-vPm5I?{;g4`Kk3x4emtyc`Dlxl`Gb>MX7?Xz zn*8%l+vw&GUBer{{@~d%Laq!Ea%uwgKd!-6)W4%ADWvxjl{EarOv=9Tlf~bpsF~kX zS(3l$vB!Qh=Z^SB=MVYnDirY5Th!-Eu(-#UXbIOZ$x;rVb7W|rOXaLTH7Z!l_9>dp zu2VGmIIU>>{=9m?*QuGmg|I+qYsk* zlZRxW&P_tS9Rr$dVPMFKXD_(@z>e1&Tm{_0ThIl9gdHGKlm`3N$V!aN@BC4_+Gh@mWC_zZt{{8bOAT9ux^{L7fOb zWGSKwqoT^NMN|onBB%f0IwDFGl# z^+wI_21-oMsP`Q}kA)5|(-a{*T z4+FgTI2WHJO#Zs2F^juOF}}>2k`|q;(>LzfC$9_ zB&jrzqgsP9lLcrnnSm~|F&Lr$V9u-ywk%rU#G(csEK2A*-~kR+DM&@{VKJKsRIuUW zmTZF1%Kkfh@ORN9uE8MsFs11@5PnI&@& zrI>&O#SmmE`k+YF0adCNXi?QcpNfG8CPlDfk^y@r32EWcc+vgf`y~$izXO&yzO6dbe?A0#-WM`p1)3!r z*f?;2wHFsygz|uCJRcZm@`FK<0O(Z-f=-hlXmtvLX1@?<3=4t!xDcpqMs^E=>QUsP zAgXMv>lN1Yng6H4FaGaxAn;Wh1iwgv(C>h?8jg=1ggr9v$2g)V@Pi@2qEj3FC%;W=+l zCvI@};R8mPAUG!o&pBj?e4`hMezC0*`()E7_R(sY*n6wx;_ob1i@mX2C-%x>N^Hh_ zTI_|{NzrGf{}X*`@=WBR!AJ3X`g0O@^g!}9dO|mK{{);Aso;iM*Bku@f2<=I*CSY; z6#~rI@d;{f@blpN;TiMFp7PC^9%zw&UFax=BW*~pY6cm03TycCa%$WP5 z{tiX$ABp~06s|`EzVjAp!}Tr1mG5Jqzu+6cD3O=mDdJB(a~C~!Un23)tw!=5qeb$L zYq!)b=RxTkPHUyFIc}1<;;={NqQeQ9^Y%An&e%VfIbrun{wV#I;$a#n9kd1I1Gb>@ zJK&Dv`=Ra)LERaR^~B@;NWhCm>7mrf-q- zHJ>Wk%ifK07ri><&v`CaIODNe@ud5D#p7<=A74^38=eMaDSv}Fnv$cWqF@s!u}$W#`7rNUEoekkm&Wu zScyyFX)@O@?k+>w|p#RE}=D$|h_YP-Yh)pv%qYiPt2BZe!(t{bfgdtuZc`qii}1dMu-o&~_x2L+-~=cSC|NHy=HS 
z5`1T_LYh3TmL@l96)ERx^x2M9+VD=7xr%Nt_LJUR5UwzhAFn!=ldicYD^F)AV~PG? zdbMGHT8nXSN{?w*@{swm#7T>`guRw6@#ig@;vQQz#?D$a#DYbAEST5F{0W4j?v6vf zmx*&xAWr7W7Ll2y;^c0)}`%eueLl6Ob7lgOqDZ>jaAAqu0#F{-NzQ#1$j zvvrr}6&dv8R2p|?HJG+%cAB@O4_Y>+ja${H?6R&&K5er!>49}s(g*9xM6g0ED--?% z!g1_G)SKC;d5cBK*Gducv`&EhuZ5GGZI>p8+q77AH=FZJH8=>bul1A~sR~k9RS~5+ zP@1UKTau~US-9A+eQ}vdOMabMV{W@;UCw~@(yX<%l^NSPnrqltQesk9Ty4Izu+^$!ai2|T{s^r&cMH8B=crwN_CNHz zthcnh%sJb9ycjM&18jZ=qH*k0J_3tzJ<0{i+d91Su8obH>t-c~my3~I%T<{+^%!%G zbti*WYf^OUmu4GQR~DI8lvi4mmNr=zmGsyyE*_@m7H+c7 zDmY}HvG{-X8Tqg6GV*@VGjf5JnFF@J1F^Uc8N39Fa4xENaSm9>rB3X-qX)2WP>5_D zRHm*UFyL6zXDzUz+gZH3(_6N^Jy^M^HCnT-DM`1gA=9v|uE4aorre@nX@gB}WhX7G zV#qG7Y=eEuk^>G&CD$Ahi)ZW;i@w_>6#_lE0RIR64T#4%nAhG*c*&PqPI99S&;9o> zlOuynWaltH**L5~9UanRU%AqfuXn&vti8`url}`TskSRpqjFh-ZfRS(VPQ+YXpdK z+{jF>bTW}s{dn$uHI+=QaYP0r@nDch5wijtyCF`XPW*a396q&{?ud%VqBEuZN= z_;`e0!=FF~8~$EmC$HK5z3)oO*=rhq^;<3KRjInOS!fEX` zONWuumJY*@E$oM8E$mi;rTvOOfkGzov6hf)o#;QTB;*kG;CE~yWE}NhKjz?@POy=s zCnZVgNe$-3$4xl1j?ws2k1#|M4*N<(9}JZZKM<=Jv@b=)Z#qZAYtIsGx7~Gm&O5sd z9JUV|(zk9gvYk3YV;G!G;EHo*0ee@rDbvWxR&{W2byNnAGA#O{M0eo1-ifUt`6s*2lEH32|0*+ zUhYeyA~uG~{Fy!ckp;M{j5{WGBQJGVP9 z1A=RC3_Xawcot*R9zs?h!hL`my!s-AA>#z(~Xn<(~ryMXBdyg&v;(bxlDe;xe@`rxduU0koV!{f$0IHu@m(=mCV!abgBR znD~G+#T}HWE}+Ha0EWzTuwbzP8jB@3v*HB=Y(@~ot`E^1+K|Sn0fk&DP{*wVy_kPk z%OejvdF0>}k1YK22gfFH4YuJLOykcY?tu>UVV0r?kpDl-V4x2YhCYb@PiEo?Jm>+4 zq6Z*Fp@Sj?+on{E&r{97n8_F{nGAr=tOG90n&5-khfo#;NI?G~53>)I=s$F@iNPxL zAST&_Vf!EK$33_e_rNIbfgaogb?4E8K`$!xE+LUm(EoTzh}#cBoX~%u12?e-Az}fG zhzZD0jIb7cP^0LA4n+$L(SIo1%)|$a(R-+3W`|a0Ht73< zowx_aaS!z49%w=zX2})wVD1nS{RqE*%%d>g5@L@v*q{$$4pbbQofzPp=wdBeAVxGm zil~7+Q3hp-0%%ZVL63p~35qD#Pz1o4!VNwYyl9k)qJ@fotRV$|0ESvrFxI4k33?$$ zx=f&N%nUl#ETHMk0xG_&pb)_dGRdqUmCFVarEDNx%MPNg>>$#O3?Qr7L1-M=%npLP zki+aCaE=Z5?;tPOfcq0GaQ$QjPGAKNWd0uuu>Bv5HK_OmFcr+uhcMN~It-b?(2520 
z9I*~>R!|LN1;s=*kk4iZ*x){l9>@Diy5oJhL@syXXB7YxIMxO_Cjr);2@II6SzWZ{(j|lw#12#Ce9qMgI>~%Zi*9_bj3RC;&nL42o{uIKyzflvc;6bg@xC_d;hixYcvCX--aeQ$K)1&;?O|I}?HhIE-!T6ovS)(5!rwl>#gaL>hGXU}50mu3A zQG0u$_VvfFgK=*J<9Y-dvi$J1V*BXj#PQn0hwC{boac#465j)-Z2r5B#R9h-sswM^ zHws?2>kzs^?-Ra69}>PmTPJ+Zc8kbq+x;RZZO(}tv$-#F*!r#50jnR2rY%8oj|E8W zn#cVAf>$l$_^5mRkWkeAk$BHslsa`TLYMhth$-t#Af5e*zX#Vn-w>W#UUB@_Jky0P zx#tU?cPkY=>sBLniqR~7!nJGBQP+V*hh0V_4mxj;*yprca^Z(_)IS4?_8G*Vx9`o+WGUR)z0(qUPN_m{DO}(9H z#(F8q@b2=B+=l5cF4E_H9kNfewud@w!ed=7-dA&aG zbFR2L-953b`a5H~4Yo%Q8g7kRYP2bGo$-cXjo>U68pkSP%Lqx0)?W>$X^uI%K&hWrg){@+O;s!~-_{ z2^Vd9|t7Yg~ogSU7 zvlSez@sQkGIbD8x*=&_fC9#_8ic)n~7vvhO$SXEpI;YBPaZZ!PaP~Z_fvkl#{TWMb z7o@MZ?@rrmKQHB+Lr3yGhxX*39omxK*taGk?M`HNAHrgwoeIA<2V+sHKrc{teiwV5 zHyiQHffj8#-eSey-{dB-quxhuV@Bg7Z%i8_U5(Q z^vvnC>&jj1(2+gn*qXJ=sU`EYb5r`ioEy`hIyI!dcC1fhj`gX`;X_y)e0MthW&yl! zxhy@cm84IbP;=jkntR-faHzwax2w%Xd~=Jp>{#PWhP1VonvA{}&by z?M&2~7hx=_r0HIRfNr$H`**_o&sU+n-KM;)T~1=_I;P01Yzt5xZ3)vDYK+tEuTM4X zsm(E+S6yV$URh;5x4g-&scfD@ed(ZcP4Patgmw-*Z>bdhhB>&?!l$d9@cun~I@qg7JNt~d4ZRMcRo$M_OS+~j4RwU5 z_qE07bhjiMb~a_1wl)-4G}l#F*Vi`K)l_#lR#x@9lvXTrD=ObKsi5?*dtS+P_c_Ji zPMTBnhg)tTbDdMbTs{O_a99gyL%9S!YT(n&c0OI|=Fz#Us7qk$Zai?~~HYm|J=&bItewmL`F)wpn3ckyYM; zO55z7Cdc&o^PQ9DjkqOrj!lYf+v6VH`jJOe%LDhQrWcc<8kt*E!{4~#^?$Hz0p>wS z51O#=-38Bu{qJLoiFTn5V9grrdE)+{!L=s5o;8kQ?W;XynpXNN)-DfGuUHzbQ@kX} zAb)X|NzS4Ii;Uq4o7AC3`^15HPO<$%E|I-!+`@WxO`1Lb+@z4MuO@|bK6eZ0c;_0@ z#{Lg1kB@wqqq}X``-lHNi#muy(BC!&&xIUfV6zhx2)bKYH5dk*yw;`$f8xwLBl&;0tU~z%ozC6Wk&z6 z&NF)7I{PnR&i*~`!-|Q1Zh-fm5ATgNaB>;ZKIm_P2VMdX*u7hv+V*Hu;~qF<{<5hDLlhGaM61T`Pu7Usm!%WBr%*q5ca>4#&bcPjcl4V3Y+GhA zb<0*u&rK(-CU5-Qa`L*LEj`9wTe+`h)*td4#^BR#WH6Y6Q>%y$Kz|GLSHJ`J!2>qo zo{W+UQj~vDm$ENf^U^N3iIP5=CJ}djrcBhiaQU#a2}-lhWT?(Om9OD{vO?SUM6>SH zV?FwwN0%77AKq-_cJR27>;5~2u6uqmblLUF*l7ndaoYYqUm^$k7&*{+WDrM@1MPTcH-YdBxosOfm=sHVe(Tbd5%o@m&g{!7dDB-6G%!E`2ayASJN zDb~R{i~(|>Eg1jh(C>o>ZH5Oe!S{2d-w{&mV>ya^tV5yST2Sz}&b+{Hr||tB&k*`P 
z4i)o$94GGeC|%OyQNHw~Z>nTnzG;*aKT$IB3*+shb{^UG8*hu`wWZGWqlu>P$>(&ELir1|p=Ql`Hil`?tu ziKOu_k0p(s{w8VoI+L1wwh;-O`)Il6V z7I96a$sUR{*f;NSYBMqz=x;rQIXHnDh_hISmoNu-Hb^FNkeK_>M-Jlu2M_s!)Zh7o zJJUi2U_j2yoE(@f*)nI&ig|Em%!e~#K|DPc#nWPGJT+E~{Gpkz$QJVD*=oKl+s>C^ z$HA%h7|+3WB8yyu9B2^wZP2ek4wG|-DE=$dU?2yXjvQplAOFrDT$lv%2YKWVs>mO- zkUtoZ88angW=#gnk#v|lsiQ4QESQsFv77|U=7g-0!$Yz-+!zT~00glU*t;J&*fE?3 zXK)@MhpD-a3g`j6L~G&!5jU!}v@k zkQ2;Ftq)5IbZtmJU2-;lXDnetf1so6of3`AjvF&y5b=ji3YcfC02^l+P4b@|nVV zunq0p&wnR(lK)2L8vnJ_SG>QZp74Gb|DE?j_>T9C&-lNfWj}uazK)4wZpQz{1v!X| z4RGZ%$Ekc~8z^8FkpgCtjJD(om|iK`Qzu~Bt)NTrR=Z#DMstziwdQicUm9bAKQ*=p z{!rf|c&T<=@SExt{&STt_|KGn;6Igr$^Sv-o!~neCVVWzL?6N?!Q*bcTlhrpnBWJ!OM>ro?g}1h{UCUt`BM0m1{1rd!Nl)sFp1AU;BS2|`0Xjc z8}0DHocP%Dn72D}As+#A4;H?5i4p$cm?nB*moIu|TQ2(9re5^ard9N#b+_ny>jBZX zR*OZCtX7G>vD_$nXt7Il-~5>HOS8+uyQW_X@0dIhyJ_-=#K*==@|qEox?;qnFB>tL ziLi0~X~-XZkw45p{t$?{3B=l*;lh9CH%0J=cYyF$&j`^^9?7B~Cgq4db}JG4#_$Zv0vtt`&ro& zlWxl%o%EgTA-CV<_q#I1y)H~?w+mC=<@`R(4}L!ozCQ#+;y6Yhn9%Q$*4)!jXWpY( zQv_cI%@p1Vj1vDOAVu=pj9lr<{v|RO{HtZpO>dSvJ-t)@q+hT6ala9TqrNK?5BY3R zJTPsy;=XAoly-Z6th96LBc*LqUZ`yLVyYXxnA!$UroR3I*bMmnV0i8bc;7f2Cu-BH zL<9OM-ju$HvE}YXx$|#EP8VGb50&^REMEFdXol>G+4=HEL&_Bo&8ky65Ik3TU(kG& z-9ZagcFr7C-5xlmwk2Sj+NK#t)HnEFR$u4;Kz+^hXBw;g-fFJ!Wm?O@GN1QhGvWPb z!|#rJ-y|HT;a>MNEqa`yNB5G<=thz=|5D;K(V2u`$>VX+vWH?)qJ=>byvQp06;B_}9u zOUh8*l#s8sA-+swZCtJ9n%EZYl`&m9%cBQ$mqabqTO7GweK(dZ3)NBTjFn+H^+Tv-W2$S(V5vsuGxaM+@^X5!C0-Q#PTYC*~JyJm4?b*&vn)WwOjL-v`rEpYMCa} z+cZ;QenYrwdtJOnOHGP^ z@;-GM={Mv0d!0n{d%Psu=KISvb%iL`bw;aKwI^wpwPorR&&@Z=Z!R~@ZE7&jYUs2| zuOF~Uu3KrBShL+euIjXXY{gys*z#ZOV$0sz#+I^iY$n1|afzGH(HHQ(H*l$W9{oSE z0Q0aA`<=)_77Z)Y!eL{sd&pkUJ~&yTX`!ENZC|ib#ezt+lAZ*u!tQk4IbCxMvO7yn z(mU$RQrg=s6I%PNV_R0(Mm24@)d?ZT?x*oIXxoA3`{8AN|!Eq+#q zd1%Kxp#KL3Fb^ZxcZUaCxKxh1m+Djd5*vQg=p?b4#nYtAM`kJ%4~MJd55{Ta45Vsj zEX>tU?JG7)?5#14>uEKQn%`>`-nGnTcIOt`;Pzv-L9MrKgIb>2%xrpX6V%9Tf*P3h zL|87-cUX(JFa{TqK^%wv-bL_S$RUPTA%9pcL9J`FsBw)2uX?qMux#a2$-)&gWaliO 
zt(dheMm23ovS!j~wod%wLjCBGDx>h>7Sq{-3(SKSF0l;g+hpb6d(_&m=cbk4{3lj^ z^Ilo`b+G>fD@6WKi}L~g`&>Wn{fGWeNcrRFa07C9jK8N*GI2jaiea5wSX7J9K4*Uhs-0gMj7T#?zNBHuV|ZVCKE(u-TN6 z8)j36el+!3_?MYiKQr^{d!H8=gF7wAAFu{aVf+t3e=BmBmB<15;X@mC;`s-=WvO6~ z9_8+~<}!A>2~u`V6HnL~C>6USOg3_RoI>cfH04=a=BNd3F4Oei)S&INu}gRA`VoE4 zwd)N$#ts^|um0G;edYHCla~Ey=(dy@x-MZx6SQwJGefCC$3*!p*!ql|SQBfN=W7FfpHtaS~H6 zq)T~zlrQamzDm~hT&ukE*@X%Yr&lZ5o!qNvd;GGZ?a{}IHV0oSS?_1cR{NOBL~itA z46y!>A_Le1{SD9`MGn&qA6^F^TyO^a|Ccxly(yv@cjamNT|M%-XGPxkTxrTZZ_e{x zAkY17IN$AVqR{#CY?0&VrDArUHHq8YSs-Ec=?Y1UTRSDqKKV$}?Bj=$rdOUznOuA$ zZE}IhOym;Q;7RBofc`e0A=vunegE;$N_>r=abJ5QZ(g9b(;LdkldeG zliO1la(+669G}kM?4O2lHcu0HR!_3|=15Bi&+zXbZ-Cs6|d{X*n0snCyvejsvysgH;z{X*n~`UiXD z4>o_-KIkF?FhK_J@9aUBdBFes!T--DMV3f1ET1G?HBo)%Q3E z{cU?O2hi__ehc(Vp`VHIk3kL-41FKyyZ=DsfHv4LA`8@nm?D!gVv5Kf)JYfFgBJ23 zHD*al$U$V-Wa#=sClWbBI=~wM!{TRy00Yl((obL)*8dphe>?~4fPN+Pb3Q=^gB&X4 zK9S#JqRBrZ|M(f%$8#dncZA%7NDpYEEgHCQNfqOw%v6wlXp=NEBor$k`*4JgCkO;F zU_1jEZwuax7(SFHz$}n|SRj8e!>f@E$8?-HrZ$;l3Vs}un$0m$Jjb|954eiI!drrU8 z8?=Pc3-J5{m@V=TTlicXU@_hfTaKByp)KAV)11jMl_-o!3fhv3wv=*AtQKR_0_G7D zVk`uMNL7~5JKidK%WZ(t4thlg(ciPszDY0X5j20LU(lMLp#22=h)iZ8%pUoN1AMMM zu)(nn#=^=O?eW6g%)pq0^O$BL$5gX0E|?F6D(7-ZxPRdIz{Wx@WmtI=8qRI*)l*bzbl<>AVwsq{D>gbeQO@ z4ilRQb4Q*3WcY1QczdG~D#cwg95 z@b20)@cw1p#=mXd&A(~AkblE^G5@;ND*jcgP5jH2d-xYDPYTXkd@ML)@wMQD#dFb7 z^LJv0&6xNh1e*s;ndC&6C-RS}@Y_D{{{A=)z_`pXp+Eg>=ofES?mI7U?xA}S@7|kq33A~s2T$Xl3knvVoEa^4bY_agp}-u;{Q*T%duCKh@0!sdv)#W< zcB_B4?B?l%avP^FlUwh%UVg3b9{JTirxjLCyRER?`v-+3Q~y*NoywFJO<~F-Uhl)E z!}|wH5(|U(jh3g^sK@v*T8X|0Mg9

      D6+xYLnd{3DS8q66XK;=98VBzJ^nNN=5; zC%Y-6RBl5^jr_V<%?fLRI~7+2^(n0gTCB8e=9u!5z#S@!0*M(C!s)-O z_WLolKG5s?J}h8-4B&aky>GlUy-1LvFXP1NR*Vc?jMt@8i8kD!M0ft41V7RC_z;QB zu`$x?V^U)_#fvoleuFLm3vd zJHv&)HO*VJA$6w2SaO8)%A^FjWr-OIqX~IRi{eUDhGMH#2V$Dl`lIJ*EQlJ=?2cTf zH7{bLc1QRDowl$`I&(uG>a@&$q0&Rc9<0V?1HA8ZFW~j_)daV3#T8iReYPNEJa-r&iqzd)!#0HId z3GG@P@qOB@aZ7YtV%O_6#q8CqkNQZzHu5X|nuuTZsw3X%RfjXZs&J-T74{D-1Y-~b z-z69PM>?)O~O=bGDrp$)7taOraq{K&JpeRVDw;)1( zeqOv%=bSW^wwzqGmh589#;j`Xy3A(XnvD7SmFdF<X+j7bTxIE=c;^xFGRo zqx^)oM)~o~Fh7nNOa$9tmQ5$FDQ}Fkl9^Ff5;L3#i-hk^=I9YFb8Z)5uM_$wYH*!jhwJ=$aa!4+ zNuv$s++e+vU_q^y_`GU=skX`xx#o&!#rm=&m73BF^~#cb&9b60o#MiJy@G;v!#Vj2 zjdSvrn`Gv0HBHYsZkC#L+blKniD_!aYm?M;W(xSF4XJ8* z^*Ne3bwxUvHC1|P)h&j}RXxTD6{DuHWgE?+OAnhx7JqCOS@^wKWWisik@?IdGLIQg zge4LEg0*-DV{o}1ey0`J`klDepAWy)jXmccS?cQ1qt+fPUSqebux9>L@$z{A(#4&j z@&z3+O1W*xs#&dB8tHQjw3C}FbrYJJ3}PF)jiTxnnS|G_Hw~>mXckg=&1_crccvj_ zf0~ArGLw)JW;_v=O7sA0@g}mUk8sI*441sSk%O%3gXe?~?HZ7zxdWQiFlf$I4>zyT=y&y^8GNbkn;~CY=ct+Lxuq@;cI2W!rVBZ@V#35t=+Xvu#hfsIF2)=i*fa*q( zKa3hv$*4WAV6lfVcag7n=17oq>TrZy(ono&++dnY^ujr65&fl_v-|3`gBNt_1$GY^ z_|G3R^qsfYXj;c5qiJoA45!U~Y3SX|485D)=NZ=G9gM+c%)v?YfBztIpvBmCUV=6t z3#eNO-?>_z3RmmVoYmG`)+#rC+DdOx(#imd_!Xhj(aU3GBbKEogf7WZ4jwI54P0ES z;lHR|%V&5%$9r(KuGhlddXxJv=uPhZTGyldH(ihUOwVK9`#dNi`lJcx1LprI`o9|< ze8UQ?g;mHO;DxI;VBdF>IAv~9r_{}+l(fl_i`(SMkKX7fir6qqeD?Y%so-^qGJ$I| z<^0DA6@1rJD|@eQRrOlcuja90rN*RXyEI&voY!z!{7}PXtEo_t+S+C}?+V#@;uwkc{2%U3=vzdaoWu?6s!QJ+9oWJ>LAF-2sAt-JzoC zyW+%scBV;A-H|8lxxGTxeOt4<+tywM=grF%9XD=QvR{8z$$ss9MY}c6m26kNRkm5d zR3>s0bMO(?-*`RJUg&RLhk1Yx?t^yIPNL$ycm^f1h|m)v3OcDm0Vh%aaMG6iPP)^y zlRn(k6G1$$6A=QBYNB>wE$+j!I1dsPAqPr31^@pM_W!RE zdEOF`!xs`{`#^=P9~hA3Lo2d)=uBo0r;zEx05X0UK}HW#$lzfC={>9?od-Rf*8P>7 z#+Q3IwR=}d^zL2j%RV6!E-P!pa$a_ z{QoT?w|nsZ5BX&Is|=a_szxTy^~vbDIq5&QC*2qBr1ip=)L+adl^2Pm^dgTGUeuBN zi(ZoXbrnfHJ3tb@TqW_Jz9!L==kIX@`n#dO9{P(8paueCUxTsFJBJ#GOQ?akjvA2L z7z5}#K1BBLEs@Df0sOx-{J%2l9<-5r7{dSmJA06Y-^&lLw_0c7aYYLVC;*|V-Buh4sIZWxq}P_zvA*W#^5{XKSB2J 
zay)zBkUfY&TNb&83dy4;L<(&Yp)EY-1ih&s1SEqJfW^-+@eGsBFxV5=j;5`G{s88_ z^)#|T%zYN-KK3SZu)9Rl?qd!f5!wAfWcCx0{;xzDXoD&vXcNhyEx5IVBrq;Q)a($` z1O~tgxB@>A1+qaEXa@`5gBe>8GcqTphgVH&VhRq##NFY}ec{q)fmo0R@{B#sZ0lA7?-qCVsk3nfO^maq*D0D6&^Y{Yk z!xMUjfBOr5a-5$(fLWLjGdDszfIg13F%}vw#1y9xlbJzGEF9G%iTJH7Py{ML0~jBR zE@Ume=e4q$}* z!3bkv;E1+NCZ;+ab2FQmH0DD*o!$xZ=rzBL{^HfrAG~II$?HI0d!Rc2-Nn#c3AOdm z+5w%zNMJ6|*Yr6(Kr8N}g`|pT@hn+Nt|&LVGhuCfakYE{loaNBj&{3ikOWPy*2lu z-;Mm~SG_QLsvS??Yi7`6jePoAy__DX*V0#NE%b%jJi4peOP{F>)2AxS=$6V_`b1?r zT~|I#SCuZ(Wu<%ck@8bIqwAdl3I%~9r zJ7u(=J7IW^J7(}N?vTNcy!{4$^7k4r{%!*%*s0Hi6Jc)9p9Jqc8Qy;ij;CnSTTcUe zKFNZS$wc^OBe8WB@54NheR#t6PJ{RN z!?B+#{WeXDzC$~{bhV`0Zmx9A-G?rC%;L^?#PCjdr0|Y<)^zSetz#G0ui604@Jl3X!mi{vt|LsCmTFH0@*d>}PE z`MLC<$2*w;cP4{xn|=@GjWO^O5et&Q+q)z^n}xmq8DeyEx*S~$)T5I@Hgq`Ho!cAi z$J-G+ORzO4TC{Oyve^2-Eb+0x0*TcDWs)lcYNeLXXpvg#-z7cjKOi$QeVNRV-v-$M z-+i+EJ|D^TPWw`>$NQPw{Hbr{yQVVvc~jnp`9eEDhq7h4q;FTNr)O>*h%T&dBJV(F2PN|~Wqjj{v59di9a zee%6QqYB+K*D7`e?pEv!IIGz1|G84D|IbQur@v8Z@ncHOzVE}PV+?}%$fEf42p=t< z#&UEej!$QzMRX`um3GCO(3S)TZe6@5e|4O{aCz)(vC)_qiIM1JsiCMWnT3)0vV9R{ zatp$1<>!aBD$Wb*QR)aCQEr{RTBRjqhe}iMDbeegZWO0*>vwFhao+{#pU{*n}5;Yf0@_+VnBWPd_} z^n&Gs?~&VQ?Cj;u3iy(OTB#dkLqP1 zuhh$CF}2chg8zX9au@^ny;yjE=wC$+aykpwR(Rc(AxrDCbZKR_6)nkf^qn~9I|NTP=c@cz)fm;>*h2k$=z_Z`fUqSbj?v?R}* zhVz_x{d2s8Jvsj3^Rh!E+p?mhTQU-58`Cr7>(cTRYf?*xg^^#2Sl zyAGDY`Z#GEjv8yOrP`I>P&HLlT`@zVqI|Y=X<4*vVQG>= zeo3ZMZgIX!R$;k%dO@RRYJQh?Qr@smLhhJeY|dW2n5>I>(V1WCMW?^ijZR}a(W!r< zjn}^!&tbkv#ah4^TtNSi)!;fC9(*kr0Sg*ssk2dsS{f~=zQLJaUGF6xYonBqa5sG`++5e2*T!ty@S3!U>oFEr;j z-Oy~N8=A#*CNiGG+|R^0S&Z5P^!sol*1}wPjTY2j;I*?=l3MWG+PXGVs%mxMmCl_k zC~WZ+%WDpj%x(&o&S;F2OKnJ1OsdaSj;}3Ijj5^Ch^%hc3aebGJ-cF+ZgAO7y`YkF zdNYge>&`5Ep*u65>CDV~pVyd&dpHNKVg65}-v@9BzNG`66LinRerK1EYH^=d#e5?w znQzM#bWP&t&YLF6>kO4j?ud~|XiJulZOvAUnp>n2-dwE~($uOE)X=XLSieHs zzjnKhU-cOs-^wp_e9M2;@hy9&?OV#UeM|n~N36v=SO=HjzfYjwd!f6b`` zwdozb>b`BuG`;6;)11QL`d+=fw`wAi#1&P8&!o@>|%vlq>iwY*L!q*Q4UqyHwSqXN#Kq{1a+!^FCK|YyU;nt@VwX+gzsR*76Vcunw+b 
z4o-Dq9-zH#2ST9IN`>r>QfYYJcO%7w0;%A2)nhG6E( z*}@qsW5lMfNRjYao+CANS*eWY(gs=g(fRUji$@h)Mm8xr4INW-9Jr(C*#EPFL+@)v z`yQq=ky}`Y7hrvkq5pfKy=e)~$)z|Spxw9%zHcq^hmAxLn!inFYun(C?kg;C)xa`>sJAw+?FoSxEj?+yk-``NJNLeD=$d z_kL~i+HXdl`yI(+|74o9-;Z4P&*Gf-MRSh(Qh4@z=kRU!R0^zjw+StF4vNfotQDDV zJ18>QazkXY@duId`ai{t)-rLUv46OL^>-Zo--mv0h4!iq+|#w2xfov<`mDY3=)+r@4pmHFy8RY3PsN6Si{Sa@fVUg|B7VJvHx)t`g<|wTQTk{wxb3DIY9G1tVLu{ z8R&cTDfs_$xCi4h>MuS<{_rWh_ZI@v_*RnCzEdWZ?{!J(doxn_-kxNC@F3|Q{7LeM zND}`cgGAq#6aR51@xH+#`+5i74AGIV?&3n_d%P+A{67w0-gjW^$M7S==z9moz48ch zs1wMc@GBt~kij4em~<2Oe|(1g;Y%W&uZdK?BU1cboaFyfAemPhB=O3CM6WD}_u3gB zg5D^An@#j5K8$}aCHf6-crVb7=a}GMPXhe0XZS<^VfU7Q*8(lsi}%HYn1f@e1v-sd zp!3LJ(070IeKPv)cn@RnfXLtx@&|N7@d=UiOG56zN6m#KYCaTk5ugDKfi;)}W`H9_jJsA)HSTbTbKh3o%4OvfP6z_guZJ- zTLr@?gDgZ0Z4fX*-b46%d>{!FfF>{o_FyUq1DT)-U`+n5Ng2iKzhSz_AGCoQki|YX z2fs_Mz#Dv+Ka}9#jpq;JwIJ}vX2eO0(KyWIEXyp98!ndaQ%3ZGhZC8z|#g9%CVa{DF`B zfsj8C@&}3o=^zi3qOUd3ZGv7Kw7Q_x2d!afEk)l}LvIsuh&|9ej?CjKa)__#ArgwO z@S9)YFWmb8W}=VWLl;>CG6)TLZgq@>GUh`D`GXkp2QCP|5rwf%!T9E&zeUikgk~MI zT99sZK&uB@0~qne^bK;5hnSE1$YQ=k9&`_F_zeGa3oZNvf8hrH+Q;Df1egW1%}{?~ z2;Xmj{6Qb{qJ#NR!+a=s(Hp55^oLj&J?AIT&%6wJ!sS7;6n(3PRwMp;8~QgN{p&}* zG6KEj^eM8jn^+S!(1vSt8UOnVsnAcT6dGS443#pB&P;$=BY&`j=QlWkQ`4P(Rq>@K3bW{2*=YJ&CWXF|&Y^o!Mf90e1$`=2N4KO}=o6_<`dF%$u1O8k z6{%%(Npc;1B)NmmNgkuq5?AS@!~;4e`J4_*F*+#CXg}CD0cHn%8{`i*$REa!ZMBdG z8PQ8qD|%|+M31z+=qrr?`dmGnZfPXY4UG)CrkO{VHB0ECRuz4u)j;R8+USf{H=WcP zpyOIg=&05h9oE`L2Q`n-K8-81N8|merNb^7aTjQ z(jWHP^rMwAJutSW&kWt@hOrM_F_}ddOrzD-!bl+N&ZdsYo6lgjil==}>D(TtTyCdRF}K~R zlH2N3&uw;W&ZOo}fO+vz6NE?~A20Uq0VU#n0o4*cGnysl z`*%ss^B<7vn7&-P&2O{xT;C%y%|6#<8mB#$srUX}rfw>esh#pZYzjU){PA%bg#BiC z&+8G`=LCmC@XW(dMcNduPh$}_v?5{>wYbe`-W`%H(G^lC z**U91sy(rLeb_X# z0sa3v1Q|>edyMcsSP(s1G(Rd@tTQr6 zye%R_a&APPRC9Q#bVFFJOkHTJY)$9_`KsBA70N@_DU=59Q!Ea;q*yfbp<-d+Z;Ay0 zOyO^4;PpQ+U$g<^e;6SJht5(;EX;w$8e;u;kSV!M>)#0)9tM6XfF zirS@;5qVBEJ^U-xw6NzYX`xK{Z)P*4wEr+Z4?l&&`zOHrr^EZ_U>+2R+~3CdUxeR^FMidW>B92dV6l>%2#Lb%c&WUsG@0DY 
zT)FIw68VhuTE(=qcID*Mg{p}uE7anXx2we_o>q^EzpEY{_e?E1_N{7k3>!z~zn~16 z9$+1Qf;l)3&BMj;d&pu|;&reH*ZIZL)K-jpA4<%qw%CEIEcW1+7WoK^3TKMt7lcXV z=Eq89=cUMG%*mEZ%Pmq!&Z$;R$Zl1R%j#2&$y}}$nZ8v$BK4$tSjuPWVM)KJg(bdK z3rk?C6M2hy_%aD=0dsH$x(6%Z_sTI3pck}PNKj*iI@MGfQ$>Xxl~hdP6_k4m=9C49 zvddI_LYGC0ewHf(G z)%@q&R`bt&s^*{lTGc;`srqOB!(*I}pJEIyVEo5(pq&lKVCqqCjn}zN*l%sdwRVd< z70lJ4+_@H%HP?wtYw_eIxA+MXnuA1fP2u9vjd79@4QbM$^>bur)s@Ky)ifvuRCg&& zuUw?!Td_fPTG?S$?~qx zQYYurJWpY2_qXGm z1f!q_HlTh!y#E4tzJ38EEmWeoh58hYEGA;13x)N2aUuQF`N4g&1c7}~q8Yu3Vtxy< zB&PKgNlxjmk@oEBkaeFoB+;Uc-^)2S{3-8L&*Yuz{^52Z=Ajnz zfIjc;#Sk2a~pG zXEOigI_BWaTzGHjZiD8EK6rn;whh8}LOW+EeCJB+{o}iP{Ku5YcTA7G$1G^-8Yl8v z<4Kd(_>udXV9srI6z8%!ndh`Bm+!E$Twu4NS!lhyPh`1trI^L&ZZWe(m&DA5ABoHc ze-|@d$iz+i{^26l+;Qmd8$d3K9AN1P&dJ5_eb6q0cGeiY_Xc?H&Di_jCQfeKRLNzV z0Xc26B*$&eWWUXm?6yrOo2?;ay)~9Bx2ALETZ%Z-&GkIvP2D`h4a;~2>vr(;#?JHf zRzKwFu6)7QUH*=*yOat3%b6~$Mf7>s5}cFEa854A`_gLMkF*wbD4VeVzZK8HL?+<3 z8{Qk4fc0SsvN)_lW{363~4~&2LC~Bjf(dap>@m>J!@q4n0pq&csaA^BO z+v7Cq56&ZhxD4-o9d!q&OOg0og2e7Bk?^h#3GSK^|E?o(ccw?Q&>mK|2Q8L8mYV(00BA{|{}0 zo5&wNBU1Q+NF0BP`&K~oSeoctRorx^PxQSN(GQb|p7;|zjRyD-eu8#9!D9ar?f8B# ze(oH8{?`B4fw5nQzAxH^^AYo1a|rhW9mBmq(4Gx#UuaK)wk@dxOb-gE9FJo1niG>umx0 z-gpFaa1wKH7IOe?e`rrea~(b>GKaR_*F@@%F$Uiwe|bX0`x)B5fEVB$VB?4Z1)vQq zfIGn4uoO@V<^s$O!^!s_n7S!^HnIn4z`d29B36m zs{%T;&}l;d+Mv~izV$+H2+LzBG{>O16_c(>j;}&M*J5=br#HhSO7Qs*<^6xvBKEf+Agti{;c~DnI?t%P41>+)* z`4G3Kcbo_2)en6Rfo=@+QlOO$tpfC|41KGHRs;Ig3YB@#>x141^j0AE*Z`efbe>Kk z@3?`MJVK?=b2^3|9f9uQ2{0364u#D!OzSw;084i~a+%6-N%lLoE*4;UGpYtR?6hV|?#| zt}eJjiYC-xYyi~ zqnmc9f3TFHQ?}Z4$iag4I6BjIrzy1AIe<1ehtgV?SX$$fLaSV|xD_t>+%o4fZi#aZ zx7fLv8*%R94LJ|+2Ar1h`kglMdL0k)dK@nE=i5Kz&$D~M@33WpcAJ0GhSzca*2hQ8 ztI6=ap2#2Yv3F@Q@&{LOIxtC%c6uOx@I?OL=|O9}{Ah(&FfH|pZ!&*@ZzjLnCtuLzQ!4EAsTH&%5L#muXEzno?nPp zO@1a`;qg|y!kvkiPvGx}-y5|D(7lO|mJ1=Mah`>1iU3^O;a;89f!eez$eb1hIniK{ z=l^5tE#Tv>vatVWBwk7C?oCrk(#GB0-QBgZrb+5;>h4C}XrVyyqQ%`7TNV~y7Fc9i zbdhD=?PKy(d4{1rbJftc9+R(<> z*M~O5d@-ax`WIjsQWs@$`W;p+$hzG43p&uVEIFU&GcGqB|l 
zwK=?ue#FDb~f>Buj%ZOf~$Yt5~9XwGeOY|QC# zs?S+Fv^IOku&S)1!z(f`4KGW-&$%@1W#^LA&xV(z{4~5I*)ps+=?_>-P2VT(f5P|2 zS!Uc_i4Ijko`EgFl{KGq&4Mpw7h(gddq?gOUj~*9i@rpwvr5+mg0Qdrs6WY zhN4=By24h+n!@fOl?AJZmgjF9UYd8>v5)Hy%>C+GY$%kTm4 zqkZ|{7p{YMSZ=*gLSLvxgQ?{nV-5SP$;Eeqja5!sT}|x)deG8pPjyuX7}~1BjV+b& zrl!g?oBE1e+uDi}`|9#)hsv^Mr?S$nAtj}&h7}cW8D3Cy$T_d@f=f>RRhO*17hSS) zK5@y){?R!r%i^4sX&L@IzM&o7q8~hm?^Ye?3QN#4JRYbc&j4%d?X|37jJg}9tD|9o z7T5b4n(IT24fV06+PV~Tb#1n7MQxE?Sxu!wNp+)BVO8gl{K^%>aw|3u&niFQl2Llj zC9UMH5vfJbk4P!}*d- zgxXuDsHJtT8e6?p*Ai@~ZizNkv?Q6!nlo*Sn+og-8_ON?8tNT$>f488)h!#AUb}I4 zYRx|97wiP9 z+Q@ZShPHL``xl)PRoCgN>du9#=nOQJbw(PCI}%I<9qBfC?RmC2ZDsbEi)$UzTNgW} zwDb;5Y+gSszG<&>Y{O~i=(^ioqH3RUiLCy}C9?7d=co#cb5yxy`0sd^zW5?Bc%+ql ztCMv;I0&|azHao7rL6UP`Tfh@v8w2ssnR}o74`Y4pfAjj+Z$)h?nyOg^yJv2EiJK2 z?yj*6z;e5b%(u#t3N=`!wlui$+(RDSSh@Ib?lUn|Qu#^Pmn?v4yY2AV(L-_o%PDkA}+YNURnfNt5T1e7PU4G|WHLVwiid*Er|E zCgbe=hfK5fUNX(tea$p|=Ub*3+rBbQ-|~xT`X-C{cRae1>tPdf)K=yPup9JmXAjD5 zbRf9Pj&Se&7-vwNBJVtFlIJp}aYtU$xIyfQ-*uguqkD?S>3B~)W?iPxB0 zvNYe{$l)o6&)8CHK99^B?Y(Cf;k5%Y%*@dTo@Pu1%5SwK=kXaFOgD z43^CUaWX%UCDZ*CGTg@ib1$9i>RuKEICA^FkiE)%e_w&sF%G zgU<<=9`YbDc!WJ5k4saYMt^vo`iqySeRxIObTF{^aA49I!9r*f6QC<1{30gfFm4E> zfpTsVVaIzLSg;c0w+ZZ9eBqlf0Ad0z6Qa|!^*;PwkKf&J*I(s&ybm1;pCcZnAK>%+ zXNkd!!~kBGH_%}IivI8pHoOo12L6sMpE#nmjN*pKRCI~?)Sd)N-zA}kun72`cKMFM z=G%jSV1Gk!zahBb{0-=`_`c;fu1Egd40q`@YJrINfChul9)CiEc^w_?0E%RgtZQ11Nz5SG>Khk1N+I5kD^JOMjyBllkUc} zC(s7oM1A-IZQy5eXUhOS;ql|&U>QlCJ!Bd+7eG9hE3oPhfgl>BfdWtgUp-u{s0dx~ z^peT1M2}bp-zIYN?QrfvA2T_BdXG=N%Qw9Z-ufR{#!!FZ zOx=SG5IOfiEUfy2FNgriSf9hEOW~@4s|lVqyy}K)8NRK8uOFW_!?^>#eQ+Iv=R7>O zu;1i7i1p@ZSo#E$h+tuZ)-Jt8_*)Q!nGT&BXFID>qc~stLPq2(IRhCd+`-f`9%-G za_xU$nF#k7>MxwoADq~WVozLz79$V%{n1?_@h=(9Y^phm;i`hG951ya~HH2 zhiSC48-9D?X9zti7S1&I^582)W2uI#QIDZbJwo4jh`#Y4ZE`>LO826VU8OzlqCIX$ zi@BBmQxT%eSbhWFXjSa4hyTL=z%m`~iMHr3=noUAg%~?h-#Aau$2QaTreVHbF|h~4 z9HFOd67{HUrXI2@)C2bAy4Su|R~?#lmqWYma9FC_9hd7?$91~JajR}}+$(lp=%T{~ zU2wQVXYC%-Y5O;I(*AQDxBp4k*;~XfE&UgkS$48aGoZhismZa|4-=jA_Yux|!+xAz 
zaGIqjojvvNh#=iJDq44sPSG7>a&+t1V%;*fQkTcp>xOZyx-_m+*N^Mf`EjdtcHAbN z9=BU3#~sn}ap!g2*xPkv46QTfui8K6pV~X-KiV_eqTT-ki!1tvs|F<1uV)+d&NQQ5 zoyhM_j&{)_XfgLqnWsCa`RdB_FkPMzuS+x1bzx?n&dn^JG6h+A?=-cM!RQT(asr!f?Kj=5*)sC%IfyO-;rN3HgIG;6O% zr}lXCX_v=Z?Qq|!ZSMQE#r>2vEx1`5=3g`P&wI_VcJ60})owo-R=HUWE9d-yf$fD% zF&}v`{RA-0F~nc7*|A98J9&hntCJKO-mzt%}XNJnmZ%5+q8!tv0WT? zy=_bARokYJ7i=4Y|8Cn5^n-1EpvAU6;15{0d43d1?hpS{sVsTYSX-trpJU6Bcw6mF zp!Ohfnl>lS*M>wNtxXKos>B$rNJuvHC1e?U;tP%4@#Utj_&QTZT&uY)uG^+HcC~GD z>{i>xm_v5;(dX@Jqwcb=iG0q!D&iyis_^gaE5j^ym7#yYdVAr0mOX!h_pvONTi`+_ z>swy$Pcdpos*5&pj`6xQH?2-vq~&Qr>P?F>bf+d7x>D1P9VvOnwvcj)~l?i7Z%H!{FD2si@u_We0hmz>;9ZI4s_QjEZ;KvY_KE%MP z13i?-QV*QTWle!CyZGB?&d*)Pxj8GdXR9yULrb&$)s+>l_N+L=;>=WIb7r=wF{8*_ zpHXR3o8D+!o!((rnZDeNp_!oQPu{Bg3LXRc^Rjia?)>g%1(W9NLI@GPFYFc zI%OqV9J3Psz$f&*H!|o8a9=Iq{vNmv>@K3l7_2I`RZlUs2PIS0Rx(%3#a?PC4pLol zq@lVv!B|<8ZYnRzGnW>Y*%TMn+7=cpw#(1&amdZ@cg)V+?Ub2wVn}-SEkn{WA0LvM z@!pWsv~QeJQ!P%ZDHg}y@e%FtY7V(SOTIhG$-BW}u)PfZ1N4=f)LG%8){04LtZ-Fb z#X{9o1gNqi+)!Q-XDlgCH5HZTm6B7MGmxswTVC$|d$m6{{T+ z%6B-$l^t`6ExBn(bkQS2q6*(05|#h8Q*_?1PSLp*r|2AuqZRMa@17?H*J{yV>R980 zJzxV^+Ca|P$U2)dgsb>n>$2u)DyBB4u-RMr&B4lPjxuC5CmJ)FGEHfX1?H5-3Y(;c z2HW`hPW#xpRSwa$+Z-dSk2-}{-RKln@sLw!*Shh9d-z8xa1g6cg@ki=1oWcs;?XaD*xjcP+@Th zC?CYD^uNdJnIGUg3l4xSU?tpb9o+9-LeAMk?%j+2&^umfy)%{E>#oFJUnTU0Dy}z1 zF}*2=!kDX}_r9 z7yCtZmjB{K+WcYouE2K^>;W4I_5R>OzD6sw$)J$`+INieB@gM?Y+DzWNjrxb3 z=ns4CsXKOQ z%C=LQwB=4s-1LGbt^ZUL*ZtctajnHLam^sE!FOdX?SQ{~;N8gMvUOY!8>z*CJ8w7l z{`Zl09^~Ht5%h;+{O-l^(VBUDie?;l)wE-tntIG%laGaK(y>HMxGo#LqfBFuHfzk0 zK8@nAoe>9*$z}f)Iq!X1&bvO6^N#Q3vfUz=ZG*UX1MRShc7Ssy=;wJ4+|9e#3$&kh zILtX5$H=`;l6#*)e>g8qxZXiyE{@cwi<30+;%vEGbeHq>ej0XtxQ1MxB&Q2`ayVZt zyK^0~IlE5g)B9v(la1l{{W4tlstiXymGSVmG9LPI0Cy7Ob34#Kw$m4QT(gI9=>U4% zQS`VIkKd;hmmdvLq0Z0;N`^wkzdq)_PZxS0o z8Tf9kBgk~FTedLxT!*tH_7|UIVtyL|?lXwL9>6W|oyOloVCx~SNqp`&O<#bw2;P*N zsKK~}cDRjpxRd&WyU`!+m7MOU{^FXM$#y6g)@U#V%wfP?1VYd`5_!aqhZ$@hqLW?2 
z+57q8d-;N^FR)m9p8?_10bD+g4e%Zy*4F({E8uO#-*R{};f=k67+fU=_erxJLn~i!gv9TqCh4?7D8_@8NS}cYcB9t_wsMo^S--y z{}bS)0bDvuf5hiKaIeGXPQq1lJAIM2CBhpDuNS;-@J@M(7(9#q@FM!d%W%I6{tVs% zgjerk%LmRZ%qE~sxN?KYhlOGc+C)B&&45n-Id*&w>E{rAe$N2TUL*!LqC??#58RD} zsu=FnYiMzgqQSsBA77`#JMI;9s5gj#mGfOdI7vhYb(oKN{0H~~{L3D#Wh4p1G#1RB zBroA08#I6wU^_SjP7J_eKZDHAsykR=eQwQnt@=X{hy~Ufj6zTe>Om{ndl%VrFPgwg z^oX@&u^Y*cx1mQ2)?s|beg7}n|L_I5|L1xL(_TRz_>`ReNAw8G|5Jxyahi($VbvY1 zu;$!WJ!S#mOpt*Zj8qucPe|VQRd7C; z#XgA^^BCFx!?edW-t_?AavvW3KQ$Pm&_5i9p^2dZi08k>;(N`*S07>;ruW#N^jG!< z{h3;jS7?(L(I1}Utd?i!8&9EwJb@jLp@Td^-*|{Nc@TZP3Ru?VafCK zj*rkHzT+>}Q)DdY5reRdL30>N{_lwXV2>@v(exaCg9BaaZTjZx)aLvNE#^7)YCT1p zJccIpu*&eO2EQ8l_Qh~@p}_SL^;IYy>*3oD*8zINNqBBR^SDYxo@3Y42Uztrf3@!9 z8a&nJ1do>S@VlTtjI==;bI{M&@u^0kWlbTrZVW?S@P@z{178Z?mIGH2TorKD>K670 z-DGIfjfQSrGA`Hk#&tSx+^Vz2eL8JCu9L<~I%c@4qlRa7*zi6o(N|b!JyB-xgkS5U zWg`6JxJ<`$-+Q!CA3AaVqrp+n8^`Ez^K?CEw*bAxPj@(l>&lRL-83{^H?TkG;_wn( zaIVxj=X#xSZqX^{PMvV+({YzIy3S>@4!i8tL6_s&?|g&y4!>8shrOVkLqF2?q2Foi zPyvlw2%8*|BZzyl1kcTKg`EmKFMyG+;FnG1Asmandx9j3#x z<8;6^Rr_4Cwa2wcyIjk))3sLHU7NMlwNsntEYrq0>$Ki=ht|0s)*2@JRkQEZidoNS z*~|~rJL6mR%s}a#&MDB-24Us@&W)UtA@xOXrZ;fMLZ(`#oeOiFb!tBPgZmuxmPOj< z5robXr5zrL+UAj=&7OJM=vkr-o>l7iY|vWIHm&yT(Mr!XTJE`3eI5tZ<8fBq?zgLJ z0awtx_tie{8$;V%i=hoIW-(ff6;>_i6L(_ZPyIzOQ!2O|z_jkm{a!Ce9ri|l@SUlh zzV6!Q=cmnnVcOsqtA4)}t?|p&D!)Rl@GIAHzgqSAwW!B$iMst(8oGQp8#;XV8`^wM z8(O_@HMV#?ZfsiguCa09*Tx1CQc8FHTjnlHYnd*sOpe6A>>WB|fTU@kSht)|M@rKa+jwdT^8oi-)W$83tCF54DFJY<_6{-$ky*gtLZLjPlv7h zc8RI?+b1NyY9F8Ygd_8l!C&)~5i^nwmvHx)Rk zu5hfX3a6`_y;>!OUMeaKR6${c@(L4_Qm;3Q)XG5(#ui}sbx8aiyRgik?7}iEwqfamc%62C6299?=?h?A z8SMa8fW;N$8I{yJSDRH-?V`MziOQ~-rOX<4rPuf>wI)Q#HPK3{Nj4pk4;#`dYh24eYU}+XYB%u@3sple90~#{~xvic|Y0)=2~n6a|ZD;ZU1OF z_j=(w4&N@&4|=P}_iD-g>&SIj4(Bwo|DtJ}Qk$kLxoMsfn!FU>6sXvyNJTd#D5@#L z5Ybp*2y3h~hBP#pg6g}?0d?!lezkjSe5=pc_*C9y>s|hWt#|3CHr^#a+ISaPY`hBx z@d7b;u!g<>4ufs*t_DlsZfGReAuY&V%v!%4{h?!&;yR`%8Z9QW!&4C*{0>J)m_j>Z z72J`kp!Qq^w3iwD+8T^Li@S_ot!qpRTXvZ}n@*YC8}2Z>*F9%;uld;QUiELYd!@zf 
zUNMNL@%e82JPY=NO<+0P?Qqw^UED#kE=^ zU!;6`6Xn&LrA0l(^6aUV`_c}>{O(nTxm`PqZk;ELbJ}k+x-Ndk=-Tp;(Y5IZqici3 ztW!&>!NzS*{K|bq7$!pydEn4R)&$ayi z=UP84SR1POYhyKcZJONHa1O=lD$QQqs+kBG(^qWOwB^S%weJ>9S^A`=EcsAVI=?ea z>9824v<>1~JLxdK9s%3HYS7)u_1H~2z@4*-oNq08?*{VT%?8ciHbidQ#>#aYdqB3i zY38ed8J*_wskQL2fX8#R7YkH&4>s4?pgYxKGsHEPYH8oBa4javSV zMlbtSqkAm_xQd@=NzeC#O<)D+>_ZR4@8Wgj{u{}Aw@~}PLt40-``-J^ntWiGCLS24 z@du`9+yOU@J+M%t4+Ln`fk=(mpN!s-uVMSDHDqsxoc64h!>+xu-*HZMTkn207Nd3bR?)_gUjXhz~ z&{IR@bZU$oPED2lsX4McwNN%E17$uLE#nCW`4bE-$7$$ey$pOfaroHa28(@9G9kZ* z82tsL-wohmKmUxs+d)6imu}*kgu4Qta}LlB@P-}d9F9}y4`|lHtZ7 zi0q@mWCr@#JbC~o!5g9h6VN3F8EfI$^{df5aO5l>K7AjF{;Syc5%zsGfHPZ}qcC$j zSVO$KNX;7#QGbBhnefJ)qa7~N4mY9$-9-JtE$9!ol6&7S3TccCu#Q|HZN%VC_5$&CPdI18`MU<= z@C3Y1gJ;0=;3e=fcnw?L7|Nn(9Gb{XZdNWtn_yx1G4c4ghFXzc7Vz5vj{Xgj4<8u7 zX_{ysKCg$n8=vclaRL4&-Ag}!*9$-0;2fxfc@7=w#osiT*Wi5fH@wZ`d*DMrc-bg` z_Tr2dLO=O-0a{2Xdv7w(L$Kp#?D&}u@bdw1Y5*2ZBipj-4!^^yKez*H4Mq&G>JNFK z6x5)9G?6W~p?@qPFXcQ=bdc}RAFOp4Uy)~WHyM542)X)cw2vFe8}CBoEG@ zSp`o&A=nJpPBPmA@EwEeJbCkNgzE{khS`H4x;vgOApD z%mItw4Td)s&QyHNg|7s@D!A$idkZ`rd~-KE%aFxZlP9kyPq)@#?4f%c9;m~3j*S0l zGUg|-;Bg}JDBt=B-~KRo=r>sGtlZRJ2n}Gs4r@F(gM#+>Cug*LrU-mYfHMQX^6{w@ zo@&0a0iRmn=|J~bO0<{LMc1GYY{cswaP5cV1p2@YnF-*e~#ACj|wJ5YadD=b%j z!+`!Fw1)v$Z5adqWc;1OCl=vj5ZXsHekF4WWWiGa5Bor{<5BE*7#-vx?0676?nei? zm%eegw!pOquIu2rFrb3m$R{qMq+GzW^Hda_gXwozhM+%){9ot~;v5PMr%lK5?is{% z0ldD%F%-U7xKi;e2Ojo;P#bd>ee@1=klV213VO`V=pdKr8#kZ{U8G-Jpgqp=-)a8! 
z1lAs-7UVj5>`{925tt7D2Foz`?Z~}txqJ*J`jahwyAa=r#MBjDPhuH>ZxQGo34B{R ze&wR&6rp%kpt01#)r3!N?1<`yZ#f!EKYZKaItb5cdg#qmr#!?J^BU^mr&#wr_>~qL zJZ+etd}4nXWnDv%JF(+cGeaEh@(6w7eoe*Kxw>NT(q+!9xo8uu^R`JkYn!1{c6mBs zS3+$`CAB4WI%40VLk^uf=+LMA4r{g7VXJm?X6+7#Q`%;Ki?-N3q)oQ3X@l)QwBGgy z^>bEjzYVATf`P4dqZnGiYYcBsj75JK$*}L>M19Lx-8hurzi@WfnGt?EF)~cojf~ad zQ7Jk&DoguE7ijP3Qff=8wQF>vc8qD$wlPb!Wy~sV8nam&#_Uu7n3GyN`ev;j^{`fs zcvH(qaJtQiU(`2(lZV(BWQ7IJe~l;aoreFjkk%PuZ=25aH_@o;$GYhB1kQh$Lu=gfw92hSE8MEI%&k$qZtYsic`Zxa`qk;SQ|)fo zscp^;YIS`;&9h!r)2z?cIP+&Upv4RTJ?1BT=4f=i;Kfwy!!iJz;rVq;HwPBjYWISX z)V55~CJ#5XmPK0Q8K_mB5$G&&>hnxhk7u^JJqxwOvs|5?b!zuqti_&7)#|xM&7RxU z=y^o-9v4;TexGXQzpUzcpQ&o@PpX`Y0`4}5FJ@DE3Fni6Oz%PHAK+L3Yde+zJG~9s z;yqmbJ`=UtXO>o=#q|05YN>CCmiR`i(>GD=z8PBVo2ORaQZ@V5sL{7s^}b6~=etTZ zzFQ4dK8FnzKGz${z3(-YF8Y(9WZ|cV63-tE#U2(zvHKuCB?i`7oQIj(Zef}|4Gx8| z#0ut~3U;jvbkfSevFZz&uI`|D>J0KydvKr@2SunQC{9g5scHzyR$Wk$YJ)0O9n`4G zpiV=1&Az z(+9x5DAw56(vK}G!tB%&K1yBTQ`8ndM=jx=Y6|yLLwKm_!edkuo}{YqOjU#zs64z} zrQ!94;_!AuVfZpbLD(i^Uf6zPPUtyPcF0|(te_W6nSq~}G6Mc>%<#7uGyDee0Wp6G zPPS+3`Z$)K@x*|~jo7k^zxPC&)fr9gLCi!o#mrKD%mUTKc&jQVNEI=WDvOC%NlcoG zV{%m(Q>ucPT0>sUVna@JpD`uF4Y^sU#slMG0XlNQhN_ zLW*(|vXzriY{*KeHe@EW8q*VcjH&VKP04Y4O-Zq*%?UAgnB$|KHOEK(-5ekOgDF1D zVu}wP#9PGtDL8LUrq(@`b^x2fDqb%Ei&I$RrZ}oPb+pP;r>Z1%jtWyfl%MLW+|&?d zr$#F)HAxw%SxQeWG^D0h8B$W4jY%m>jR`61OmWG(&9RB6%rObKo1^2NF-OIGWR8md z-W(NWF-1iV;tgW{C_Fc%(HGKL_Os;L0G9E*9W?U1I@3;NStC@GHA(qdvy_v?`HxxN z%E$^*T2`b|vl5h?m7%1p0z*Ppg&{t((HNV##2B5i))bk(%N&t*(j1<0t2r#`X>(}e z-^`)$-XIB%3upSPPbd<{a+v<#OLW&!mFS;OrCTpv)`g z!1O200jVFF15>^;2PRoe14!gI-iZ6X@LhoGU>@shuo86TlkXLfa~842EoPlv;-s|F zF-j?&s>ISciZAt4Y^k4ON<$S@8l%Y46or@OD6F)^5K>ZS2rBL{1{SY0`WJ3D`4${A z`Q+VV^3Hw2rN^(;kJFHm?jXF${hDWp15!PSWhtj<(Gb&>q4YUESZX7H|DZdg>l)wrt^2E{)%;7-s{dn{R%sc)J#d```)X(h&)R_Kc^o~4@Tw5iVHEnWU zy-Gt@?b47Hr{&alw;X$3l2iAm8q)QHhIF!;@8B0R;W`8cYJqs{!tcg*t_S?ifje;p zH6Uxqd)K2sY(jt7O5VG}UL$snkn_$78n$zWhVGoFAv?U}xFbjoJ7Q$FJzciji)G%{ zAmf%E88&Zb@IT4~bTf`V&H(rhjs7K*(f5d7Kk7^ul0+x%0G5Fcxa;69UP)hEiw3ii 
z+g%N$}#T! zpWyt7QwA10jsQE(jtA4gT!0;CvEwWgz!~dBPscvB6y1XW9^->YZ(}fg8XMmQpJC@e zb&xjR3f6+9{b*r?tsI|o_Ro8WEmz8S5B_W5)obs=u-%L!nA40ilb&LehwPvF0!V}DBne`~G& zx7J=z12LdISRbvmAivjOMDjcdWPu{GGwWWIIgWyD!lnJ|~O& zjO^}HYC%3G3;c*|^aBF+F8{qvj`vr-=`HZ)|6q{YTKDk3`hy#Dy@&qr4mtN<$u!?2*L;nR`!bpGOXRyRVC8ds z_vH*y#FQihv%{5S?qWkJD$Xj$I%}iCFg#aI*ehOGSL|L7n;i{GEXjyGwa7s>mdrUv6Nw3usXbobFFSJ5BtK!3Om z{ox9I^k(e1j2$;($0c-->(N2Zql277n>tM_PSP)q^WRba)moWykmwzNVIQr$7slT) z$Zxe_P+ojpW_%t-PaaP^XVP8^;Pinn1g>cGl_Yw520Xd=RfJ!hMS&i396g5Y7Ja1~ zz7_DTCknf;;wb;WfZlN@k$DD{=mV^?Dv9g`fpsIW7~p4U`4}5sGm>}m`a#*jJ%U=3 zN%%P%UQamv@hyxP#=?~hS0=l`@^sKpto?=x?KRYCkFi<1jGfwH?9(>WI&CrT&_?57 ztv8-WrMi!FCB6U)czq6+DlE!?jfnvmSj;|Qb27N+AyqL{lj~;#(Aw)4d1R6 z!w+lOunX!PdbgGic~;#{AE?Xe8+AGUN1cuqbvh7-p=dCp8SW;Lb28;znaXs+^V4Iw z1>rnYM@EiEZ<(pxV-{$~SYK@$8=}qQqO@^bqSlW~)4Fjv)Rq)!)%bF)7+6(js9LuqbzE)VkEz(jqmTl`{Zoa*K?TC zz)4q@0W-<}C)-imGD_Q~pg&A?)B0%(wRT#7R!HF0%{fz3S-LBfHPpf*$hpL|ZovJ2r3Nxsj$T><3>2JV$ zZ2{9OQ{8EB*qtQ@)6ModMs1#r{@^-6tLLCUxGhkhn~!?jg3(zb(OKfrSyI*JmaSH| zLN(8=P}AIcHOy^S-P}Ia%-w+Au~!vt^xQeOsnqo;mCXJ?C9}R&@l1<~(PFIlJN~`| zFUORP6030h32$3nGv_^Z_;OwArKYV=4_gJ*{7 zsEw)dELD|fttvfRRZd+@sb{~6J$I|fP%0+`2z#PtUn#Yvu zjRxb#{cfhWLrk?h{aOC`vZlbEW!|>x_8y@Q?@3zhGh59*?rQY$QN2%)YJDPA?Gvv` zpH!9mMbHeb*}2XP0ukk1NaT7G*AaTp6D4D#PO&rMq)7_JToJ z>wzAJ^9sw2Qz674l=Ur-Yq4b+HZ2J-sV!ibnghnGAz+4T1LvtKaFHqk{Z$qirjo!I z6$d7%FfdaEfrZKotW<7bld=PsC^K-4G6HufE#R0T)&FKgitnR_WS@5oNnT$Yk{106 zEQX|o7QZ=0w zO67+}DmOGi*`evm3e8t$Sh>=}8k82+sg$r)hUC!ghQyHT3<<%P4e^1G7~%rnHpKaT zZHV>#6<7?hJ{H5D@%I7vFGjM|jHVxgb)XMhIsmuSRU1j|LF7o4Mom&t)GXyk%~x)u zm$D-Rlo=VWbn0``qEeL_m8+DfQYA&zDKV-;@lh)cv5{L1F%d@%(cw25qCy`wM27s; z5E=9@LuBB843Pnr0a&%Dd*C@A!!kUUc8Ft51-g0N0_yl%MVyU_uM7c^zZe1{EQWw^i{TmgZ-wgw%e`GJf!CqQ^a85}RGrHDHjO&x zbn?y&2c>3=QWCW}@tLy}o4G*InO=&@3{*sBgu*l96_%N~>?H$jn{n-iWtS7G@J6_W3-;QTNJ=Eo`^KUMyDdGgIGmrq`kymFUn zQO*X#!t4VE&&&%3kM#Qu?y0XEJW{?icqIL7@JO^6JQ6I1YxsK+zJu^>2FtT(2T%uh zNgiwLeDbYAa=v1tB1(oTv}CM;(P9EiUFBcuF27PA`IH9ByEIB(rAb;;k}c1YQhAg# 
zXhHE3%`fWLyuy8&n}1$zx%X;L_N#Ks{6cQ&KO5ZAEQYz@o($45_^kCv{kiCopcU?F zxC@KOb&9#ySxU}VLGE2`FTd)M@~N39ubP=!STj$aHH+k46QBjv;hJ9^uesG3azl%m zQ&p?km7SVZxkfX~_h?4hSxqmws%eEUYkK}?nx6NQX5_Mp&tY#doX6qYQ9wI@E_`kR z6>#U5vHt_^xN7u=T58T4&GKj-ruofdHLrQ9+?riAr`cVzn|(B^IYcv?W6(NMHLWRM zQyZ%^xv@=?8dhpz-A+xYJ*Dy0cWPY4iyB|{4^1ffQ4@-}qX}-r*Te8_E}S)%;_GNNeyiLw6$2F|!HVtifR>SH(mUGPya;|1YU&Xz(a@qmDey|j@R+4K~ zp~2vH2Hf#*hr#XJMb5Vr{h`mO@hhA(X2mFtTEQNW6*Dzr#XLE$@Y1jqK^nR|T29N; z^6>;%|_1P+&D|7jSFPhz`(zOL2mKy@v_l9KpVai~nUD{+NOAUtEMg=s*+W68ETJ zlKZbfgIPli`q_iBk@Fw6pg(LU@7-y~fHI5;W(=4NW`PC37ck*i2b*1Vu$}dEHk{bD zjRrpf*=^YO47RvAHvQzu#s5o z`lq&b6DH6DTH&rD-udfk2mFnM*MBd+1$%(p`!Kopb>!a1$-Pg)e;S+x=WLj4oX|5y zfblFwr!(NpM|{Q(YfZ|fEM8ah_oe(hu{d`Ieda0)g=bkbz74ojPpnsiZknuV17jm^ z%c1EK;SE2E21ED;G???~57(nV;D*hOaNh)O0k;CeEA|NKj-gCGqp3TY!p+IKOorHT zUku0tjV#&;?o|SL_YJ)7VelHCeMg(|c{T0aMH@Hba~b|-!5e>$eK2r(U#1u*bs|MzF@W$TNM6E@ z7x|JGAJf*IToe1T0iPR)ZyCJVH`5p347h`KfNwfnW8oTl4IK*eNjh14fhWOJfG$R& zJ*bJi!lU)9l)pNng^WQ9naRE!Z;}||@fCLbvx6FuO=u(7F|aSkn*Ca94XpZuRRgd- zP6hLT4+sTuAdSo{mrS*UOtFf*yq;{Wh5WdKjHw%KU>TakYO>k&<@aCSiC^Lcp5F{aq40o zRunvm=n(1nlS_1q@Tm-cs<}Mt;c4M=?SQQZu2pbtgku*`IKohJJ^H}i=mXD@v;Q46 z;NR#2gEbh3c(l-st@s7}-P)yz#dGBSkI^q4B=5h6x|qAD1-YI6c?C`AX7q;}(H|~h z#|7*-j~!>R<23O&g${B;UDRJJgKHh^+t3FN@ZVYVj$3J+$5HLxp|@C1llc#@4B&Sg zK8F9#;3>422Q>t%$MC7C#Lx{s&Y+-8&Y=mNrf-};e>jF6*I~yI>^Mxj97G4%j}EdA z9b^w}vWr;kK;_s5`xc_G3F|k|YW=XRqqWzf`mDhQE37vB6TAhU1PlfEY>ThX@Q%lW z8GOS$_`Kl@f-3^9IKDXrzcS&3a-jpdSeT%(8X1{9Ig#;?8choyt!^$7%#lM0<8~$cyh;d>#W%{wo-xnBO&Y0}^40Ij_L|(a%j~NiHX+(- z6Q#|z3EF6zN^MD&`t9d8=V)b z-`QVlUBa~5C7RljL~2XYs4K})&xj%|9a*6zBkR>Ua2uUMo!@?#;fKx=g3c19rWx_nl%%R|Mz(5a6sdYl9W5ql8{4kt1vGh3Z!SVuZac5n_>lN73 z>uReduIzo7Gf}N*F->ms)!^o(I=2ATxP__8EryzsB$dz2Q0d%!mCP+y5n4(z-ls)&5vfM5z!}Wfp&wfqmvp!eaOjK|*n6&8@y@9{?!+CKbOCm3pT}*N77s1c# zrP$NCz^K**L)GLyj+&Nfs&RKyrMsufJ$zN>!Ttx2NOYEX6?mj7-y>JK9wo{_i^=kA zL+@CobdOC+Wq(qN`voN}xKD}mUQy!Q&z0!*lM?4xl;~>FpXRfqfb$&F`~j96TfiD$ z{O5HiHns3?4eU><@fxKHugNO)nyq551uFFNQodJ!a=pTo;}xSU?__0qQ-8qkDW!Q= 
zE5*B2$v(YG^xmih?*oeSI@I49$J0t&)yW|)AqWl6sm45&yWcjmW*N-!`@be;E2P3#1BWZ^y z`T=OPS|PEV0U4XEz}ON6 z#Ma9{woAS-YvdEXTi#KpXDSkOGn;<(Hf&-{egBBo)g$ zsaA`UIY z667bLe_O6SY9)JwBU12wZWQZq^uHN7-j zQ%lM;rMOv>i~BUGaEm4u9My!pn>9Y?NlnQ5P!lu0)5LV{YNnC*TLD1zfGVL z?))5b&OGWb;EpUJ2Bqi^72I>Kw%5#>5t>muUeju)X=?2pO{wwFq#AyUlnZG>O`OKn zWN2)4vBp$4XjIiwjjY(H5#@*FQhHe~#gECQ;C+qA|3)Kof7M8Im=o|>>ycK%*@3_H zpbYL@xKoSCb>I$#+qatByN=wu(WHqjLo}{+l*YDB(&*Ni8r3pSBN(y&#*a2V`6EXW5l~Ci~JKyp&dah_B*l~u*?+k5n zn#Mj&gPvT+d-&k-D`+!Mpv}C=!0?Gy5^L*ztQNQ{m(vdToQ%Jb`0BrhesF+(z{jk0 zFfJ#E!6|sp0Ni2m&twA#uh=7`8|foA5tExIa8qU$3qyLPALek}n~o;`@K4aC5(V zfBrq*)s}hAoH;Y^oM&g|HNzDSkKZ*knET;-5Ih1N1=qn70QZuVks!G8gNPqL(uFfw zPe2dBj@PTuD5ijUfX0382(MG<%Y5m(uWHM7!W8cLhZq;|an~Kh0(GpQetGbv!4>xy z8Vo#M2FG{6GvGP!Ja`ej1YQQOfj7Y0;D;V0jzO#oAx3_Yg%(mnGTFxu$~5|~aQ3fo z@Tcd2u?oQ0|6%A4#u^CY(O4H^>;W;(1Sta5WQI*-W}RfJ{bcS#=pVCDGe*gv7o$fk zXN|%d^ns1!t7H74ek&RN0ewV9_8}SC`{Y;eVfc?(vG6XR{2?V8x`(k2<5lo7nBX<^ z2V?z%u@1x750e0AI@wqr`bRk#Y8`x&DWMCVL39t}o+NWvi!n-$z8Ie6Xe?{UBsY?; zZ-r+!jEB)aPQ!IK?s=587_Z>2U*Nib>RA|`0Z#*CJ%+L7;U@XzP4a$Y{{=hws3-RU z!56H;j|BCGQ<~Z)>(ZB@vME?JGZ25vV`4nG#$hw0cwXXY;SJUuyhh&7 zFO=vWJ$eBrueQL^0Y?uUgS7S#U6Xr) zpo3h-j=NZgaf$5u0?HF)NQ9n2>$r+V&!I*9FSLj+@RD(-<(quMXp4!XhTrG$b-)Qv_|uy@22-yXxGZpS z4-Vq<4En=qV&fEcoWPD_*l`3q4r9kb>^OiO>@cF-={H4N)ql*&b{^;iYhW^|9;M92xWj!P&b*J-KOiJDKxS9mQWE|y}) zmvH{riQ$GJz9y0cR(}|AsF^E|=iD zL%EcMafu5d_YGj4>*cB4Zb9tfiP4ycMVmb{w9zY9>%EJ#*1JM$yd7HQ)1(zXZCdWr zt);$$TI@Tlg}$Sj@3&m@{5ERDZx=epG0pP5q+y@yn&I_^hCEs6<@rZV_xQV}xs&_D ze*^COxy;?bO`RKFtXm(yXw#8V*~cq44#Z9<~#$}(ChE>d6ITJ(WV$ADX|yS5zXqVs5jNd&B)p!{-U;U z^ar$<@5S)51DCf8Dfl3jDTS4~^SF+`S0*vtN^sI5lb`0B!Zp_%ui55g%}mJDj0C%; zClqU1LZt>19O_SOR$pSLdJ_B9g%;CkS)dNfYPDImsx|SDS`yEz+5CtaO>d|%?h`e# zCZ>`57B!;7yh?o^gzs!Rd3Pq$Ex>IywUWP=V9$Kso14tJi^;*PX^Gafltc}tq^Un8 zTYV`7tS2c|7i(iWQybNh+NL)4DNRO;X|ax?b*xmQbxifv1FGXb(vFmeRh#s>Y7#$I zO~O~IL4z@HgSuXU@05+{6_@%gIryK)rTje~d*)!za0cftW&~&;BSO6yCUs{dYf46@ zIx_5P%P3ZBMx|OZ>(rFls>aN2HQ1)B&Nf#L+j7;|Hmf>wzp66sP(}Jfs<5*AFZGuy 
zPx-Sd&|oT(Zt7X;dKtdsaPP7+&CFvu&f~&drry{x15C|!R$sQSy0SynkrS)79E)0W z($t*8{U>t@RG(9(x|~`$a++0>J4MyGQ&p8aqKe#QD$l(|Wx4xQnsZhqSr4k%_KJ!# zexc&@KdU&6Uni`rW2By!;6DQQjsoHUECr+a%iiN_6;!LDpi$)o9V+A8n$p7ADlS~AqJm9m40~0Oe@6NC2b7olvhuQj zp}ed=Dc|;u@-xZ1spmO(4i?fEi)0n%TN0=0 zk|b4@q@%CosH~(&r6rXrX0KFHX`2d5`_VdPDZg~F?4=u&Te3$v#ix~Bc)zj>zN_rK zkCdJJCuQe+qnsS}l2TWWEY#i-ru?O}1DIci4?sW9r|{TXPVQaqqS^{yRaJzlydp-W zm5C~;Wd8$ur3%?AmCs(OJoZZ2D_hYydX-Z-tnA7~XbkI>S+QFg<+m%n^qSI3UQ&9| z&y`;A-^$4UmooBh>KcAO1^;fi*gBx4U@jP{WPS@KSHWM+@9s5d_#ITpxTyB3XMmr|-vDY@#Zk}GZ~x$HwF zm;ARC7h#h&HyYe(PVfV8p$=9_|3V6dG=&y+0bG#&|=a$qLtc_ zpp*`)k~p`<(!m~#j&dcS#hB1y;yVTv*FIOVZ7UTsdAp)pk1MMAUPU!NugLoM747(g zqHF)D=o)f=xc9=jsh)NKBk)dxw;Qy;i_E2p$^6#YLC!UW+`HFPmcBqG^hL0qBVO@+ zNs8;sP;8%FF?}V9>Z?^GT1-T5pTc`)E3{|1Lb|sqc*;=)bzD|Z+p`L4{i%YRzf?%m zKNN}%a|+&D;af{x7d8`%U;y6ER&t#-;sEYaxbu3*x%&Ct8!aYw$X8LE7ZWiQt?;1) zg$<=DWGG9)Lxl<=kOGF9U!5%{*kx01S!f+2W~wMcDn*LU%oH{3|C%ASf3h9uKWa;`ZZ3YhE9dX6yp&W)AN zT#LNtrpt3~t~^Fc(K;M*9ch=#oT+k}Js&|~9hKWdBcCOyewab$4LaQ~Y4AVDsrPHK zWkeg{Thhsx0H#vsZn#_Ft{uP!aOcgy2ea_O2*3T0av#)%PV!vh&3cX?IW39c%kdx? 
zWCA+cVgh_I4qeoXb7zxCuAsy3AhAD5V_zfaU&5vjxCnfXU4PcjZmtjTEdVpY0AHIz zy&K`Ci;Lk-Y)@4R)rnsuv29p>6zRI!g8|K-FNe^OOg7ehT5 z#W;F1ddv}m?lSg0i)}x_uHWGN|JLR~-U9Q$w|mmssJ8>|QtE79i2kqy{Q+K<$trLy zXJBj)1DXqy3@;GCg)Wqi6%7(-SStcs?u78Yl|UP#Qd_9RX4+%ZKAe7m3&IoF_kHa9 z1pB_wI`|fY5ikP~%4(S>RZ-`{2A-4WjtLESqFaoB*-3fOC zbuOpA_D!5aGDaNo)reiJzt}_WPgR&)qrHHm;FuE^U3Ua;AM}hs23_Lgb`uk0>^O@Z zXKMiseg zFYbAm&*6)^-`7f9vCo-iTvMK z3&M%Y)YCYJGZC%~w17Og%FtK1A1eC8TjZCoVaF@j@m=h=fgLYm$Me|nEOtDN9Z!*G zK0)WbP9{8Y7RJ4V%u|HUJA}~Z&f^>+6lYV@QFs=?vjU#=aEy_+?;(Vb(`_!$eI6!ve}&fh zg#7)Vbgi4@(-XK!2z>+o06qdgz!$HejXh6XJb^DBMT>b@xrBW&x=j@-LOu0pp=)%& z(F4Z-8TAk>v*DRXcU=t6N;o#sjkzZXzA#jfBYf)+%ECcf@c@x!oPn|L-!N?WZ}1`b z9(W#KTqiE>M~k@+E#@*>%q7->T%b+Pp+DR~Y@ET4)7WteJ5I7b<^*;e$Bv`eaTtFb z$wii^N}M zcv9iWf};SAQtDF;M?H0EL5FCEr5lxD0Ingp=D@W8o)wh8fe7U+3cj_T8mz^mYlzm> zFs?%JTLu40@CBUj0=?t2&G_RM+GQhgx@e`*@oV6fWOYtO7{~nH`wL&K30AS{)HxCoL@Y# z2IUbKhFcGYVsMz@Wj}4Qhxp_)FgRo3OM)wd5g-?yBI;ERLk(OFaJ5pOPDY77bd~Aw z&4z0s9II%VEm*XVRVt@>^$M%Zp3_WvnsMszFs(35G;$T!GoB3Neq06uncoI+X$#UaK$`ka1?#b1Gc>U;1%2KR+q_c0)4xT-lR7mN(64C$ z!x{`2Re#_L^p4Hy4&0}%fHUg!zh51`FRI<=r)u~9o!Y&=2LIB_)cO7hF0C>6Kc34u z*Z@{B%rA>#ZV~RJg~7fW4Gz=XkT}f_P14NJbj=9O(NI{SriGQEv(#uHtWka8ZR!p0 zQFr)sO^KMRj)-Mw9h)>c;x@H}->#;xt7;5=Q4PUAQ$x@n)DZX&z<5pl?}6`xnKnpd zdSqdG1C}OmEi*B{;?g%K+LOIIK^lsV(zIx^24hmyA7fK*OrCmTiqTmr)fro-j@TBp z#ZFOc+*Gy1%~4a_5;eqaP<`BPIbu(#Hs*?IqF+#Tz(lJ zw=(rh#|L0Bn9F5qu;BNVcmkr5KH+QZ*^{ zsz!^cLW`+L9Y*U|sM6H6DoMRn#VIFLlzguWlb%&U;(IDIf1yItKfp~rNIg%(e_IAq zP8)eQm=9)w>AcpDEnV2up6;R6v;Z}wN2oqMo;4&%s!2~*b$Yfc(>Z@Jy6YAsH(GKSVxkevaD2G;vs>19V6`;lB z=X5DAXS(b;qv#8(&>6NXEBlyiw#%|{R!wI5k7cudE*tA|Z0wP`hk71^X9t{X?9A)H z>^$0l#~#p@%iM~`I=hpq?cS=$3t}Bfq>A%QD$Gk(L0*RP@^WR*FIH}Tm2&bMm7U+I zto*66<u z4;TPbKr?U@GRFjEMII_D3SccsxblkQlv`{;Ut#}4akgyimC7usPzG8|dP%#|N(N;u z89`%Mq2yvl*`mWrVqK0U|0!AQ?SiPBQCq&cVN*dfx)qGI-_|5eLQ0Z$TTV zD}le1e23{^VVSG!<-W?P2vJr=H2O+{GAdG)R$-I1B44SMWlBMdNvfPI3uo3OR?JpH z#WI=8x5!j>Nb#i?70-vGP(zX$I1@GXXKHhhC%3TT11wu0Or 
z6jzdOR6EIr7L#5ZB&#D*DUNt0JCbE_WGK;LSAwHNW=D-ojuyo^dKFtcQ!%wm&=_t} zRP{kcRb5bI#S@AweMeCxzg2YcUlolGa}xfYaIb-HK{>Gq*j~dvFLF+JtE%xq4Rifk z@{M|O9<&$>>tYg`!(?iXReW=z;+n0BZDtQfGkY+aD;0?r6VcqI@TM6GZCa#|#*GSY z*stKaa|)`xuAu7gE4Y$V)62hBXc@ofP~S25R=_v6idY2Q@V0_Fcq<(E0B(CD`9?E2 z4_ZuIhmWE=gB95ssfbRK!a9=`+L@`4&O8NomMIV|CZMB3{vFfg*FImqBu+k)_sF~D zth|~YmRAG!Bdz;HzO{drU(G+Y5AKcdEvBxs;2Z#*@HW?>f52S^cLCfS6@~W0Z@yjp z_TKBRpniV^^oPp7KSqB2iSq5Y%DX>DUj4=L?5mM`U#r}D`{mL*BIll!a_Zhe1D{0X zx=NtEB86d5p^yl=#l#`EW*7J`Nl#iJRd z0tU2sbgp?gc5WM(O5qC#_6<0CKOO%fwmpem?-2aI!mdA#uLWAfM`wb5&mL@e1_Zad zt`HLpCmPX%iHz44jty~m|iB%8a{8zB= zL+tyVmcTa$OasVy<74YSA*d6q#8_cCo~w`%48dT1?+c47x6-P_h%v!%8vjsT!gTLb8B_7oCy-a zb({u0iUW`A;o@+f&k_>{h>86li|u5Z4W`0v?8mByyOgHOfj4E0wGVKH!si2@Ga*HC z0S`C^PJmP3c5oV;#g;q0(M4#R3&iPN*l{_BiCPtpH1gf};u4iP&lk_p9(TSB-qV6n zY*;`%;_u087@IetM{c7pV6Fwus3Y`)dEz`gnocow|i#5xn=o7hc@BbxP%SU7} zACf_RKt}aGS>__Yj7!eqnycjSFOczmfHhyENpMyYco0mi!T68<@He@Uf%&6<80T=B;7Nre zn|!!XZ>SQEI_lCwT{x<}%L0I6;PYiEw$G zbqjA3KA#gtH^*^-$MfJOO=sXQ;Ma0O|M23I!PF%h9t#}lC@6MHD53l+%CD#V7Rv9S z4#s($18_`7^OyzCJa`tvvl5Ptl)n=_;vk+lOE_Pn3*Eq?kI3P_rd$1A_acG&ui#Vg zL-4ZP;q=F6;qb&$ehTH=2>pD@FQxoy%CD#V7P?0V`am}f{b&;069hX>5+BF41f5_F zEwKd)_7E8-@W|afdy?G!hqTh?J}8fcG=NXhFS=rB_bydchS0M-=5JPzNjJ zXH$LwrIk^KYIK2mhKd$A+R5y@$gKO}na;2^8=m=aEGIHIkooV>7@yosX~tN6}&qk@p{|yx1m4mAvShn$F1106FasO zr`xb&D|T$bj?Ki!CNljEWdG|>JJ#^(D!y%~L(3UJmJvlu=}Ak_gOfh%fR3_ zhMZG?VF~|v!WRfn1U&I@BvV=j9J$n|kouItQw>i&J+c|DcDTCX8iZ>mJfoDq92H_C zI>her7Me>9M~L7#MEe|i`fPgsY;1TJzNdgu|5Gju;S9ICoVonaSGS=#r`9cSb+cG_KH=Z>M~ zeGhyJz6Qp6F@`&M?_iie#ASG=AD2Y1p1;?+xx(p3ZwrMl2ChWvWrYj<0gY@P;}^eb zQ>R*@u@OGbfxs6-PMibb#5oYoi_kmPs@HkDdYq2XS{GQg@+f!{Yd-;h0{_r+ZVdNu zo(N)&&y;9$DBL_Q59ZPyK;Gr;j?Ut*QCHSKxS>C|Cux>@x`sWnHRO@cnvxPt^JEQ# zXPpMTTGZ#o8VK(Jb$QR!6z_%V@L8iapY3Yp{>d#~7uD=>T}|$9s>$_ZHM#szjn045 z6AbC+;XK5!zKu)Wx@e|9U?G>AdEv}0cyIP3^anrm2Y>Vjf0G9NQ`A2xLwy0ctS2c{ zcR;zO1lFiCuu<)S?V23ar4Bh?yhQgci)>qyd7 zACsdx*2dJvmZ>JTR#mahs*IbW^0;X#i<_&GxD_gn8&gr-Ar-JDrXc#d@+03?e#Ebo 
zAO5up&|xm)_oMJ{w=g|S#s?|*9&n_9`gyH8fw>K~Og6i#87-#46s9^;tZGe(sy3&p z%4}1GIS-A6^A{7URFcrBVzii|M9yDK98rGaa@iB<(Fq5XV?M8J(__kx`<}96K2vt| zUzCFmbCG%;glh}DD^i)JTbX{RQGXu0`MV>Dc?D=la+V{>N7YHes!WPfd6G$`Nl7Y6 z;{FdwIVwypR6%lu^3h`KDQ(J4?N?6fY-Oh|MPs-{nJN2~k$g@W78LM=x0G)BOd0Wi zQDz*!cT>-O)Y;evwKSbnfGM$|zx09*{^oWxtf^tH4=SymD&rnX#nx~YT4R-OO_bf5 zs@$|J<)q~+JH1R<=?>X4CMz?ePZ|6$o1U>m){IR`P2Z=~H1OHpQ4 zrZTeZO3x}$8d{7syG5zlJxa+QR&w?tbcT(xWbIX=?G7bmJgkJYHgr2LyEPrIx~a&V6ZMHo^?6L zX!~vOuY_kFJk#yO0hkOLKouxvN}3O{nG$9eddONdiS-*{=qoX@6elRL*s6r$ESZW6 z&^Rg-SKOf3;!ee|VIsPCzM_iODY9_4A_`6`BJV*(|ZJ}>(7d^{Y!`7-wgj! zxaQ=M0>ae^nt`JLAHZ8!MDAbAT)Tvv2Q9{e7Gp+>F;zqlU*l&JZGd|be8cc@ z#0Y2W-~&(%O5n{e#Rp~N-WBA1RZfaSi-~pwD$)_I2uGa492SK-(iGyzQLv*}foL%S zj#l|Q`sG(UBHx;o@~Pe~@2V5>uDBxavKQrD@-z7seJQ`fzmK0yv>M(~>Ny?0F2FV; z)xlcX6xb4?fELcdY%$BXB}G0hHgt{xd7;I4G&jk; zxm#{*;&5$TD(8kRa;`fpC&%4#s(D&2RX>tz#c$+R{^#+tiI&4Phk6dc$C4H`ff{(r zD$zfx@j)%QH{6y+a^Du#AFxNsuhUDOQv&5aC0uS(;^fk4k#lFdoH_}rP73Vg3urOw zn2xZt5a(_r$oA977a(|?4*z{B`WZ>pAGHy_MQ{yM&t5PY)PpLxOW?LsR3_XhaGToD zU^>Y?yU9KKJmicPqk#|*1x$cWHb8(G!eT##^z)^@ehQw8*tv#a--GUP2X;MzO>ZJ} ze2mk-w21O+GlKT$O2Ghv@XOMeRq()r0xO!3nKM+hnMIb0~v7K*WpkcsSfG>=K5V;zZ9l`d-*G=M9zYLh15Ka`pBKTM)khmlr45GAA7+yK&Y=zF z!M^}321{J&)Lu+DXcJ@7vjRI-#-nj0qjh8ff_pW-ScL;u&gK1ee3qD4a*6LhLpeXf zzF%n$VKWH2fUzg79-p)QQFcsE8>S85jT$8e7P9_f3HkqWxK{$2mq{jrJa%lr9~<$< zrXU6z?AU@GTT{3g(BISpRZT5^9l{rQ^fHiHZ7 z2DgEI-~c!Tj)0@!I5Bb(pPZs?&V(~jHZxHqHZBsM7g~8fjOIaQ&ho|6ck{Vt!8@AC z%Y$%tz+DG-$twE72G-$>(GPB=4dC6i_b)d$mjRyGO|<$zm&I-n|7dm^rA;hMNODN zo<1AhW0ZVx5&6<`^44|a?PKT#yU_}ckiXnX4*vj}$9KsmKP8W!&^{jL(a<3b4dNyy z{|D~m;QnM`5pbE|vXU9*pnDX-QOP=tdN^9)=!ByeO=1v^A+p%nFw7&fU!)hvh@K<2 zdYUZsDT3y4GS|n*+8^ex2if;fW?K8y2Y3wleft1f%-}ocE@Bf?M2{v^Dtfs%LWYxoRREVX#vjhtl;jIODc0RE=N_5X7`sTrJ;2k)h_%}Zz zDD`Cc1l#ZhCy~PuivAEo`3aPtN?mMl*r`h)btr?U3Z6Q6n&D|j8|a2-Dm=5WVj;S~ zYMyOH7dWJ0JUm2?oKCNrj#fF1xR{0wFTr;e81*-X!|hHJmjvU|uo^99C2Pl)(Z@4#B3^qRe8Ka7=s{n*V|R=d_}cR&tdUT!SBIebT9nJ;oHfuzAlXE 
z1(!QR`bddCt~vNx#0_coRLo>rMRqm1VZDs_KPrLMmPU+I!RmqfV6xa4ze zo#scAbH*_B<9R=?^ z3Erh~OcUd&Kj`OqS1i|bY?>U&TsPcRjbVPO4-J(gELt^TX4a7;t1>J@72!E559j>F z@N$(zI8+qTs=`RlUyK}9UgRR>My^**sD6WkZf@a&=}S#BW{<{SQC@RIWgAA8?uIfptP_*C@u6KI)Tr3z_S`o zV@hoNRLT;1_7%aONY2OWwb)Vx%1!PniT77ge5eZIqm^ee$!2npAwo#_nepH)CQcT7wBTtU{qX$SsYN&V)+GYC&7Xo9N-lz~E!!*nzgq}s{7^U3?rVxo%t z6;T|j@ZxBN7Mm4PoTA`j8yZJ}0?}e76*tPixEqaOSUyGU!6>{%-UaNz$h#=7+$ZIg z{f@kCzmiYpSK0#qQg~;wtqDx&BQsJY(O76o^ z86kh}=j2V%#e_&=;o3wS0kG%GS%dbg!I?@04@lqjJuFOD^`0 z<(m5?_u@hqg=+||Zn!x1R<(fRh*bpa1^58&RJapM(I3jmd#cDiYdz#uH%T6KVREmF zk!xLoTpX!#HbgW>5vT^O6gGfU=OXH?0lR46GZg+Hf&MB&*GD+|bIyh4`T*A;Tpe&V zfND?*3MeWE?sT}5D#*Lwj;v)5NFBK!>tbBdV$>20A_0SpA*QzASfe5>7|XgCp~a|a zHbK6E!gt{0lh}0)yI#WCKO?dG{rGyMS@0NZktUZAi=YyGt3%n~PK7%G?wERh`)(rd zp?dDzQ=1UZ2B*3 z`mGjF$LVE!6tfKts1}riVvqxO8pT)|h{I;uU@~n$kzP|+gVEzc0C{m?!597bVt|Xz zU>p}(2Aiq)Vk*v^TFGlIV1Uoe=kpu62p*)-?}qRh?D`3I{YE4341jjf0IESb$fw>M zO{HXtFtzAwm@SSHsei9qs!M0zG?+5FquC1URRKr~YcMja+8c-MgpqChc*JnEU_b~jkzzCS< z%wz{U=KG*$1Q6Vz40cgWf@qVa3_8n*i>3HtDV114dn{VUXLj=WvwZgv286dL=VJ}h zzMY_j@Tmut_`INY^5g!tHv9k(SiQKn4}5M28q**lE( zv6_kdekN{rGl6~@E#)2c;_E5ISPR_M_`7fhI@kzf!2)6c*9ENP3=o5pa2nTN#(3Ng zc7k1C57-MRc04gS(GFxqt`b2fWf!>-&_ZHJOqw_W8efh32p~xz#ZT`?Q_YC9}KbM zzF5|Uq>)6G@FOO^xPKPO#0K`#P{8H;NUWaG0Nm|!Xb1dV0(TZ~Cyk+lQOEJ~cn!Xz zz~DLs3?8aL5<0OL=n{{2gL?o)j_V;0^7sgN+?6v*CP^<~$154E38~~qfu6oBg7&0Qe_nplT13eOUF)}TvlA;a7Y z*J=J;r72$~bbgIqFtPsPFpmfSZNpb^e+=FR&&vf)ALp0g8k|EEQhiS$<50t zzlQSbDZiPpXosU4kM|Rn)8Uy#=*%Zvml3w>3Ev%bm!o8x7s>XFH5fmoJN*?*oPn`} z$G^h+OYjkdw!KO86TKSpdE!;T}g%VF~EgXH}O$R_ub zUGBw>+puFd@o_7e{tjYe8w1D~7Hp=pO|;AgnAW5E7;7)q{TrXb`4+eiE(4=$9mf}k z@CB<+DL0BCz)ZO**q%w*x!~J@(maa%tm^M>oguLpYxRM)xwh_(5O{1>1>>G5W}6)=O=oO*W!GtS2_sp+Br8 z?_Yx*tBH?Qs$%$bz|)BLxgQ61EGAADp#m(RO-8X|E+x&uTgI-Tne>dAMENi)P=>K# z2DkwqBlkG}zH#n};ddRrSdA95g1mnjZL$IbhU(8(0q*p7|!1IJv{& zOZmanC6W^Vqd&~Tj$!PWfgMBGF&%$QBRQON{P5)5hZWWi&n zE=6#a!NoZc^xJlr*bPGYLs&E)@2uw4ZSkSI_N9y^x4U*S!!hrYhhhVGwV_sDZPQe 
z>iDDst84ILHJ+^`dMk+Za(cBf%9Jtcl`$ffF(RLZe~%A72w<8K#FQtPDH7=Cc{i_3 z!KMyxH?_NQ-$|DswK_+r*(Hv3B#CNpNmadT2Kq{lYF!Id<5s3>w;ENtH>tv7ipo5u zs?=kyial1S$YV?e?yP}uyP$lRN0smNI;!i(;Ey^<{dd5%I)v#-7*nlq>JGX}h7&+UV!525&#rd4;IfD@rxq@v8FX`~`3B|KO9QGM_w^`j)7~w^~JhjVko(RK7pw zFZ$0@uK!A9`)^a0-x1k-FUsb1T{e$5Wpn$bY%bh=9lwvkzZ}kyD5iN_@_Rr#&zmEe zTSah9#g>|2XH^G!sxlxzHH9s7P3{=;3G;6x=SemPbtOk`%3lsSgGD$>Ht391pg8^XK{%g0G&*aTX?LG zfg5`&d9R$$l!m#gILudtp~1=zjg&o%{SRRlWwSOWD?CfK@O))Plqnr8CM~iJt)pKl zk#m&HWGE?ei!2d`lo-Yd$e~XvA?OE62>i7Y1OCK)rs@YjmbkW9&+aDN!<7j3u^JiE+J3h?|AR zuvGD}n-w2(P;t>06&v}4V#B|$*w9}oF6686Bl6b3F$$lt2B#|lAAmYg$={`*5PMka zplo1^cUOA6udMOGN{Np|SBaA)-l9ZPni5PoGMkIgII0zIZdP1Ek75(p|6peSgPHvg zrUQzMzo3ZN#}yIHJt!i7rKpInw4M4HKS_?jIhaTvu;2qw1Ij@WujPR(kO8cTE=o@H zR$?OOFId84vcxFf!aXP~sfta?QcO|-8b_s~k{cD7+@*-*8462Utk5J>XUl$tB%D`> z>AHgB-d0HLX9|t^v&QiCQtCPjK0}9^oJ<^mN>BpwK@P8H@R*Xqd>@!oJrtMfkFFA; zXlsl2)(av`z)FP(Cnip#rQMG%0nT{FBegKj~5VCA=kn z^Ct>0{Yjhf^CI|Xz+vc6Evdu-CeYGt0u)YP0#&cT^Ak(oYdD?tjiMJU7; zr(l~!LAG=SX60&9R;m269P-U-myc~KI>V^EGS|v0W4Aoh&&bpIpgdDvk!RA+Ve-q1k*_^L-Wcp< z&z7gXNFMfTxpRK7o4sGI_7S<{u99=kPB~?rl#}hMoHB06IqiM9q_W>1{t@{5;c0`T z4pe|5U4QAogWh8K0sn{_JzLy4mjv#vvg5Qj9RhS7!4;;;)CX2oR3P29X1gUT*!W~D^ zkp<)(MdV(kZs;t&a;{(ytz_`1q;qj*j4E+lMGhzdb)b`m9YR=IOu?IR@L_Da8-k~? 
z=UwdiwHCrZ9ga@E)&MF2x29sB8SMbl;ZB0v1b1{X>kmrFJ1WS1s$At<>&+lK354K4 z2AMj7s}9H3QJK0zP|bUj!5~4lfC_G;p$|ZE5u2XErgyOEv+;d#{dW2TJPuF}3V|IM zI#ep$32?`j(FSk_QLK-H{I|gc!NL>g`4Pwrwk`Ogm4S6KzL<&z``CVL$&hq_7vF$DF`b1M{ zwsuet#x)>#3qTg!sc@SqBC4JkXeI`5t4BNhQ$P>s!x#M?3~oM5HUj8eT->HdG5Iia z0VdFg@x=_BH=~u;rt;omKDUjF(kZ_CAU3{6IloX3A87%#paR#Gz-@;+9d1JdilNS- z@cO~)Ves|?ierid2bk%~0EZoOuwx_;T_cQvKZc0_gN_kw3kl!_IB-5+9-YVgoA}&O zCQ4WN?n{*MfjaSVBdDRBE2{`uxU=9+!Sr}s$o5=K8i0Qqd^5l-K+`fkn$SZQ^0>&E z3oq@mly+H;KUQMLYWm4q+GKqWKTJ@;wQZ~&8Rqp>=otIZXzt?k&!XA9JH8jI5nnqn zwH)p|xYMb#xsMotH-N8s%z?+?TmWz<6YYtK?`j?iGp4Le#K0yOGz#K$%%2HzC>li^ zi9#w`NIsJ_2zFEGtr)#yJMW+7bC2-(*SRMc@km?O!p$_$K7qlw{ ztp?)6FRrefLTKC%HH54*mbDPc`iLC!LuS7pkjMOtnfm+8+}~q1|06O9)GU6*pGv+u z18rgs+3b8WsHNy1YxDwH;UnjSFjU3_?@~M}}4!(;WH^?Ghz>a6hGM;9jc#4eY2|DX_?06JA z9wulXBv7vrFjvT&?;+>Do1FV1rJh3rG492B3K;u9jsU}kufgxYe}V7Gh4Ot_gAq!( zvE+gl%AGh1<6-ithp^*8?6@C)7{40dN2YTx`OIYoj=QkqBH8A7I?$bD!NxsFPLt!E zg6#zF9z`8G%-#z__b}FA>;Z-jVetPR{0O`V9s}3N@9sf=xQkqu9k`U4jQyFwPT581 z1Lc%mL)pf8oXs$_QGOTY_fh^3<JXB z@vYdg1v@sAC2t~A-#`|CmF z&bpW}^oK39$!69a+=3mqV83lC%x(s@Ft)B4E$|egV_n+8h0+mUhsRv4SIOq z>B0QQ)d?O?IQ-xUrYAGM(kWv*c=agA27n@NRk-2cJN zs(g2w^4#-e_b5@0N42s&o0R3*rA)6OWq2)6n&(?sa#LSJ!Lp^vZfy@QnJ9id$BSmpR6DBCAR zSw0!c^vzX530ofX=W^vB7&36Ldz=0S_wL?E zM6xa>BF3TcSoS}}4x%%lvc{}YP&E4=qHb4U#Qh2k;~o?tA1WyL%kec)qm(}tz7D3K z4e|H@kisetWb;}Eu!1CD2JvxjijDJ8R2=&s;@STYAFJ^AM1`5s6p9uTVk%a!xkiEJ z$qFzJpfk*spJ|nR<9Et8juV+;uE;0q1^Gn$RKDTA)oT1a0)IbzlTFO8&BOsH1i8S* zYiS@EB!Jiia-IZdMI?GDG;xwbETIarL@Ur@RsdRze^QqGk_zRUR3)FJW_hzv-YaRA zJT1%RVc9D81nz@jx-9p&XXGC9t~{bY({lVg3;tgC3>~T_kvIVPARDCfS_-fLs;U?Z z>y4AhyHea0km@TxYp{H+k@B|2%gdUCzL6mhYo6S#WpcAN$kjSU&ej=nvM!O6aRbkk zLpbyjiTsl|^#^i_|785=$QhJBh4LFIo23-&x55V?6IkI)0tq~iO~wZ)%(+vU^QO76 zhQmi5wm`Yr!sTjf>vM4pj|D39uVg0cju^B!W1&BQwc8 zv&g-urnlWyt_5BM4ufcsF^Cz134z6#F&st=@<2Ig_#ZE)1ZaIL?eNfSHQit<3C zD}$N`4e7%G8$jn~z%#`54iguCd|~Vl>cHTRCSLF7VmzPE(KlL-@x80q_FZiIP@VX; z9(=0-<=U}Yhztz2-iV>))sz>b;NF`G6SVL+Xi0&+kF+D9v|4e{OzK0{p2xWIRw 
z=DR;p16)<01YZ{tLODFQRxy0Q9f=DA;WgIaIQPKU52gZ&Vj@d zh#iZAm>5Q&b(onLq@x>9xs~<&O##an^4>OdlQZZUkMW(iRaZrL;OjC>E#P@3{{IMkxeIgOvgW%c; z!43-DMj>PC`2q6?>pHG5fx0oSzf{*^1NF{@+uFgHjGx1(ug@_4AE9i6b1_&748GN1 z9oPtN0b9UUumju*_P8)nrfm*l$5CSRWD*HX9{L9rJVB+7QP7cHyw6y*|FQ8id1~-^ z5hikbJXxmE2ADc&lr=C0ufe$nYybw|7}yRBuH9fS*bfeZ!{8V=0Z!u+29I?nr7#?NLp&PC?vEv3!E2Mg$j_?PM5gyv(^ZztFT_JISy z;5rH@VtitLJCA3I`RB~vKOmQRUpeRkh2*B?WG1z&|7heF(#fa^on&pjWD0|5 z6hr6}vzaZ=XI;i}vX~8M2Rq2)52Ht%M=!XJ0`U{f`Ui5&g!WGtgV3>8s}lOqe&R&VHm%v55Y5skXT3^*PtD2Bf~pH z6J8*6zJo;{k^SBzgl}TuO|mcUzX|8h!7Jb~a0T4O9;I{m;&$?xljsj8Xp`e)0>{Zn zj*(9t!;Yi0%V9E|Lk!vn$iDV7aPB1=yp2I*H+I}g#=3(XZyTLwj8`^enXv|ABQ-V7 zz*qyfu?J*1_z=z;-~nKSlM!C@T*@=fpa{U`aLS9NyhLCHHoAo!6j6Q|tRzlXkl`&S>sv~UEWwV&*kSAgS-^lgN{r6q zl@TnPP06!pt(oMzGw}QnkvJV2j6EQa!e{Us-P`E)j9`>KF&r&Me^|)6g9Wq+JESSo z1d=G%3T%LTfs$j+A*-EDjBwH-9F1_a!_iCmLs&8oudF72-_9$?iQfD0)Z66m{|#=A z<1&1P?{@%WC|Uv-p2jZ&v+%{RyeTOFgkg6yFj00Au!2l_WeyzqL_rBWm2f!lLNh#_ z{2N3cm`$V@t5h}Ij2U8Q! z!gFJqRPW4GjlNn-o3L}7x`e~Iy+V=Cckp!{~cGr+4OShxaj zZAbAnPTRc?+~6A@0nV0(eDxWRn9X9Je3BK~k6Y<|sf==|j8~~Bv#Ds6V=lxY zmz9}t_WCm=4503y3Y76TKck{KFt_kz--ElS@?896cMejnQy5&Fzd)bmjtg+5D#OX9 zbSJygoJ(YNu2HH>i&9*AmE<~07MG<;blI!~r-N910S{j1ku?ykpWBGf7f|n^NyI?l zg#JT~>`EOO2?_q^`UsRaqV+wPBUEwYtjjuVLEM6<}3t-`itdz%dN zp4sc2z4tJkyYyo^b;0}xXfYk2#kB7>n`ze#@1W?0`48Rnpfy-Ct-9hJ6kRa?p;I!` zvcoo}WxEqht2Vb7Yxr3QWh=s#1iF4$;**&Qj0Z;H?_t0oaOn^9!7)9%HDbDTYs_@% z{s+^sduyhB56pk)f%y+T`Y>&J4q{sO90`qMBGU?5Ov_&LnHIg|ndZH;p*2`A&3bw; zO?&t=O|W2oldfBsrk##5O)*Ex0)Ey)*^6LvUN7Vsz@*;r0k$K6A;189hiOPmFYxJ( zZRcK%m=3*~F>U*_V*c#Yj%n2g@8Rs*i)qn!0MoqhFlZd(n5O-vF-`i3F^&7lGL8Ca zB4NWFpsy=V1b4;WaX6V=6m}R1%r(Xo_SI3Y4D8PBgL(ia0IdEq3>bv{{ea#;5Af-V zZO1;1n09@kKlE?OG#}WOX*RGE(_~-|rqRH@_`wihG%y*M14sjE0PX?`HY;C@2Sx%z@b^HVAHM4YbjQBV za5ILcL4QL2KdK4ScuaFR4Hua4xS&oza8E+TPJ**04F*sjlcoT30XZ}@Z5(5bW0xY> zBEe?^xP!wejQNKhY5as8i!=oo3t*TLGZ^~@0R3RQC;sMZP|zPnA@>-I{0noWAW$PQ z!G(S*d@&6du^I5iO!#6ZNY6xl;Jwz&%-PtFn3$n~leEBb-Y^u7->wHIa2Q|4DB`^4 
z!*w%pZmb4`*OW8EfWg>55Vm_`AFDxi#A&sKdof*;X^L>e%?jl<#Sh^NkydEb@P#OR zAqHQFqjJUZV{wobn*l69z^mYU6P&CEjt{}_GV%LDa4KVDQ2tzi9S1ZHTPz2SwS`a@ zu-y-zdyGLHj7JPiLjDWaHboWV1`3v6_)Z@k0%u%^{PCMK{B|$Oz`EkFAp*LS0W1TJxp1fp@EM5DyFAIe(z9&xF#62~Wk z<4$n9i1&V>nt+MGSRBr3Ktr+Z2ix6n&h7E@R-&*kuqy?-GQc805kRwJ*p8PP>O})Q zw7^5B1GECviD7?qOyLVdRGi^_h_NczZ;tPmK>LWo@mp|lD#5$IP_@8B*c^ut8j0;d z*zO729mSCU<9wS+!;UPVP;OHVTTOri=m2_v0btaG-x)G%11$uxVS_rcM_@bPr0nOQ zvku}mIMB)s9fn|Zp0jZLA-n?&9AFbu{g`ps4u$PLu-zGcZVkJbqKxwdGyvA79$*NV z0A_#%UCMu+s@ZQ;9_299mO-TO`u7DM{3vdu^?z%8(=6?8ybis_TxuMiOgKo z_f#2N7jWJKVY?fut2NHI5qqxed9vroo*!!u;mk`01F#JQLVz%UwHFO=fdn8KNC!~e z%tpj&URyj=?+FbA@mVw*I*1a+g22db#O980JniMbgX1Z-kAR2Bt?nSFxq-au68g<2 zkwct7&-gfc&BxKBX4eEQMxXg8vW&wB)Praog$S6v$clC|dgv{iK>J|d&FX~AaxpTA zHP8>@L1!y8gbT>BzJMy5*AVgwf(r>*AK&(EY_9=lfMdwH_o5Haz7I7YJ>c!=yJJK& z%AJIgW}xgj&_3p&d`UblWoPg!Kr>K*_Mr)lLJxLKpiS5?NeGsBIFNl4Ni^n*9pi6k-d{^UNJamM7gwIW$ZFc+xI|qb)7f2aw zo+`J!0ox>it=e$-A_RF=F!TqENJjM!0!E|l0n^}R5tPjzhp`g5-Ev&Ge8FQW zcq~B%?u~})g`i?r1a$*57o43lYS#&)Pgw1PU4xU=Ax^;FRsdITp6#{Rt^$1F3m@cF zi;-h`p-#Mz%XlI-Ji)^gJUkH}p15(jck2x zEWnr@gJFu1JSK=dW7swT*m)qV@7XYB!+Qn5HaoTnIs$gkA8e89+90>HMowkT^aBP1 z{COal$&aQx1?A5``Lj{JILa5y17T-h=)i^<{_h0Mz!$7y;Gxa9++9Wn_yv(nh$7-O zA=dtKzztxVBzx&&FN22gg#mJ1eZ;38+KC=`=z)hGc<6zL9x^;#WPCcvI5~(BE%49; z4-M3d8sbzHpOjIGB1%=j*)2lt${})P(W+z-n+suI8i<80_7cOMyA@yr=m5A>l>kihk0 z0eH*@k9mj>F_bMYdm3|!>cw+v55>z;-y!BkwGrh)?-e@=iYzyXv1 z+^SGMzl$)B!I}%uALgP?=3<Wn*z#r@wh#4q#I{umlHd9g3 z6x7OO)F|F$i|AR3Bf^oZuSZm$1hD=CIQYO8>jQm2wF&Gug>7IiKF@B9zBcyHgfC_w zKBuFdOhbE}1|CzP$4rF|G6mPL$%v0hh>wZ5j^Qm7JSOA7cr0pT3~G&yno+3fk%);A z@ceLGeTLy`Gz{Z%hQSx??ZCVde9#R3Z;q}}3)lx{0n_mJWbB^=J`)=?L-{RGerudZ zd!RFHbcc;Tuz~kKK#v*8`!^1Dros;E9SK}v74QwKqFKUY9=O^C;AmEVD8TkI@Hej> zjA4hJCN0wn-LyYpzcub-z(jmzyY6GaXVf2!nUPKZfWFa+8Q!Q3Y;=H)F0j!9Ht_xj zls*`HST$rIyfOeD?2icPhe+*Ib3H0j9nZ_<_N*BJ93 z8utV64$8(Oncj^jK;xLf^lU25^k^!}bZ@H3bi;(?u1#E+E{#`#ImSRhneB|bUT54L zt^Pn;#JS4D-u!mx-?m5h9~cLW0)}DVU~n1$F8x|JX8K}WOrMr5psD=H^lILY>Djz9 
z)1!HJru!d#nC{I7GToXFXS%i+2aN-&RZB6ZQ%hN3}4xeVg%2yS6i!wrw&0;V(Jn&%ZR8R)3i?E!(;< zE!(VOTK*Zsv~0P7Y4yiGrd87mj0x&M9p}joR~PMsI6&76ui<4z0z>fk007foFpdR% zy!MTmuI-yLo!dcsz9?>CLNYDP1;8?P1>$!nzY`_G;LYRo0F!3@+DzQqzl>sfT0A;Fl+|_ z{lKLc&>i1*#r&vF9h)*8I$-QWhZaohjxCv1o!UTG>AL3DHGKcTJf&t`M>0*j zPK4Gmi)q|Vf@uUThSx#sYLA3v86H+epu?Dn#9Y#BKoA|&8yEzP0wx2ZfDE9C?=5j0dI$YO!3P|A zqb_=vGODmI34618p&sz_k-%Vpr~hCd+{<(YkIopA)B$JZ1Q8Mgc#W2b=6zBz6uM zhSo5wtc0mwOfT&34j$crPWX)Fl9)gHA@>;2h-o?q7q=m}z+%lBW(0gO3iZK?sjQ$l z76ptQiUP+1Gw^u(dgZAY#@HK4!;G5Ay*h#94?A-ry!I@14DrU_`46#3*SNV zW4d750sC7Ig#E$D|4=+G*eDA(w=wX=IQU`$d@%{WUpCyWeOmU28Xzu ztPf{9<5++ECJny@htXFVNw{qm4xfN3WHq3{_}m}+`oLyy@aTqZN9=Dq1o{WUz1c|E z9}SEHCNzfKrm)c*0S#ZwKwPjw_iXrL4t|VxN;6oih4-Q%5m|hvjmBpWr~BdfMEq_y zet(s>CebvM&uT!!fI&FV{;Ct#my7H0}%4KnWIeb5J8>H z0gt)3;EBT*tmrKTUr3<^&ugN{Tirv#HE-ooX-WfeEjw@-ffN|VPiD5Lvfyi zU~>St^v1R;_P575|2ZD^S-Ws6&+aU2=Kym7abP}pNPvfQ3*3BBCvs>b3J7-Pez+N; z5Jmh@VIFQu%Fr!LaAWXBV~oJ@oAA3cc;6TDU0@{6b13{g7+m_{XWh}fJK~&MPiDse z!H&SL7;6*2X`$1|?{qD~R-qB>HAP1VJT%&%LxMWdMZoLB7ka2T-g|pwv0o3`g)?pt zK{z%OJP+Z$U#JHhG7>Ih#{mt3ZFU?`XZX7HOxS17m$kb9kOoi;Is`1HxuBxD1DF z_R@ySJcGv%Of%Lld#-YTBA^PO2qX(QH^2Ze0!#sO09DQF7}}$5oWR2svEhk2@j-Ac z#fkceB8Nvoo~RGE#dw$*iQ{s3`pX1V*BD?h&bt?EV`(&|1$(|q0DGS7IqCt10DjK1 zhg0DtBM#U)0WN?$-~}uJd{H;6z{9^i=Bl7hB8CAt!B99e7!F&D`dAZy#3BXne@9Fr zY)6QqJ>a~%!1kZ;aU(61qYnt%w8!79Jy*a3Kxi>OfG@BT@B>gRFJTD77N^N@!6ON= zu?}^Tjf!3`it!mRkP4?IFN0nYh4;dvsUyaQ$e=AEY}>E|QHZe7~u(YzVwX=70a&~od_we#w z?6Y*4?~0YH{MPsf1O^3%goQ^$M#sd)#U~^tr=+H>%g9{6AvWe&gotJ9qCtc=+ha(`V0LzWV#k+js9j ze)|078#VO5s%MY-|9$@df7O4Z#%zr@XG6Xq!?pkgwh5@B4QR5BK$mR=Mra0RY&)=K8-fE`f(zRe zJkb`Gu#I6k+ZtA*Ijm*dLnzxIqR=9^Y?DZ0+eA7VMHbsCHlkTKws~Aa`zT`@$St&xdu$VV%(jsiXe6)MR`P*uCSSjiwTTn%1bFt- zv_FEsT5G83?+Sl?@2(oXRj0Fls;?U4tUA0^tGk5##WZTvxN(yvO%XGHG;iLbMN7oZ zpRHTBLFoL|u3dXXPsdJ(pDtYyLEU>Gh7d&X3&IF-gg{~=i5E)zSgICGyjVgg$;h&o zQd~(jIXZdVuy;^SBilE@dM75CM z#{@4Z*r-5QATAIXhzx`VV&mR@M90HNh>s^v5h2fCym*Nq`TO1i2 z4?hG5`Qh66|A51%RE59>bBM#WmBmlacRr>n1XXZ=55D(<3BMi$_3;aMd`MLY73NU- 
zU0u||;V1gzU8+J9IPmjLo_#hZjRn`J>*wOW;~?-0zg`4>`4`*QsS5Et4sqZSXT;jS zRvHfu@g}VOx;XqaE-z9Q)2ncZ2aoSK)anZXkAJm&lA^fre4HWcgR=8H`+N@Ps`%m8 za(IxUn9t)7Z+PuIPzoOB;0GQDAs%ND7iZBneg!B^*7QNVMn1F;R{sD@SPi6H2Qh^f zQbh;(CD-fU9QfDV-+uV*2jP3~?|Aqf55ME#`~E1~F7C%`?D((mS>RC6Kn_2>PtC>! z{9tx3USnr{9B}L}xo-<`*!4faVRt15-gBJq`Y^xR4|n4<_WZ{j%)w!A%^X-Q=GW)` zh)MH1ahm)7Qx55}7PsRx3+v_Z3v++sb2v~d2iUIAAAVwv$+k%Wz`^oXoaP~Z&ROw% zliw}`x>#k-&E``TzZI16Z?`Om8*!RPet-j?$4}%!=`vQ=!5QjWVh(jIuf2B|UMea}QWNa^S^)FWCP_9An^G(>P(GRSBVN~_Y z4{!np2iULFiupPS^oR1imuJ609EAD&&}Yh6=v=`;po7F433DjdL4F+%aPU6QHN48x zA6OlxTmz}d|0}eZnmPOoE#Py8%t~;81_D&kA1dnNInt`2JtpSHkfbUpsX~( z5c-Ee12MjS7BR^KpMwyOpUZ{Pq~*RODz82t%fCj}pn+7}JAc(Z^fw1VuJ^kx*n7+G zc=#O;!hMYYp?Fx+(CZ}l|MX{klrAmzIZ^pXIsDXqEjTO!2mc@R!>`~FPzML$XWqXg z|1bB$f6n1kqVn2uKU8~uQJM3y_wDj{{HgW8yH=-5%Y#GU{{)8`pW}Szho9~Pfge65 zDdSmhP0y14CprA8cA)@21lQq*ivHv;i^+A;3Llb`Ln=84Yhr9&l-GwKulpCdEnD|B zf!0Z}9Pr#-5D(Qj@N(Z@tAl`p;`=1!uo^k=?86VpZG^NKHkbUV;~-cL?~;@wggKPw z7!}X{Yv54H<0oh#lp&?`Hc2JADhD?A=bgWRM_qaJKhF6D{saf*H%Tg7Vf}$W7Nq(Z zh?;ZWY8pgcTx<5ahhS>*9AjRQ9R;FVTKDy#BIgub7uanL*M=XuE(op{@B@#- zlQ_+sI<8UQaS*cqkL(KUvs+5%aj<+Gr@5)29Db&KAr4mHu=#rqyng+6*XeQ&|N8t{ z`{f*FAs(zA#c6J72#23-UxIBex{s4{l(dq4b24iDor^ZsKFHsG+WfgFBX9I*8u zR2Mc6;xxB|LrpqNm6#O#?q_Z(U4o5CaL~%HnFB8-Dy|Pd)3%^2a1a3p+xuLt9rba* zZ+?ybu#V5+9#?B;eH_ZK&p$sN_#EuOVRtnS<>PXM>VnlE%H!x~Zi&yq{w`N*uYg0< zXM5mKp8Hqi|38@rILt;oINafC?dRv-CfBNP;OE~w{h@-xPuCxK92{?PwJ;W>+B43| z{2v^?%XxpA4ni3V=1_`)(+w`?Xe9@p{vfPD)WkuM+g8;d{y{yJb8so+>YS>|f#p$M zgFs9Qw6U7ERrv_&te!1pE)b;@dG||P1B}Bc*Tn>LLH>{CKv35Fx!~Ycp@CG6f#A)< z_+EebE-tG0=AVp3p!M^_DOKKw&w;Ojl+VL}{_*`B2v!IA*<%oB!@POlQsprQ0^?C? 
z83R%K_=BILhfwzX1)tL7*VZ%!qMrQ!r|A&1K|=Q3I@z!r>5Ga=g)|Vp{c0LSRW0PF z=@7I*Qtf`Wm_H**dY=jt3R-lTh| zc#|HB!^Io*%VAJdyiou{eExAzs>Dc2UL;TPYD;g&XczJ{5LO3?#eE`{r+o-CkScu{ z;asQkvl;psX1K5Y7=Cl$`|GzKe)~bV?UcuZ0@~)XTQNF^>S&u4{rk$cSUHESyuVwn zN&KU>$#PhJBS!aV6%PF8X@cuRjU3AR`{lN4)J5w3j?#G7D8+B_YAdeC=oSA62O;~_ z`}_6U75eRsat=!1u<}}r{>g@N5Gtd(eZk)i=0Rx-M&Vkm^ff_kRauOFNdX@DtSWx^g&bC2jx{{r zKo0-P_w2a~L&y(nF2x#MtdGOb_5sU<&p}S*bAp=xg;=92EC(KsikPfg7r%r-%IARRi&$%~ zfnL)yr<^-;^lCFyh>0@=4&A3`9FUg#1GHI_(%FfmiTzgRnvQ(pq|Mee^7lKM&%p`#6!iH zke{hR(0Z{6m=|R9I6*y^&!Ogd7{5}3pls2Jn3H4nG+8;jT!VlHR5=c#%J_qtbN_#> zLr~7#Nt7vV{v<;#`D(_(Vxf5;-;aS{In>Of1}&oDYi7_!@hNXMNSocr6ZJoPd}REI zqM7N(4$aFvvVURrkv$5zM|P>?9Vt-Xeq@K{jwAV;f+O3tcOKcUv+Kw<7P`BOw(0Sa zr?;mlPoIawc?Nrb1hjF!$enxx^Q=t`e1~U8mGzcAeCSEjXdc%|D?TxBa+Q{5Bx(I45CiF(+Y5v3BBSAorL~(vN_W zCHhd3+$@S$(f`ENTzflOuMqkNtAPN}K&k`JFuu>T_}T}nfBX~t=D_#cZ$H%Nhrs_d zCKFT)KXNsLZ$|4E)z~ihW2CC&TUFaf)ixolan$#jk{6(D%8F2eve5^wX6W^3gW^gK z=r=T2qvUg_&t6U6Q%VH~S)z@aZC%d#9yz#-~#j8SPt&dKV<_2@xD_67aya_ytx zJcKxyzU690Uy7-c_twdw+`hoJ@b-u5q@2SX_`wVuVlKoQ^YUI^eN?Wc&DKMex~RiG z%jJiBAWI#{|z04e-H_`9X-o-(1ZE0f&lQuWC&E z2!{sq5b}fN-(0Pv26Cv$_ciTnPew808IK1R)rPj$o zSc7OtzTLpwm*c+Aye$1n2rhe{2if&*UzsWtbnyia|`AC4bQP_(rGA5 zbReZm$xzxN&j-m$SPzq32cnL77*%=vl=%;|Va_1Rkd*(FCF628OJ?1r9I>L(6wXC< zE(*p#2x$=boM2pbCeiq6Ij)>ZGG=l4G>c^8OG3DKI>q>6NdrL}XAP$81q&W+l(4&8 zJS?E(#JITPBePSF9$Ju5bU{@!EZB zw$IKR^L=-gNv|rnEVpLIMa6&}=amEVOI3rnol^_RJEtDH<*Y{7=Cc~%xo0#ZHl5Lm z*mPPea^q=EWOj*m)P@or7SRm>ZJ9IZMoB-B6A$|>JoI9u(%v_d)OWt0sgwU!)F|)O zJoC-ZC9SicNZDmRmT^pfDCeB|K;AX^zJgoQJw=biyUHF3cU3&&@2Gmk-BI)A-d6LD zy{*1D=9b2i=v!J#qHbyVGz1hmvm+(Qj-)sxNs3iB{~XQne;T2?;ZlsrrSJ5Q>Uv15 z8S*gXSN{^fIq-e>KjMc@)#Kqwgf8YFm|Sf@JpAieU}K4*W(qh+5m(*vQ#2>=VWe&z zKkqfIS)1d>ChQ~Ut}PyaK%=ARSuBT96t5%=4ptwdIl=d$^mbO}-Zkih0uDT#u&VY^ zT^nqO7C1;O}h zfBvUDs&R1oJBE`09pq|-AFAfO-*Kpxga45BA*d6IpVOJ=hXkcX@PqTK7*0jr%d3kz zI8?JQ$alcw$LfUQMLSax>LNjr<>2x%mQ#^)mUAe7=3P-Ayt=5xKFi~WeL)F44r9SV zp5^c&mc!3ESxxAl>mWbm3rd*F`a!_K^?58OgO~T#&q3h(nzldW3n2~)6sPX?ES8h? 
zQ#r&THtGQ-vVK^|`auyKJf6mKvU$08jeaQCLD<~CKK+5G57cj;*bM;%o7;k&%MrktPMS7VM{(|$^s*M-L+QBehBD1CTy zqy%#ys%Q}Wxfpe55W-w|_G`*5YuZOC;@}`Xff5zfC_!x*Z+;Sw1Dk*I=7I3$K=9V1 zgfHsgP;uTh*{N$ErGi6>^h9vb_?Do$oWEW!I|rf8F$l0%wNC1?QQvozD#mh{ggO6O z-;$J<--*}2n!T+3n#Ul(HqRIQ@ee;~?n9bbS4x$djB%V=m>ab0ZlbC{gQzkFft`<1 zF%AJdDmnan9<*+5SIjG(O6d#LDMQBVVY>X*^YQxsP=okqS_oy#>4tfO(=iW2?&W$J z&l@=s$rrZI+I4orqGM;)X`VilrgQF0ntti&RD<)U(+o;arx})(@No{?b0z6U=T4;? zovpw+d|qdK<|K;@6JdKPnJxGI#xV8v8YHQ?eHPl zj6(+%vJMt1uRmC*mVIEq2B4X9V4v2;1N%6D_Qt}!I-B+Zd-v*Y+5_y`tCzbI*s)i? zL7<$O-D$JfhzHweOP{;dOaIuzVKxUJjCb98XS&z!o1#8DuS@tAlr3Djz5ThH9R?hI+ILLhn-NoXeVjOR``78BIW%+LIuemera7{4G*=;-#FfLp%~uWi zDxn_qMM`t+=Y?7UpJX}yALX=Hf0Wnp`=Fq^>b;WQ%J<6pE8eN-FMp?I;QLm?aM@eU zdO@qkw5Fgry(wzZB#Kd!r6?^EiqKy2CR~^MFv4)#wI~xj_pK@4A+ z&4KSDo*#ne^r=!8k=j15!*$~xL>T5@jWRj?%j?1q@em@~hhpR>BOc@s4`$yYwU@t) z&`r4$X}JG(T2`NgHUHHfHOg4lSm_ z54G4AaH)rbKRASou^gsStePS?*nEuE4tf-+yX7KJqh#~%n%X9BUp=-Pmh(}-xYiU= z!9j^)H0?e_b3z|P>E_p{gH+{Hm4lE5^&>eatp$fj@xBzNI1PSK1_uY|AQ5+?^!C)r zp^66ZFLf9m2RvVlQ=ASCDio{f1RW$AW0Ve5Rn*7`j&#gAsWxT;}mAV58$BT`gbfR;aZHri7Fg; zIz)vZs%SC)B;Ob2LBZqO2ysv&u7>-oSWXffqqP>L3D5ipQfC4&~=BobUZ39uxu&u>uaN8sOmhB33J1FgB+i9YokZ`}yB-h-dwv zs)?M_8_(XMgCOUvG5_bq1eo$jITk|$)N#ug>?}MV>w6;;B!#Xf*+QYKmQlX|Alz)^6&DTy1F0ud4GA{ z*)aWs=LeQU9FKzt#`SX8-1|wK7Q2R$;Q2pKhp4VWROIl&JZj)@7CxyD6h5g9#Y?e% zm`(A@+Q_|^VjZWN#voJ~i&4R$Qu7ex{z7?w{q|XY6afwi(kusdoEA8!F5|7CB*_1T zH3)VbLPh>xyT(ypo?ZQWik#er5~T-Hf`TX|D(O(7itnR%jV;1s5cums!oFY~1YX`> zHTP%zQoZbk{!P(S$~nl-r6ffiN>X0-C_!yAI5a?m_~&+o`BBW&wv;3_h!PjEYf)%Z zs)EmxWR*=k{h9wX9)=cW$a*|YQ^;Y*Ae>LouTO(0*CGBL z4@#WbmXalhU<`x^t(R1KmnG|QFH1J<%6iGer3w0Fvt9EY$zzy-H>0Xw_#hM{)W6lgX}GZhS|A=M)iU+ME;^JGdq3UGIz|4 zBcr5C?)BF>@??}{;iE}Td+yEj*m+BAalsAAWjn6PuE@VCziRsxrPW(6E3e7Dr0T!% zl3GCaMfJ7oFK7g2UeFB8IIk6yex4JQR;nGGQmPxAT&fq6c+Q|+(3XkKDQ{M%JBLS( zI(5JI?87gHE!_8Jg3_+{(>3!y%+bwzJI^rp^#YUi&>zyFKO{qcNPH%11O364`&8a8 z=82+x)MF)w$j8c#;g3|E!XBwPg*?=74t}WV68M1Q67WFBHQ<46ouF;gnmpb=rqikW 
z-3A_hIcUt@ccZ5ie4Z?_m1c--pjiu2NklrH=E%g*+(qFet`I`=71z=NWq*=XS@TU= z)$iLvwN+nb)mMI9q`v%%yoT==Ma^ZOl{I}nt7t9xq{i|7q@nHoNwZG;y?b2Kl6&p` zI`pb{w>=+*_Ss6K24>ReVTm+uR5VQ(8%mQWtfgsFR@02>D@bJKGMXc@gyzm!OyZ(m zG=HuqNr<_Vl(-v7&vPZ2`7R{4z?tMFoJdj9ft01}Nlm&wkki0F$ZJwZ@)a9ItL3Ir zfT|RQb95<8$MIXR?uvK8h7m7AOj4ePnq@x>HP5>rX0h{bnC0HvVOE8=!mSS847WZE z6x|57IdUW1wj6dxZiYJ)-3)U)d@Ic9(Cski19jnlhJ!c!upE9^13#>-@PpIWVBJ;k zLJT9}hqNc5W*Z-bnQy-rX0iKDxaIy^;g;}$)!`fAR!43`SRcI}VSVg+giZ1F2;1W8 z;daMvgxdoSM{kBX72OJVK2#UT8FjH399DpXzuYtmQCmn690Lm1ar+#q>;F2`Fz#un z@rDOsrrYm?oA0|BVSe~}ghg>#q{Ydrkrt<~Mp>M>5@mUg53ABEQ9z{ixhs)2XRk)u zo+*p4FDZ*~I9V6S6&#j;!%FeN6tHMIg{#X_B*z%KnCHhZ-QbsD1}TrijJMngH{E+9 z((Gtilv&B;DD%=w(dL&f#F$+lwC(N~>H~e|HLFWB%1Td;L~WG?Ca5n$Lirw)qaH*QNL{g zH(ETnIjx%7nSvySQ;fl5kEhuuV4ae$Ukkdl;jgS7m;VpdP-{W9rgj>h}lv3~5fQr*@$bNpMgWEzf&h z-f^`e9>i#4T}+|5F;&OLRA@0(&%Gu-b@QO5;IL*|7YdaeL0sjz?hp3z%#LN9F#^wlV z5QzWpd7Q<*`oQDh4?l!Sv13WgIjFh6iPMa|6U*7h*C49v5EVLzum)1ep!`L*C0l?(Oj=UW0p^%YjB&IS!$UFWBe$)wLMjvwrxZ0r|ebJ}>WH zKD0Tl1&0XfkrXfBpz84^K_l)Kmm^q{vO*8}ZVZAzf2hzN8kX-@;jjW60%vrk$c3Yj zdt)w$5(l}r$Lj?3xSMfWhb!~%@-YDA>p@m~=3b#a@N?N8<3TIIAqX6zWxzpR47oQ4 zxwpsbL^W{GEaK(g{CNQEdX(kk5Cmfos%j9zxwPLVn;7I;$JS%V7;Cxw(|ONR!g! zUEZZBMcztOJ6Jge!4N(WybqG+habqf>*hgghBv1$k!~0>Ht$`8WGxE6ktMV3 zQs%-_=Y-ygS!F#)K8G6paR&MRnFQ0ak|eV$r;^Ps)ddO~*_@(9yHhIe-5cgjr5q`# z7uhnlw>C)zT{<`}QI`hWq(78n9XB`i*4 z8UrS$j%S&iEM9MV;^+pm;=>!viw|U59IFcyI=TfVigzP+KFTKX(eF1&$=umG&+1ZH z?-eDd$3-4LIy>dq0g0?5`{Xtr*{!stXqQ@E(N4|nhYL9Qhj-}gIJ`r5$Dw@vf&;+5 ze1n~P@(p+H$~P(~$Tum--(k9Adx3d=UV-KIx`UzZn&!Imw82f z^}5R%Ytk-j`6plE1SDS435dU-x0ZXsU~S9=qrj*OrhyR`%=_-d5oo_CWJ3O>wK-}X^dd&|4|205=Kj5A(Inx(vywn%s`V-@#I&L-xm zf^F1OMZ1V6%JyN8RUJYesXGQe(sWw;kmKzCP}gPk1AUiO4~$$_+&8HclsAs~mOr)G z`Ri>v9edJiK;i45V|ITWHzoheRFPboDV9kh5*P!q5MvdNYcKw&spIv5qvP>GSJ(Z$fnHspZIhS_ms&PE z_N4P)```BOy7TjhzME*&;B*>0BAzCUiK0mpLTKvb0Gd8+70sIAOS5PBkf_LF5}V^i z^F%#p!CW_z6munMF=vtyccMk}97tilJt-})B~=LBv@OJ-sX0&l^PU zvXjVHWj?LZQlUUSLkcpor9e}63ba`IHPC9+=Rh0(Pl2|99|G+|J_Ol^y$^B-e;4E! 
z0YttFc8Yoz>lsd=l!O@hH@N-J?*C^x8m{;9xcCPjUnYH{`v3i)K(5w3rBJbKyF6 z6r#K2bFg8+yI_;Zzk|&aUItsHJrB0dd>U+%{Uq3KBybdvrdl6!m{xrlQ`*DcX=7%BHc@IKt^6!V(72FH4-+3?8Vb{G-$KCfrop#*| zbKZF`%w@;jFxUJ$VQ$-Rhk4`yTWbSZgM;m;*5m>XJ_|-b2bl#9?6?>s=s~Vu!*%@L zh3QAW3^jsAX`1~oteQp{W^?dXxb2~vVRnaahB-iwbUbtmdgRS;m%^LjuKR9;yYIOk z?y>87xaZE=KsMlD4-T#~deCxkU^R$nb&N^TG@&St$LC0$fH&cKanHgHvmS<<gEstM|v^-fBX?3bB(z>K9(iR$=Jv2DS6J-(3#bpt$N3KS=AHEvl zdEjb<_x{>Iw*8xt130*Y!z!$g8l^lNItV)^#f)M&-XEj1gI`7HCO?ia$h{k3yqnd= zu0@%hyb@)87TVl}i>y8uZE+cZ24{8oQk2cbOHp>`FGV?=y%gnCaw*E?#HA?rqnDyQ zi)sUpgX8GdL9tp(KE!B;y@=9T_aM?>`>jahLuFAWC6}U2 z%jXkT*Mj(3*tuAX>!mT)&<*V_oR4uldp^eH$>{tF`o9EGP0=w9fjwQTn^DMH`>E5MxrV4GJ_N0S|$FVGYi*>|Ctv#dEQaXV1kt zpRNsL4-U@Y;5D-s1xt-0t`e(3sNlJ`6>&9}z2$PEpTy{FydAB7;Bt&{DfFmnW0flP zC?O5%IoLLTus@C5U)R(`G*NlI_@#|s>eNu z*UZ1n)i14Bj{tKs%5@0V59N7(^_)_OLxT0?l0@4JwSk<$!4n+TfCHQNCM}wS+*=vB zwhEPIoz#(j{evHfzK9ysvV0uX2b!Wu+Py2Bax;!M27wUR`ei>H$8&esMW2QP34 znB9vvk26_r4svfLN>ebWG$o(6sVdQrlGV0ciRYZF%)!g^Z(hDxJ)f=g$C-HZ>m^B6 zS5Btbm7YkkKU*8f1%6mOwk-vL1DkuN$cSJaMg>~8$b>TFJ>O?2hCfPE-ddKdTEfr4 z*|i9b`T2K^99T|fIDfNiB}tZ-Po&zMKc4Pzrg)uWNo^oka99EkVRL#>5_0b}>6w(V zP>$Bi>VIA@@A^DTA>wYP!se?Pa!1Z5b1qdrgBMYVfNWS@0owL;n8E+#z%{COpYAdXjZguqs8H!o2(Ay z=h_{}+u~T5yVZT~raaF*wSm0)Hla1++hT4|SIU~(ANl{tubZUiKHe;)b8X)&*K?PL ztvOjTE&A9|@suM67iJahQ`~rXx9XO|yEO9-6=-iiv_o$PoA>X{H!RqhZ@hCyzG=bs z9TxeUcUb4=?6AwvDsbAqZm0XUv|XNgwSks^L+FIoy!=0xm;Voav30)qje}FQ&)yzv zUwm`4PtoP+{s+&AhZUZZj^2M+l5e z({~1{AA3B?^x)%3j(hLS@Y;1leA&*c(yMk{TI8R9K?!RxVEu)&nqj$Eb0O!nc4StG zZq&L{2GOafjAD{bna0GQvW$rubQn|d(~={|5dwHtIJ$gt}b({6%^605p5aE z(6;GKpA?SmTyn3+(4yyqr|y3-51?*WtaT zp8b34T0t8-Fz?o4v?97(=hcTkhCCQV68(w4FyeQ7f?UT8|Dwc_&r#>{bO z5c}^nZ@i7#H_xK3Z4#+hhbZdTHHZfF^rK;YmeHsIi=jWb)5IapG-a3rO&eiHGe_Fc z>`_)EI@*H7$C%TCv8K=+jA`L`BU&`UfE1xQs7%r!jmaF+p28uWT5e*H@V&`YF-EehMVle-SATkR#<EHQv=<;IbVst9>-B*{x(krtb1&=LzhT4HTPi|x$F+rg5&ovg{r)rLIXZOPNq zjy$~W$-~Ei+?P3!+e%=yJ-G$gk=t5ZatpHg>KzItl3 z!dQ=1n48iHD=S)VYe&BJj=s#+_E% 
zd6S=m5BWJQBR^-~Z>wCFe_Q3g;@e8EmETq@UioeL(iNXq_%8pjV#Ts|%l($UUb=Sa zEAOycLHgjp0SC^+wq!2SgWRCs`$7*|qq2YkwUjAPM~?yxEuo7!LKlNJX1#>g+WLMA zuwVH#z|rqZfYX}K{x1HX{oMjSukir913s=^;{R@q&zje(eOJF)>F4*tcdg&ECE=@T z0~tUQ(*_3}aIhBXN!|;FLyMVCL8_7z#!-VN1#M2>7P^=l1)4AU7HGBNOQ6l_Pl5Jp zKL$DkeF$=dM(6~M&ILN5TiE+RkFYmEo}sUTyn~+yEDL_-=NJ5BMPSgwWf5y@0~vyY z&af7wH}NlWnAMY(LW5X~F)86{(iEYkLE+k_6s+qAjnMmBklBjQL6-g>18sue2iidw zw2yon@9Am~@C|G~#mtd1MAA&4G_}ZK;_wOM4gjc~1 z(B_;{pv9%W3~@_+7UG`#IK(sQL5O$Ky};#(cl`b1Z>4?s%`opx*F$}C%7OzoTwWWKc_AP%z0^M?r8bZWI2eF~H8?B-hftmd zF^jk=>>4FH&>(CmmgD_7TF3uggkIFEaHHgBVP;v6LoIV3hT3eqA8Nk?dK|{_x$L|Z z=3a0;+-pZ!_|k1x!q#lP7!tmzG&nl@Y%n*gBq%YxHjpVe7=nWxIIIxq&3o=0hq{PY zmW2kP0}a9!dd!kfvD$0jMC-)9h%`ul5@E9GLAd$$yJ1$l?}XXyza8epYH<6@!aet2 ziSXHbDPrZW^AY~}XTw9co(_-5Jrx$6eLOTSvo`QJ7=eT1_;%z64p9s7>>V+gpd^EF z?>dyAZcFi+OFqSM0$+1=;+{wAWj=~B+IlzAboZ?YOXzbpN3MrE9lILo3f;@A=zO%# zfpgKz_nnDev-4C`(DoBiVOxqLBR3w6h+SVB$P9ik1_u{#VDsKs zB&%9evby)DB+a$2leFTVC1|g|AE&e9R;=Nnt1%|0FGX8iEQRiMCeHEf>3G+ZrxLu5 zo=jMJusFeQ?~(X`g2Qpa+YWNWHy6f4XV(TE2NQ7c1czWY_m&xf=ibvX?p=yf74hs_ z*^<&!J-?)?uX&rQ5%WAnE8}4bXWOl0or6~s49}e5T3kDwV14COqC@G)WS3LNlRb|e zO<8v6aPq3X2NMH!7A6F5+ZP|cd2d{FPHi9y_`wt$yul%Kc27!NI2^h6G)j|`q;z?8 zTBm468OpBTGF4W5$WjY?v0fwb!Fr8N*Vn7>FU{0GbvoJP67t)N#p(8EkEOeuJiN~P z=z(#xucz6nL3eXhlFPOzY(BSDabHQc_Az!I`>}QAXAWoDloW1oDc+s! 
zQ?x56&Fcg6H)n=!&0QCx!v*Jph1+iU_9mE zy}cV{CEst7SAVie!RA(;+_KBXvqR4u6H7W#D7)dpGiobb(A>!WhAGP&z(16e}@u?Gi#7Cx)$D`2Ia>}C=x^VZZ(Oh~ z-@IT`zD+^a4woHi1xvOk?h4qNP!OJ*loyqglpDJtIVV1&HjphixL^z=J8v%)You(z zeDhrK!L&tc{L3v1C2tqZ*ST?vNS#M0=>$oX#uji)NeJgV!3d1&p?TyQdC`ecrm6w!O8^{hEyzq=Oa>AdK zF}pqGigcx|^LoGADlzHquDR0ZZw%8pd4H@;(cLMYh1Wz^?7b+tcK2ENu$`w=qYLo9 zh3&_5lQtbONX;rTNl!1bOiwvtpUOS#mL7h1X?jpmK-!ukF)1sHQj=HjOHZr~v*zTg*yX1}VwRkWk8v;A z5LFw<2^vTM^8d66&1lQCKWN+Rzn<=zIk5C*->D}a4U;_lVw~!}*VFWOzMO4={NH}d z18LXIcjUctZYuj^URU!?Ez?|?a7D*2_Oij6h)X5`A(yNI)?RW5@W1S_X2qpt0ZT6W zuXVo=vDWEAa;+e*UX3ZDZzI|`z7g{O#;*&ew=ccXecXUoNy*hq({QPZaFqu--!S19jK%ds^9yWH*hjBCV zzf7CA_1o-)8!_)8}rydUr92G%Sc{%2`Q;~e^XQQ_@brZ z`dL@g>9diR{U>vd%|}~p%a6|5W*QSo92mlki;B}dzcIDK@4La=Gl?T zd~4EJU_siFW@I2`OlHysWW7+A92RoOqgGIEOXlpMrp)2f&6pjxnl#=>?V6`y90V5{ zL>Tq&8bCvOuA~uteQ4}JPns~;g{MDEhyE~Yq&2h$%L?s5db}~oLU&Mr?w|_YL2I%W z=}plf_E@s|_z z?O+T2!4mp|8I9^@L}R-f(1f14G^LjgP4CU2S$#A~w68je_fsW_{>rp)fC9-6Ttq5^ zWJz<-Led>9flf+oI0hdSoNCNuU0|4)>kQ+6muckvglXjXm1%56O`4cet7Zn&{tq4M z(n5=Rwp6FStyE~>pGq{OwLA@Pvxr8wm8EfiEu@KmNz>GJk~E|J0-Dod9*K7lBgu}U zB-?2=DR!PkYPEtChc+Xb(amYWq!u)P=AWdD>$Q$_KQdMvLFO70$x?SFSsIIxrG*4p z+Dem!qb!-b%9FXL0+}sQB2!;wGFhce#{Mc~6r@6i5h`R53nVC$K`M}`NCw#oWUyJD z^tRUwr6J8ob`+n3)Qr}oHn$7uOZ6u+g;8XsKAEg_M99Wy9@&^nk&TTkSvx3@m5UNt zdZ>`aVpTFqK(ZPcuTv%C^~&E2H!FWJ*j^(@z(E3j zkioTDQ?x4?VQ#IZ{AjXKn@V;%bI9Is0oj|$ke#(W+1V?Tt+OiGxT}-3weZq=WT? z%p?Yq&7!g7pgNtLw8hBTK#H8r7Lk*cGCA6*lcSR+Ik;()y_XKz`RJ0ZuO8X>>63MU zK3Roe8xBP4k#(HTSLdrS#@+%f>MU&k|H9CAp%^IA4bt7+-QC^Y-HnteiUHWzh2352s%z`o-39mW8TWm! 
zcNh6z*MGe}erE>O^UOJCVD9_e=Zp?Aeaw&*WQVM97i7kGA~V4k87Tos&j?0ZP6$%- z!;n%Oj^v7PB-Rp*;YeyB+QNpDyF$Jt^oD$nSr_s?a$Cr|h}~lXck<<&s0JHlet4-* zLZtp|B$FSMWw!`9E_%rGwnToQBl5#MkQeQTy!asGB!?k8Jp!58QOL-PL3&Xv(#m6z zS`&xVdb-^l`!j8E^w+emsE>)Oqu<7EihdQfE9yn$(3rr3YH+3+9F+JGpf(xt|2=e8kk)IZayv#)8=KhZ-SeAm!>J((vrToZh zO#YJHn)o5DC+T(4+N2k8+Y+C~3?)8}KA12T@T3}CC`;<1z>iSX$w(m|B%g9IWp-+) zqTG44mmRA7JWv%9fXc{ll#@4CmPp=QY6^LB>Ey>{BEKLDd8OINt;+t9SDW=YzaisY zc1Px`wAGnUlQw7movyJctsip@wQ`AaAb8 z%N~t>UZ@WWMqNZS`C>_^N=QRRN*2n~$(PH{M{#~33QG&W7gZE~Dy}JbTi9IiB70fE zgz*(P#F0^@#IORp)NiLHOU32CQq(1t8BPDzv6pYNyW$V^71$3HD%8V+shwi_m$tx z*jn~i>YkEosRxTMryef6n0lmOEZ{>mxKp;+pFGNBs-cK-5Ow6uEq0*uV%$w=pAtKt z<4ZXu@}+_j(GWr2SZu*aT~g`K`qauFwV5^Fs`KjJR~Oa2sx7U3THaXqpm0Uq?VQcE ze`f8fzMQqE@_g35@-tccOHXGVEEx;mYap^Tdo#t zX}VCbt>H|;j@pw2yQ`1o@2xnRKU6*z@FP#mlWGW)leo4T*JM8&g_7HDtBFY|QI?)KuJYr+RVMpQUR%FO{wD zI9tAP@yW8m=A)%s8xEK5s5@A)yLx}|p31TKRf9KqVv$rso)YPQ-P!0Qy|>Ik1wHKj z_vFX)cz9!(R|J;&CS!?z-f&x3#kb~!hR@BZ?XMfNmp*JP>bX_l+;gRB)$()IYnGj? z>F+*Pv##@S^~S{qss@|)Rc>q8Q?avlcloaBv4B7IgD-g?F{C*QX|1(UR|ws-=X|-H zGM#&;k6vdRTL1S%uSY0)y%Vv*FK1+VVEMNtv5gFhN`x@4_@2=m}y0dmm(~g?$_1mj=){X@N*dD0_58_GhvDbTBD0|;y zErwn?n{%a|F6nyfoxxn^k%9U z?UAAyI`!xbMDqn$Ltg2CjT+Y38)AdA)$pL3(}x|Np3nFA``$Yo7=P(@VA=k2QC+JK zX0C7F*0gQqz~aq)t2@^X^e*cg=;`epSh{A}z>}%a#KNbjL zdty{W22ra-XJY8jz-m)r^wVo&9i7j)(Ozq0i?iW}9d71N_qn^>Iqw>B>8UGi81^sP zbup}M;7DwL`%vEIroHVOyEb*LUAA`Fn&oSH>AyA0dY2D$tz5dHZRL{9ExjFE8++Tg z*N+9&z(tu_Zsd^eZB!bM<@(dmXEKM*9~8w#8yRe|SNgcaQRm5CN1NN%%)Ku@Gmkp{ z#35tfZLhLzmxEf?or+rCeKfVd@o?op&4H!;&7`l}HmqFHIncMXYxUaB?%oY;-94LI zy1TbFb}!jpKNbk48e+(kDxm&oRT97|gUJ{mA7Z1$JZ!O9_+^K!;?uo027lewa=7qR z-~aea)7YWMwwXI_yO(bMBcO5MTzFUasf6XNr}CFqo@iWIa)dPYuGP!x2m6;aty|yG z(zj`G>#D)##VfWoEbiG+Hx>vXABOEgu(fvw<^TKiCt$ttR18`O{@89Q@_euPqU(1R zO)fmqa6bN8KXCtR)7TwPY%>P$x)p7>;a9!-N@(+v3o#w7=QBuWmvt4K?O2k1a-ged zXnjZ7_Kk}xHx4#euie^MyJkmS?W&!%V}UTLfvvr(X#IbgA`hLz$dAp&%n=WE|Skl(TzVL;ltsb%h&u)fBJWT{RYnpc=C0aiU>9Csrx3uu+d2 zTaEbM@6n%m{kDkY`6n_O$KNb6JMdAtET%4?J~ETk*&%r|VuoVax5X 
z(i&RhFZ(N_vglS>c?KEgaW~e~L|)xh9d~kfRpNoYl_|USm8WeVDjN&L3A0c%n*(ig z84RdU20)kjx=Tmk#$U6A&OZ^AI{rpZ7zzJ&^8o z=VZF~wKHRa{8=1mA^pF4I(ZOE%(J5soaY};n?M-^q5WT^#drN&q_hQES{qJyOrIVdROQ$$G0$M~@L4;kUnABw}mKQ@I1eOw;q z^L}H5>${;Sr#C0Z1Xa|2&v*u#Co!*%O=B+Hij5pGc<2_B)tnI3_|r5?T`O&(q& z%iUc^HhVY?@9`W9w2dPVgrB)a`uofo9_GmVah!X8%;q12$drCa3N451f(|H2G(lCi z8k$O_&{fHYfkrk=wbEg!n*v+?L^v77!rdqeJ|^J^G7Cn8xjzytypU<>hEhu>G+NqW znWYW=@Er@RW--_IFwEKG+{}K;AZ)(J!O@2aJj)lGEDDvYX2U?ls#cMvOY2#31B#*!N(PbNjyR1KlqXV;DrU_+#xpJ1yU0nAve*U zya!vTPPT^D6iet$HHYyu6If0+g2N1bxC?5-Z>BmzXR07}mNJrs#^TsvE@t}~hFN`? zVcKpoOyzxs$$rW(@oyQ9a4;MJREHOMI9)N0<%r2FTTJJ&!YnRx%;7e{d~QQ5xAB^0c>~c!}cIOc2W;^ zmvmu&OB=S2sGcWO*BDqyalwRY&{yMyu?8Q#+{VM%bOt2qZV0Cf25`Em5BvN2 zKkXhF{IvZYSW^wARD+Q!FU-{VV5dC+9(1O)pY?nMx=13}TLB@#stAtMLQuRO0#gm) zpKT1kLKFB@n8B;w3?7Tk;kwKMF8#y?3%G8j`+LpccEl9!XN`xQZ<>5}x?}pq@xJNj zF@O!#V5YzW(?$RIAkLZ-;Y)r{uqF9oPSS|*R7OOg1|q`s5FTfQ&=gYyXPG0Yz!Cvv z*6^#fflsRqyq4O+b2YKn7G8rk@ZN0=??aY9J4cJO@ z!JKNaRN{plc@UoDg9KA9Cc>ONDSKJOx~q^crj6JzLqx}zAu7oV5g9fJ%djeLPVv{2Rb~=3bKWzWW`=Y};ud9x)-EKHOcey<#u%j9*pX_PpW#(Blqq_cvfqHCR#&j^vAZ$nhbRJcxMG zT4^R?$h1*FrjrITy$q2NXo0i{JEX)pBPrDl3E7^AE%yH9$3-^#A*$UEkv)FHQEPm^ zL~ZnW7dGVkBKVZw6aNc-5B&b{yW@My_lC~{pRs_01Q)F3cz*dHzNG)7Y5kwQKPZRJ ztSPisK%t{1^4*P*=WmUiFh^v?xFbE;8>w0TNGb|MLKWHKO(BSD5BV9_6Y@E}FZfN& z_TXm`M?)TkoC&=XbS3mg(5;ZGL3e{M2Hy)B3pi2@b}~HhB;Cu_|B0l%bEt-5s-c2v zsC3jq1$iMQJ~k){aiPqb7jhH*k(o)pTwxfJt0R%r9Q8A)GwNefZ{+L54G~YG4@KV( zKNWp5>~i##@as|MBW^^ViMSbYGU8U)i7|i^)!-=2L%Nse*ZO}NY0g3|Aym@--a0D< zIx~j$j8P4>-nOU=bVFIBFNza_k(UvHtil+i)x-^_wj_K^U7GMRtv~MX_@RV5(I*oB zh`y9`A?8xz>DVjr$78R?9f`dbb1?RL)WO&rzX4})F1S+r0i=7QNqc82PC~inEYuq< zq&*>WSnQ~QHX4gos-eNp4K-o@lobm@ae54SVF@EywaFhdI#QpfuSt23vN!c+{K@o7 z31>1+C!S3|mUJ%lQ1XT3{mGXS_a{!5sYH%l?G?=s=TYKlT^Z)GgAdD8E z!4AJRDIU!#+@`LNDWWU8Zt_F^<>&OnE; zFqYa%(|!yUEOVpJG4`}Xw~zBkXMpdQmZ)&rCz$ZGCO7?FerxWZsXGhLWgaOwmVK!B zK+d6}y}5@Acjg_<-%*{Dx(>G?HPuY-tDQPU=O8vn0#n^ce_Bl@Fv`?x< zV>-G`DSvM#L0JqX^tx)32V(lO*W3PMufONZp74-|9m(-G>WZ_^m9`ZhDcDf4uW)b0 
z&f>ilTT6z@2TKl=Y%Dokw4wND{<@+Q*=q|=XATscP3zCQkUSQ&)WY@4i(zZdEUKfL z_U^T7PDPJ7WdLkNvC>fvt6kN8tntwQ(C=;We7(Qp{q>>#f2@vAIN6n(ySKT%d~@xp z+O;)1>eklmtQn};Q`KL+ue`7FK*^elBL%C=j_0l_J(abp52NBO+ zbEZ1#dyzg^zQHQwf^Ivzu$?iQSrO`((=~z zlvnk1E^X{=8*J`w-PW+Id1p;escLT>D(h%ERNT>UG{3Xv4Zj49;92!7q*F#FXOr@0UJJB!?SY@u}KnVsp- zJ$KKIR|BJaPQ_$59>}UF+SS;Ux@}`?!N8WL@|8R4YL@M;s_z;qZ)`hQ+|+WUpt0<#;QSv~j4>IPlP$A5T4h3$kGUCU8#rWY3Hk00Nv77SuUfbE%@9RpO ze__7p@OwL>t#4diS3mI!UwkJbz2s_INy@p}x``iyz7-h%hjpcUVf-R4uT z-&GPm|4duv=sRPL-CwOu*ZpvITK>g1u;pcFV$s9IyyRQOWnnj$R|Q?zQx!9`rz&}H zUq$A?{?fdahl&b&j^>vvIgwM=aVoQX@!9mTfIrQJ1X_156yiYB90tpkIIz~3^XDcr z){7k`0@rVg%{c!=QRLWLO@)154YdbhVX+4GE=#|92DH2liY|Q?lbZ1;D>v?beL?WO zwT1qd50-=-I#?3B{ZLWT`XdGDYmVn;^`6YmS#~-jzw2DuSRjPvKImT?EPPqBzBBwDsO~=ZXe9em%_%Ob=cXX=Bu}V z@yFn>%(pSoiLW!_qFzi}!Ju^tgI>CBmSkS+qdfo*r|=wqEj)Su7pXZrN0cRK{aAD$R8MBmJ(0n>$;{m&yv(I*{LG2hGkEuZSulC$h|HYLP!St|mi%() zskOmKw-#n5lsm92fU{i|yqr?lntwRTBjRU@chHX_AMYQnK5joZdD(tD<7e~ya)9lf zTR{$&?gl%b7#kE&_Mw%Z=D|4T#vV@Q{0ScB#63Qa{a-Fv=K71RzqpQQm9I` zLQAoh>L`J+c0MfiGbwYB0#~zmI$I$M!8W0YvGYfo0|nC^oY3iDi|zInxZ-H``KF8c zi@UBCcmH-D3sjT;(8I&r-pVi+NPizaPZ@-}ERM~X#?z0vv=2gbdOM`&G(b_L5~>n~ z(2~u9o?;qI7A3+;Efx-%k#N%qg|A)!LJVl(*N7HEjqOlrVu5ZGW9&9Iz&#@)JTx`< z{@8ph(80mHUC%I=_A$&c@*s9z;b8hn|M!sIZpSqK2F#gQ36W_+ZD zqS4Tk2#2v$2rOj+X`$E`t_q&;QF2AdB74NCSRzZ+7*%Sz=uy|eJ~dU`Rnx$0RSmrP zjo~#6b7d#PoH)#4cAw*5*8a&bOGy9MQU0I|yj=O@L1bdqxFiTqqCFT>BOo&)7>Y9k zpep18?Kz$>nCAvlVP{w`aDbDD4LlcGPsS6ke7rMcDEpu| z@qfJsi(i=svuQ@KrM!dd3@!2}`%#J?Qv1&tCi)A*5lkM04_KV;;OBJ0#9#h{C4{)lFpt{^BHRY#J?KG}M+Zv0 zno#Fchc2HAO!yUHDp!Yu?J?&%006WZ3p1ue>mscUeP4aoy7Jw9lf@I%*TA`IQ8 z!N`9WjKb%@AVC;<86warTnMcyF=#Z4L9I(1Dyt-*yio#*`y?QDK?2fGBp~~SZhsMn z{10&`4vRqv3!w~=UsolF()GU>$o^AG64=`iyZf?3!+ zn8YoBQMxGf3&f#YDFN+9NoaOTL4BnZR5waPc}NJ?zrt_b~J zMd)r+g4O{gXkMV}O(p0&qQ_nP=HT+)K&T`gEW(-^USsqxL?jn+q#ciL}_KWe`;`l9p9=&SZ$s#aViF`3t_@rpUGfx|?6}oV2)q`!XKCA`}V7-U@ zxT6NJyJ!IWyZS#IAM1XyeWw4`>ZRcei?>FPEj}9EGyh_A$KtEuXkbb;n95Q{O6H$D z2-^nGW`o9zp 
z{AqtsIAx4u>HNP0`s|(rOG(5#DInHI4bh=G2#+^FNQMali_PKNUH4Y$44 z!yYHCKYLuaeB*J)>Z!{^+xyPX?QXcdw!7r|*6y6!JKIz4A8byzfBX%YlP_mW?XxpV zex(1|dokh^Cm@B+=FFhaQOcyX)^vMCq3mJ0J0V<8;vXt>b>*cMhY01=Yaz zgV?#4U?Ph2Kl^-;96B$yKz|z9;Q}$-Hu!0cP(mD;Dv~jL8l_`1s#gM6S6nvcIb}SJ0V-*?gnp)yC1kd z?xFv>*uQ-TVxM^T$3FEM4Qyx}JV|SXQ2X(u`7((zS$@=!SK4AIh;}PsbT~?W?{ri8 z(C(%6qRHR*VNJBnt)dL~E7|3KXVTh2jwbg->`&MqwJY&PdbjoATA+r1z3YbF%lnS5a=PS%W^a#&{aK zZD#*i=Cbg8kB7|j9&eTVOa1i!Yz?+CG`|< zO6@ESps@AF-(w+rc8jylp#eXe9lcSW>$}rJ zd)rFlI-6Qko9YHKtE*3D)>WNNYpl4C+*Ecsp}FLb*p{Ljku3$c!dmj~gtX?~3vA7K z5HK3JEMy^=o_hl6?p)HnmBJiYEKBR}+T0_nZ1~>w+l~8sgZ+fRhRg)dA2*gdJmjFY zbGx_I`VCc+E?Vq)pa+dm9($Q%3XXoyRhkWMp^y&)XJJmiPe=?<7&&UN7t47 z6<%L-JG8#wUQm7h{lL+{o#sL)Y0hM7zmPOn?K}pZvK;8uVSVYd;(E5;n)~h^Gw#c` z7EL>L*+6{vX&dzohuqD3_XN4MZ;cJA-H;Vq)K{IFzH)hX!t$YF1`|<+juRepzda5QT45`;>x=rCFS>mMgt$33o*3rpDoCsidf9{ zAS9SC1KP~vK?CO6Z9&d6Po?+{y-}aM{e^*W|KAo0-FF@J8*X^pm0S+@$~c=G8grr~ zF6c;Cvgd&vsop!zr-ZD#kQ}x0Qer~))wq=QKcmxIZboD^-U`d9yBm^Qb1!H#2%vFD zpgt&|b?-XT|6Qd2*DPROZ&qe*U7F0Cdo08{@>W`4_ZPKkn?4x`uX=AO-TvB9tLmAj zMeg4rE(!M&{X%XRg?ZlSh;h8KBhKOUAF&>LuEzOqx*8X{=2}ct&yA?KuD`+)+ir&@ zx7-a*ZMqjU8idd|q>|<@CGE{#|6e|X`M7xsbL#{@bN(-W=IEQb+J?wz4RbyPTE~BgaSeW(5e>1{s%U=-z z1GmCLR^AScSavr!s_TAWOvi(O(IATYAe*#z4XyomPG&~>Coq5P;bzXClVRDF9SYiu?V*fLW;BdNTs{&aG!_ej|=V=Pj32H zUw+{4aQJ?p>(2XuUK<|-`1U{W4_NugFKERh-_am}`k-h6gJuB+E5p3%%+r0{Gmp_Ao&1+7Uh;u>nVY1)FYIA4 z$4P(h`GcDo#1zf}2=lFg#N-ah&#Z^a{4!`sv8S?wcBXd~9X-;FYcEgC*C<&?SJQBv*)ejXi!MczlD=|v6^8nQwHGV zK`v(BITo|=FBY?EcpT?a%;If@$i!MmO)r7M>};qmNQRDhER3ZiU?U$47iC}gsOpvOeL%};$tWs9MWo0FNP*TBHy8m58?c+~1^;aY6qd_ObTwg~Sj9m z8GIHPP}oTm>7vT07L&mmadBK07sF3+X+TVlkOjm?^H(3koZrqcN63TNPWpQ_c@V!c z2&DhBNqZ-f=8gatXAmZE`9hG#1M~P?ASU1dnejGIo?r=$NoLTWYy|VEw5UBz3!c;I z4Y;5@QfEq_a@GPmaAXe72+hJ<0(AR#E)J3hw3ma~b(CTH$%E)552E@O!{n0wPk6~N zVZ#jLPjz@u2EqyB$$yx}V*3v!5GL3lKkk zHofR(pnc*L3{ISYE5!5PII){y_8#P5)=?jHlLt{p9z+4nh2+N!6aI=}e7`Y_8+i~8 zREITqILtAg)0kc_`Vi#Q!5o$*=Cjlw###g!E=4Gj@1V{l34QV%EV<{vm3sz4xF;f+ 
zhaY7;+-T!rq4zh2hRB0CMji}hK^iZS2Salqjr^#nCk*5Fnqgc%F^nD6U`dz~hT!JV zg8+v%CUR(CDu*frITm3yry}Na%0ZM<2GX2lMsSKkoiz^zteLQ4O@S+G90FJr+GWwA z*>6ByfdwTs78a>NZnFi6a=~&P>1HwxLF=ve+gtpFv z;E|b__SZ~IdOs86F%uIZNUaD`yE8D0Dws{R%pwHoHv6&Be>A9uMXFqM3=J1F$p_J- zeS_-yyihUY!y<=qSmZSkN+DCA5I-HVnSziinF;awSyn#TOl zvKAH~ai(uZ*`4AorS>Y^-pm9M@feVJ@Ll}`C!_MaD8SulvihMEGsW9*t zgl?1&G*jn7txy=swez9Sxd1YKB9Pi40`b!#SolZ;^S_HgbVP*OrLPh4RE-Svgfv8c zN@C#`35dQQ18B>!pd(Kn1o?BO@~r}-UdTi0vEuJQ zhiYK^Vr&nX3P_2D#^IPpXU&;%2rhKKxU0%Ic<4=mr{yeoy3B*S-$L@FB;Z88m|dPMtg7W<-mUvzAK=rC2R4*Ds^{g>eMnmyG7R)7CuqVyu zN}9`)dysTUB9x zTJ@*-LzQnPpBBA0`L6QR=%>aVlM$_}=FmPzoA^#xLHh`W>JHjKb2KoNX2DvV1!vM* zUZj25`w-aYVniqk(3Zz3h&G}0~6{4J8GY; zb^J+dgp&S_qCEf!^8Dn*P&US3CXy}46LJzqtgr0%s0iiHVaaOmf(o==`_<_@_wLqx z?72?wf%}l&UDxZnHyqy?TyXkoa@_Ts*#YeeQ(kkA*A`DiA3^a(#adkA%8B9^7pxBbAM*piG51-lzp2Ry6Aaqg2vwwc{=w( zY7G7g>@@zvZ@~1T&pxxW-hY^%@Oo!)!27e+F5geqTm3#+Z}R_awJzX`wnvMpRVl24O^Ysox4!Nr(zv(u5SR>#76Z4ZWQv)dbd)_zCOOZ&m#cMj`A-a7V&zH?X|`oV5x z=tr9sp`WdnhkUi{3HfF|8d$T(f%4r!)P4-vO!`o7GUIu@ z)w~BaPU1Jqe3dWdM`)kQOffl}T41#=vCe*He7Eyp%m&x>(MR3-qaM4iihk*~BKoCk zPxNc&rO|I4yQALQFNyqU+ZFlAx-;U7y4#ETAe7pVC#{h=hXX|-q`#$DXjsJa zxm9P}i+1D54>~MnUSDh@cD}(==~z{W&QM9b>5lwt+fCUO&I1{3?yFM!yn2#{yq6^1 z^X^P~;?gna1h~y4?Xt?YClk*Uk-zZyg&F-rG0Cf3zJ9oEC7x zpY&cd>Haj*n}xLJyjqwvk0|r0OM&Cr3Juon0d4M!>oukxUt_vp-wG#%ElYg02in5T zdYh8$yXx}YTdHb(>&lh|RupduE-t(rTtXg0S?(jhitH!em6^{ys?uM&R;9jis!D$6 zP@VL_ZZvSAxe!RtJ#GdEvPf^1lIE?S%lvGg&%9nP#oVX$@2h(ja-H0+GI?l|k?_{F zc5-W1d1|fb2{G+l5@+AkmgQd6T5UlvoSQS>Rf1M`Q6~$k_Z0zg@5}Lf3pztxm$>N+&`DOzF&YjdqkM? 
z-~pux+jr~FUANU*rgx*8M(04FapS6JyNc!MZUx;Lu{%^dbkZo^USSu6LOOLXmaR&CtoYf!N%!a9F#vP)WDp-=3p=D@HOeW3x% zj)ewy-VKgyc@Pj+|HwC~>WNos*>m^w;#aO21#g_Q^4>X&2EH^7(bR4Rt+|(x{%@Mf zd|Nt^xwejnIlqgCId+PN+5P8KX46$kzSS4hXDm5wBHDD!Ua9h+mrnk^Q1jHC2@Wya z@;pKY8~l7WtPXTrdnCwx)q?>4r4Ri>+Wz*5Y<%h!Tl?HSzVel8V%clwa9Pn2Uql%*p+91|OXRF!*3P(|=!*d+BYJDb3dngsZOD$P`>~ zS5G?=WEguQ&MM?+j+6JnI(O%xmELx{5Bb<`eeCVh|Ja*OlJN@YeC83-^1>~=;gw5d z%^Roa%6AT>+29rL+JOT6Vu|MBSfDwAshsIQEzxj zIU`Kg3Mq<4s8rU*Diw8{QBlRmMXGOqYG~XZ(bhQ&J^h2wH`)mUlg+<_0uJ&(80Oka z(*K(o<}hVJb{wM@wx!5S)3K!;=Q_adD8J6b`lNfzY4j4U4(1aGGyV zAuDrw)1i+nNp&`<=JAF=LcovCTkwGT6na}U%?=iV7I2zn1i#r@ zh?%M0jN|kr-S2_9+)fbVwS^qNB~-_mLU)1z%qHrl0v-AsK@N*+Yo z-=x3Eg9s$O?Fkl#GXywoDR*E&nFAv%;L?L6H@#KoR)ac^G7Na-V9h5E4}N;{DKHCp zGINap579XEqb zJPhv96?FZbBRd#o=MZ@?;4igy}Ok*T5 zixHz*NN{q@gAB(kD057K4hIQF4i?<##TP;(5hcF?d1X$>sBuD`JP3IW7G!m}AZf$} z5le0eIdfyOA2;}-xWSpm4W@_(OdZj|3uYBDNF3k;bDnrWd?LsNVaNsfKj`~G|L-r@ zw|_^0YLKJPD^gVX$B$9a;)09;H^j}kG0%}3Gkkb3K7#5<;Q@C6FF0$cjdp&jffytX z5toT4#5V%}W^_IOTjRgKhaZYmgW@9cL6ligAs=Ls1{V}`xglf9jfHkRnC;1fX`#HB zlE{bgdHmq7765nqIIvcZ1IOlZ)DPpSABcyf}SHlP9C1#SKLx9!OjBLd1<1bA$LH6ej?|tnrv$J^@pjCt%XjiI}i_u>Iq^aAmoGK}qNx+Hpl~vTYbRq)`xFSRoPwEy(R9(iT&3G%+? zkOwC8%LfzuCWu+@Wrjp&dOP8fk)1Et-i%b+e$;_AL!$sIAJKk33Jk&)`Ttj5H^&_u%SId*4q5AHX9FX z$0@M%p{!K+Oc*B#!60`IWu)dpyLBEkR?LI?24dg5VYLhMeyBX2`%U@loR4x4ekBQ! 
zhh&^mQb_EQ3dGKkp?iV?WyduB3v?wpVJgN6Ytonwq`#d>`?=COyqi4vF>3s9F`NJw zo2hX2AWtZGHf-bO!ZK5M*t~SXcauhuFGfp6J{b;(yf@e>@>c(p$Sd6k3!Z6x5_w3u ztXpc3q=b(2Niww$>qBP00i^dBLVEWoU?9#33wHaYb=+v3*^@i~U-D!8DI?=YXRZ0^ zPJpk)RCqhj9QO2|^VKbC{zs=&k+%*-qOWZ0#Gc!9ia)VlE&j-IoA`Z;lVZ2c9*SKv z`Ye6P0CJ~{pm4|(ihInVxZMJZTP&fl$rAD#M*(B%0~^wNE~It5N%IAe-Uy|>h@fmn z1Z9;XG{=vG8Bh5dVmI@Bp!eLDeqjrqcqfTH^vIXE>sBrGmrJ|MpN^~Kt~hL!zhHM< z{*2vy`QtWUlnz=!Ww#Afx7tyX%>k-w9iYrQnk&x4HMigGFuz$4Ood$dbL_SEhKzr%CyQ*D{qO z9_!T(xF1m8>vltZhwDd;K^JJPbAwi&JG550Lvw{YG)rIOj@&pXj;RNSPCnDT;@~3si?f zYBY8QcW7@5T%)_$f0y2Rze{@kzHjta`aplVFZ7rCLT?FecJ1_qc83o%+kK$X<_-0K z18bTGuIxUb_QUCYegbLUbXs@M5#jh&Ai?>%Sf1-qsT%*yVuQ&S^KIvx%<>dFlolel zHz|J6_V_H#&9P;=>!OP$LO zVLAIiiL2Po`~dmE>}b_>8EM+9Qwt52C)JsBCv=;)$E~+)i8*T382#9?KI)rAUF3JO zx`>}9wc#U1wP7%*4TWA!Fmwp*e*^n@9Pp-j5K4UzPkJ+hv}eIA=6m^U=4IVn=5Fge z=1RLH*U46miTfK(g?7|BiEgU!mG3VJSL-cE)a}mAHEPSMGH*(6x2{X=x2;S%U{{v- zz_v8ulXY3#SIe^4?`CDuKTXP`U|1dj{qk_=m4`w1-@uXPf-gPy2LzZq@NSJB(&-Y_(h1*XSj;yf#FwvocP%r8Lu|uBgUgjE?(yRNqsWDo& zrZ&~2w5rG|zr4XNvt)&1O5qOYg!~&$iMj6_QnEhUrl)_g%1rrgo|W{|Bs*cmC?^gE zIrO1?{|0XCxj@f7mh$GAwC-Lqf%#M?z+7wRW-j#bFvkbDnY|n4aBNv8&)46tGi~`Q z3*nCC&eBaw{Zy(tBXo)vCmH87=Ub*W)Y`?@E^~^i+UgQkan(7z^sQq|;Rn0;+|SlY zncpl@(tenxrVJaWCBZN~5eEMTp5#M?(mY5c-JM6;w^D$)No&uSy1AHBAv*zT^^`>*11{@@+R(mTI^@eI?FN-%!UXpDQ+gW8B*0#hkuw}EePtzr5 z-@11W!IdBE!b?6|N9BLDh|T$77N0q6oRAKq#57uk_!sb@*Fz+YK`OoVF5+cg*Rz2p5{k^?s<40S+n$Oljp% z^!&3(dslEVH(N>fE#qKL^l>sngQR=*ax%RqNPC~0&)IxTNucVm-t?jamcm*4T%?kA z`zuB5h|&z+nx^lwxzyBkLz|`j+V$4fednw#dp}z{bbYaMZTV{HRsY@Gx9W#!K-sWy zP%(^x>CnV~gAn#Okn|w!`=XxK-bwqNUd6#2+Q7l=B>lOT*1(sXBkg@z*8DKFS^C4& zq3x%MOVhBidmW5Ct6|_(`Ckx8^Dv!ZP|h$n7Sq}{c@W3elJ+DIU^ATs&`Xk}^)e4r zaeo?9cwd~Ac~?as@wVaA$iHl72i@>o=yfer%K2)Ng6*Y374r)%8ir@rYU!Oir=@e` zi?;4A=ozhpzIiY7t(QRGt_^w)&CqjfgzmpVJmoL)8Rk&~`48QsJ^M&|(i-@7(x3fD zIGC=pET-l<4^#AN8k6-}lr!m-5^vN?oe3c?ET{QCcM)=X7BJubNvxQ~<6J4@M-8%i z5Bub`?wyoZyZJ#uB@U+e78jHGeF~HKU6?cay9`h8cQpauZ-$fHzu8W6{OU2&`fKPM 
zlP@X4dY{Whv_36esQz(ip~|}_3+0}X*1rjvMJGu??S-P|7AWbggR*Y_e?bAm+-YW* zi!=tu*D=fxy$%LRf3Bo8@K(}(<$sX=y~n{Mj7(r6AVe7iF)nW?@VY@$z!4@BtYJUd z6yDSH5h19FOgdkoVYUJW=E&mO9BB;C7W)igsmBnJy#_IbQxI1=2ubCk|MH$ZkgH2+ z49J5xxRu6$_Cl;751@neZx!j^9NLc&Puer=Cm&@HrgD(~Kp6yaE?X$_SU{7{1SSHM zdl;_??}^HYnk0*yDdK3Gx)9r^&Bx1W^2XG0D;snh>_8i!}k31L}gXU8V zQ*x1EQptmeAP>Tu_Qcq~V;D24!H{&nHlYq~Mj7K7dD8sSREq@FvJg_F`IR{Yp~E== zmYlRe$w|M6i6(-AE*K`rycmN6ltgSHdo!JMTzNFGECc@P0K4_v4ZEU5 zKkTGA(0_pD0QoRA)CYMq4-#k|gwQy6kRN4Bew4|3dj4M+=KtDfsC|Z!AVjH87SQ<2 z!z5-lW-t^oV5UNp5r7=S33Ynmn-XqBB#}cj5i|+^1;kZ3Awfv8ujGY@YI9x~i~qLgY=!YI8x%Egv~QyyQOd4s$`^8~IQl$QybgfQgSMVB-BL|Amf+ zqIOAp>xptgpAFsCB~M0&GCEp1w0F#$2g**okoTjkQY1g5Qv@KHPnoHzaS&@657DLL zv9NDEM79z~#*ZwxHg0&{%W+@l{uKBy0~20P#iSSH3_X~MsW<0f>SbX}JHOz+{6EUx z0=lYeZNvSx#NAzq6Za5>I58r`-Q7bxBm_wakU)YH+@0c9iWUl`(3YN-DlN27r%GGs zci#o&!1?d_@A&V?c*fp@B=4NxoNMp-zH6?v*$4SY-~z$eSpRm6b^l@%E0-i4#o+}2# zp&s!VlUNTKjt<22Qc(&VmaO!6N}kH2;7YY$0$Vg52J~uv??0sZ&7@_T5B%0^e(AeM z^RCY+jXT~qH9qoqp!uHLU%J?xVsOowjV`#d@o5h>KIX~BhrHPMfcJmGftKh$^ryo2 zCK!k~37#hvZA!!3NOh5NvajspX;Tz_i%(GgF*Zx>o9I%FuOb_@zKH17{w#bz=cBMu z-4CX$)4Mf!xBlB9C-vV7zM=m{&;x^u0gp{yo5W@({n+A=KU?ezWQ$!vY_UCv&A0v! 
zI0Sl8@OugHoGDo6XTW#oqD}dzPu@hyUpXE!KV<~Uf0Guie0O@P`X|YSS|23V>b;xL zZtzxoztOe0MJAVHSDIan*=l}v>LK%2qb^&Vh`MWWIO5L<2g2BTcNp7j3ul{65p1(Q z0!dy3Ta8Cx7QFoafAk_bkR<^D+y1Ty86RSn4dfU*seESz(yc`+12P zH*#`xuV$4SUCL-OJ)7QbaXNKi!ini))<;s-+a65XYqu})oZaqepV)0n_}yVs98X*i z*2J;H%6PV49>;de;@Ea+EZe*cM_~LDu+KgfJ|hR7vjpC=61Au?p3*G^=#yGE z>ATgzO4ll5H7}N>>z^qpGCfgPYk4@o&E`N}zx|$^;fXu4S37OZ+~K@2$o6ZmXl@jT!!m=bEB4Pu8ax9IDAP-&a*(y|bdpers8e)5endu4{{yxvwtR z;<-HksMlETO|Qi{4?P!UvFCy;_87`!xA__DGMLUzgK6wIkjfKZhGQ@eDd>9+^h@A- zYt)c~XwXk>8uUe{I^F8grmMYn;n4uZcDSyn zp5rlAxx{;M`Fh`lrLXu67GLw5Tll@->;m?gozLF=xB$67m)-ia*`+^=o%=J{>18+$ zbx*@Q$b;Xj01e9YH~jY(-STv6mK?n~SB=gOT8U1~^OQd@d$PvPzBq%;J?R!}I}2@B zwpBTewajoIY3%bJs$b+cuXfzOzj|-r%*xjTyUM=_=q%$&T_x<>Rm|Sqh3wIt&u-m$ z?Ao2n&M)(HE@WUF3gNq}L5l)?(K&Qhz?AsrIYj#u^T2uHs28mTvcn=;nZgE-!-jACsZOxF_T8rB1S& zNBvdDMEle^SnV)Ste_n~xoY{3AGy6JxJ9_2?v~(>Cu5aHqxw`etl!}(SlPj7y z2%o^_Ut7a|^;PWMP{E#!W$fNm!fr3~bS~s#9+cx$SF@D9?k2hg?|E&Jn9hxf>F6q~ zx5xEp%Nhsi_$puJWh=sThQ|_22A5=5_YW62b}y)QZyjv)X`DMdpnCRbaM`TQQ;KGu z3eE5Sd`f;N2N$<;06qn6Qbj%cRMoIobrpNmRIuC2a4P0PfiMnOX10mw!+v-!_@38C zMRa-2G+zH&03W@p>uh@18y1aTDPRdyJEP{zj)zD zP~OnS$(aMkr=-pKY;syZ2WR(iV17GKDr{ljl1BC_t!K}&T6TXK7UqI54h=*PQG@pe zPy;MG&tchlY#q@qoP%AvONo~6vJj2zbdwv{7O390C0f5@W3qYk`W)NZwPj9aYZ^TY zR?hUzUcM+Gb!>fb(&A$w3B#WR#}9E(@*ECG>*GmTUF@6F#$I_d*t4LC-Cu@t;Qh-{ z1Ni@sQG=VvAYO<6eHHo#x1a{QvF1N0p+$#`XyBluOz#0d<@SBy+Ks!X8CUPjv@G3T zVxPaY&N*{)mq*Hm1wQfX)=iqa=14%~%8&gcmhz;j!#pWrfPItu*(Y@-d!==;M@9#B z3ctkDai}4B)CKQ7A2mP@a~}TlIP~|z18=~2=n>=qv(IQz&#N}#w$onn4JRh6RUV7g zEk2rNl6$ytLi#H;c1im?oMQLRcZ=LL?m1=0VXu&_A9)6C_}gppDy%@3u-DWD>=`$Y zJreraJ+T)nl9%98WI#wjjT*rJ9fJOL=&w2s-+|A^?6@LN%~wsS_6=ug z`DK5_!q+3!vo9v=rk>9;ns&C*JmyTRb@=H4yO5J>CI%ck*Ga+tjbPy_g%(KGOVmqgTrYh%hk z(4?Z<)}q`GJY_Q8pRADlUc74jyO~;1@096<-fS@ndTXxfr0c6JyssX#^t}9`h5Okj zRxT&m)_pJA`)p>1N#i^*U*zBLg@D54;T?c;%}&{|*1u zc@_H)Zb_-&zA9xsFr(=Y9L3Y_Pm;#okCco2Dn)6^R|Tp;U)E{(-Rsr%{$fnm{qsG# zE}y-t>-hd326i{t*zO{mIUZ$8*F8MJeJfjeZv0Ok^$`7Qgy<^fz*%Gf$MzBJK7wQ6 
zH1=Se$9*8+{~F(~PU|C#+MQPM*hie(M#0WhX1Y%2=FIetG5ka%(=u6vj+ zc6fb<^u;*Gs&EIdB@s?LX`%KWX6H((C}OXQ!hjPkE6tNH`Wz04b%Fa~?D2Ld_J4rBnU zpg$k_o#=lJ^b3#yq*-&2!U_{n3UK9o_{jDz2Nup!o!OI{y6^y@QeS2Q+N$H zfMZ}Ij)76kL17NGpa$ib16iLS1Nsv6{|@=X&&WRhc$Pa@<4>6b6JUVu;ldBX#T}%K zw#cC^gdBv}7sP-f&<8N#_)~yi`X3y^F|ZYLa5=_c@Di>?x`r`84f1be4DMhK+$ReD z9`#2XoF5_gctXfs2)PR(cOlXT+CT$Yh%zgYJn|t4G8w{LA&d|GyC!%c62f4{0mzv6 zFTv)XBDQoFu>~;0b3<1^ndGIgr0Q?62 ze|!o0R=97KfbiVZTg1BlB37IN-B^$Y3PBy{#9y8V7K2q_BiIEFfivI+_#8Y0e}n(y zm!NIy44g!4>7y?vqGAfrG?0HYN6c@`oRzw18blp z)IyjSr@?g~jK?qF*%wCTspenEhtM+d56Eb~dkJ=dzPq)EJ>f4sfSZko9Z?fIWM$TY zVm6Buvwo78wQ|v(3e=`q%<{;_{M{&GncWz>FH3N?7*xORzgMz2Uum;e7?XkwbXkdmb)gcJ;-+rBDgmAfK~9K4)5t z!>&%k`t6`!^0)qw_C69EsOMcTiB>6?_qU0yd55?cB{UE-t#L|08EPF?h zBpu& z$h>dD@^2fn;+w{-{DvW`oHJpS(`Kx4+=5k(y~NYL2SR@`_P9@h_YH;biZCM%$NUJz zaT4Muei9fa`OR;d^r3f_%(tFpviCh2Wbb)&%6;xWNA6SiVY!doR>|FV-6Hp%%PaEl zIG>ll;q;;W8%_@uFFLZyYYwb-%8oUTfma+@bMHjf*yYF?J70qRku^*m_aNt68~xJd4HP>sT^pjO43L48Ws1BaBa1};^( z9I#&XqW>P%bCXW1o$-4|?S$WV>W6(;>wp&$GhA4>18nhP-HrJ7da&*#I0$;-=zlbP zMl8I4655f9TBMrNA1T)KeWH`-Uc9gPl_K+rgJdtsP4YdYr4Cq+}GU}^0&d}U^ZMI$cE!!We^)K4`PF5fvmsu zC7#wG3cfo|(1+g={6@AmJ;^bkAF@p8ZiY48PID98NDG#^k`}9QJ~dtC)#*hV$CGQc z4=2sgeI>D1f8Vqr!(9o>jJC&bG~N<-z+_|0C6jei@0zTRer&oTlFgQev)PhxHXXr@ zuZF|ecu^P|Eew4DhoR3gs6it1)3C=l7v8f#oqj3Qq}d>9+EyQ{v7shacf2~^XhmhE*;skggvF)ZHVcaf z?dBIQbC_GO#j!u{xMOe5EywPxUnh2EvIFjHY}b*-Hu&^2tBw?&(2>j*9Z4_X7|g+B zWFMJWYGK>yua#oDT?gOWET+pdB=l;V5*_X|r#+pn(yi@*itAgWG*-1F>n?4|F zYQC_x-fFPA!){LHoQZwqOPspPHo9~aA9kHlbknuD;Adxi+LvQfE;}@3vmHKt%%(Y= zt(w!Y`!9_xpTlt)Sax9?a*MwSEWgIp2+Q-ZN!s?<`~5Hp@w}zIT$s z%ARobv94*l!|j>I^IMB7=CssU_cgZKch}EyY_A=5X{lM~)=>G1du{o3_v*5TZdE1h zTwTPDH3e*6o6ENN6f&E-EVio4d;upQe@e$W_X7B>D(v@eLJc}_zO@&AXExUQb49dm zK#$fB+KX4s^N}B&6RNhbKTc<$H_fQOr@*|Yv(mbwz1hB{b*588%L3P$rZw&r4f{Mx z>fZ1yuKB?opTh21QqE3gB}luA*sh|0Z7cKGx-$0#oQS!QiTtCO=zG-Qy*7A0=%2%~ z>li%n&V}%N!&}#;Eo!RA7(KYB= z(y`Jbziqc?&WzVRvzov2$ZBM_oLY9yuVTl-a<(rjVY^~%Rxf!8PQfuLj6*qmFKY01 
zH~hvN_zifUgYaKl#<1RBrb463Eok8~SDCqEfl9qgqBJ{(lk{5_W|`Ixl}xA_thFtj z*Wp+&XP!$=|8n>A-kqM)yDxescYf=U+{SLHE$p1pz>fGNaEF{qw#zMN+q^Qic@C#x zE?^vW{lJN6#*6J3D*iN!?wmZ1i#u-+b*qXpx}Gr(X!$McP|*Hl`ht!lAOTHbFTw{+AodUTs}_{cfuu!Ubch0bTE@Y(Db z-NO@O+u1&026o3bv29`_+dPN!FbmHP2XB(JZ*E6dCbPX2@&fT+e}%z#V%;gYj%Mv zAK3bjv3UYB zgBrjG)#KWfisSlJa@<~2fX__LJ{l^Yb|hXU=}?A7+`(dmL-MXa-%7?g2lnN;-voXX3F}$ zQ!elMcB_)xjX`DSx7Mppymm^};qrYI+t*mb>LhDVILNxTJ6O+QBkMb^VFRbtFSyh5 zEQ`Ce8uI}92cf@hKjs1K*nH@BLcbdNxo;39z9Xj4Pvj{0D?Rf6%9{M{dyx12$>ecA zLF95jN9_1jjnw|DUKyKv%Vey+ctytY&ZjbFAF#af4OTRKgH?>ru$tK+R=3#88kW0X z@UQte7FOa|*aZI%{oT-C3lBPc0vXI1_;2VJLqGK<{P%5~|NmS}e!r`d=OZI>dt^(_ ze|V6?AHihvM;uxHks~twqfTt}$6T?&@9V|7zg-k-{q&1i;{i)l@3M^Q$1JOPgXOg^ zv4YNp7hK0NAmm^tF!%REeh0|EW`_i_IJGa}zF;s5`Vf&W*> zH4lb3_iqIc>;exy3H~=4{y!7mw~iEe5FUJ+NXj3<1H%;m0e)qX=zA7R?}5)>aA^bP zA@qeDY%BDa!Gq5`i#-s?K`P(GeJI|>H5llBIQ0FY@AfUB-@?8F3AB~Tly#APn8E+s zkt%yacM1;n=|J$=nCOgyit$S?`~AProRN_F4#;1EKDO%z;RX z{DIIGLR*L}fg1=1c>r$!yc?hc|D_EFaQtn8{ut)}-18WN%NT>}7z5NG@p%ot#vDKv zA=;tsiJby|`0Wr74N^cJr~<7($R8GfF|ZnJ0z1J$a2~u3?t%YL@Bg%gLMxxP z)B^c~r6XZC8QS4QtdDl6c;SBoK?IlvvOyVW1l>TW#V}X~#=#b_7n}lDfKUrK{Qr{0 z!2i?l;Fp)cRdmQb>=9wvVJhL#812!JK!tu2YB)I1kZ))APacfLZOucYzJ)z8(>9jYmIu?VocBub&QF8AQFTq{8Tc? 
z1r?wPbb&cw0T=~qfKZEl==n){Ojl9kFVKSDpZ57T{Ewjh6A&~XfO{{&j?i_s1Qx`O zz!uNV(I#P@>S11}hhgVPJQ9pd{8llj1;V`O1p{Chx+~}}+5o*>^fMi$AL$MH9$Cz{ zsQB0TiTmI#_!MB2MDGDq>fhjFxp63)5xW}`yCQ#ZG(k=9Jv-FG8uP+5nAjj1<1-!Y z$)hLA74)Z46aB8(Nxvx0rk@lR(2w#<>3g}g^sVf6`dWH~?uoCU1$UtNE8P}hWTY&5 zOTuF89FklC=doi3SC;)7+!CAE3)W*bDvkgTB8OUSiepsEwBuLp;Zr_)MffJ-z5Bmk|2KF^29r zq=`PWFA#lXUnP3qzD4w|eUIp-{XEfi`^DmG_N&BK?6-(7*&P(0x4S4gWBaM(q|L7~ zN32=y6)TqCYsm_`Em&bY*n;PqCcJ=sp%(z%VCV;9oga$FFw|p;F+B>hqVN2j>8_6- zee4xZ?|CJOZg}U2ulbZoF8kC=FZ#60ob{O{bH-?xmRa>uP=l#rXiML%9~RQ zRX0qp(pa0)thGA1TYE*)fbP=7QN1Mz8}x_c4;UW=PR?UHcW9zb-e1rs&vi4iUQraWt9fAN}G&(io4A^ ziv}%P3zko4&f8|ykaOCqHtSQXn#{+RRq1SwPkJ(~N@3&bBsQv^#s<|1FW{+)n1`rA zI???Ccz@_$MGkWoOOB(>@SIJ!?x01F*37UIEt}yb9ciAdFw_*IIpB4 zOJT$3a4hCRGTMMS@L37Ge=W}ULi=rAk8mMRVRThRP|7s>2d z{&F*WBa}OOrfIZvXXrF^78=yFSD93_wwRZ+^i3#e9=6G8TyK|Ici1ki=9XP*)gznK za<)nKaovDTiAaln#z)d2j?pgaxC zH>Upij^ds{U)i>Kp^8m&p8j*=UyA-D8>AxzIYbeXVWMjDvOw z&F|PIH2iLpP|H@+s@O8QjLoMPvuSD}o1_)6@pGOoH!%)(Fc02t!8oA*r{`cC=EL(X zgx^?%bG{>D>RYTwU5jl+GZuSE8-_#VYZgsaDPNGRQ9P8RlQ&pukTtK~ByCQYc~bw- zg!sNS)-gQ?Y@)hu+C;YhW*ynW6QUZ~GNy*j<15)Tp$u0{m$LD5I1O{55PLvSgX^8} zTyja^xb!u8^Np&k-#g!`pWJ{MtC>AW8rkXRBshPf{ST|*) z)-YjFhe^!BLG#F=RhFRx`zK7E^Oj}EtY0mIapTC44mJ;MVY3K)fJ9U+n?zT$@pCu} z<4}%s|1;n_XAzx&{^2F?ozPvo0^_g>zIQG5eXp0N$_++Tx?v&}uJ@7TuAd^Cu{KU| z`godZ;+g`D*j3d!Q7hZ@!sixoHEY=~V{OM_)^Q$U9hZR@e9?fph%vY{jMqizA6yUbiGGhmdl9r}?!!2| zf*Ksfx$o1s_rZAu3c6rK0T=Aa?}88cTnHnt^NAw&^LY}Nb2U=Ovpq5nXBNxZp57s6 zec}x{t3yA_S?*;;^Bt^gwuMzK##zm31*=&vW%cLW?nVtT|IecS$DqFtec!YbuL*lm zg9E4mylB~RT!Zl{yypen`{1gWT;EY3$6E&EaLbPDZh4c}$;l2`CJTN5F2exGNwI}I+J%x0?PA2WI zOG)EFC#l{aBc(47lKh>!B!8Pp{w9m$uCiG6b(SceW{L7KmOkeSZ0H&2AA$ZJ)P5cM zK8zfo7kzI)-;2&6gLxh2|F0o~c?bUgHj&*Oc>k|OWb~H`={+_et;bfR`oxtKo&=HX z<3tiY#+&YAO!7b05d8uZ{p0UA_!w_U;B)W+cpIA*{^1<-k8ei?vj<~{*a1V<2u>as=)ZlYmbMY0t_qW({@Pu%^ha&vHHp#FlUT7SEABY8b z;pJX{!DalS;Q5~bA^Z3R{P2R4&_4+MZO~tF46ljMZ$sZJ(EP0H$bjBO2J|7u0DX7< zlF0gNBIEC&{}Zx@$Ir6|2{crIF0cgdAOhqAyx}k=INt}*{VyEB+}j2HHR%6B==Y%i 
zb(gRY2DMK??IS-z2J{8?fIL9{^8?1UxK;Ue+D@i=6)IKpNaYlIoOm>QG+F8$B^As^qX25n##7y^RV zT?ICR9pC^s39f)Iz%Sr`{}M8Q=dcm91)r@4G>|{Yy5WDlK_GaZKjh$f1!xA{U@lk$ zmIA?ZZvwl(A#efQ2H%3m;6J~F-64;F(8A}i$wYW;d$a@SIN(YNw1bd8kS~}5VnI3( zJa-)sY9ZtggJ1+K2jgH1*auF7>p-Z*Z{Wq3&?4bGLjLgGOR$9zMPb={|z9VQ0{T8(E1EGb_VJm1` zSz`JidoTeyR*2TnF2dY+Ixj-;7!NXmP>WhH1N4A7&{%-HV-$JE8q|0TwD!UApGFRG z1O5F9Bl6^F=J6GN`b!|_ehhBC1lvL1&WzXw`GW;8G{yX!fP4<~L)HsF84RMqbRg8C z0=f;zJKCYsi@akV@{VC>Eyqrr_0Zb|tz*bM-k{HrwtR@zeSqH*G~WR?!JFXqm*9!e zcQiu0s*iT)B7=bkHpBOfPzxQ@LU|I}5=M_j)1aFP-D2c9HON<*q0ohX%|_a>0C~q4 zeS~>&8(HCdbOd@==q)56S5cYE&=W@IH6YBEQ{d=JurvDZ3SDP-?}_Tf_FAY5+GLGd zm^;#A126hTE114lpGx;trsGaDdGx7rIenzmK<_KJ(=FvbdRuuuy`?-#Zz_+|8_L`1 zb>*XULGdcRrt~G9RCz*2RgfI2uxP(Bi}otAXgAo7=g(m`^xp&e9xBLP@Mv#`%ARll*L<&S+dcHCF>1YvetklYhHpqq2~i#A9#Nscnv>A zY`0URKRk5k8?@syTN`@M8u^2LFkP7#L+71R>5NN0opP-Z9d~UI9dT_J9dw-~KH$1Q zyw`P^c(=<&@eY^$lC4e`B%2&RldPNgNQ%4I%C4|u*=05?i#zMej!t0N#dv%U`=ADs zpgT#7mv;~%OMeE*(|5j#^off)z3pN`S3Di*tj{DmIVpmU`X`DG`e%vv1r$qm2h>P+ z1hh!E1@y>l_Ma!a(SM2Tx=Cy0#{G87t@1r1x7_C=xiPQbB+WqB+QErgijIej))U) zk4Telj>wnU5MCj-F1$g0O?ZdGs<3{=x@`9lElm`QURz}vL zGTV<;X8EvkuNN!L^kk(TPgZ;m2P&iQ668;4Lo}8cF<9rv<6e7lSYJisUUyNNbTHPE z_Qboint8)CQt9tIvr% zsy-|Hruxj#hw5EZSiK{J)!PGEtu266ThXQ&lUVsV9E@=g^doVq2ihOuE$&7t*0^c# zTPavuBq`Ib>87+L&572fPZFna--V3q%MuwiE}mk zrj2U#By7;`j60~^7IR&DM)VKb%~7n?7{Qtip|}GS+EX9GYW2aa`W&8&V<8-EK>zP1 zp#~XPBi9(QNT(dHaOTASxUEAza?V|i0#Msj227vxM=9L&yDnUhtjHY>AE zV`fI1R##fTPJ8Nz?u_Yc^_r6R>(?i~psH0Ec4ahcRYtN#WjL!>hOydn zI26YMYQPEb{^t>y=+*T@{M6h66+UFW8Gq0m|cvU z(-%)=jgn|qFNt~vhhr{G#rbB8!AI!#b?Bb2z&gJgHK@cIw;cOiD>Z3x^#oc_?JAmA z6(H@ejF6vMF-@tnJYBV|tU#l=q*AM)xJkFRs7JrDV92m6f2C1z?k?kk?2E>EnGcL| z)7c<*I_u>mvrcXzYv(1fR$d%yJm+Z*Fb)q;gZB&JIiY{14xY0CYs`9_YsK@$^(r*f zXhw4zokV?&e$wvxP`UQHIK`IQRMm!>T#eeQGVRLBdfl?}PJ`mI0i*nq<;FQhJ4`YQ z&YPs?ePxoK!-i>@te=+7y6MwdCnK4)G80+zIYJGDarhz|pSxIwaj3;OG-I7T1M6%o zzed4Os{+kwGos!$2kLC|mbA7`mTj6bRk5x)S*5BeTfMxoM60;IRyV({T|cL0u2DwS 
zQj^q*ZKla(ubCzm-!n-pWTV79Hb~52z2r>RNl8NjKb^In^K==G`TuDFvM|*D0`!k} zU>rIz4q#CiK6|iRNwAJ^s=FfP%R3X5OFA;t3fc=bb6Tr)GFw{p zQ=4ZSCN+*4C)95-iLH6nWNOu28x z6#HOM|JR^@q!)e*tO7%`u-2Xp&o@_-8s}P4?OYd8<(x^9(%E6Mh5hl0xwFz#GJEsY z(`Hs`C3iRLCUnlyk7-|Q7}dJjD7@vgacJY`Mxk|V7+TE+p%ttbUc$POg{%{u$J)=U1pQ4yNn4^*~P^KO`w^1uX4dnsXWgJ` z)(NR#?J1?K^&H0Q(PyZ^jV2rqJs5|1@cz(T2bL^==UPP6v;>}Osf6;E=}`7EE6P~r zMyX2!#K~ik(uA=@x!5IH3Q>zol*2~qRVOd%(Fk0)NNdv225p~#6WZQ$?r7lyT(mvA zK|5=EH?x*+J!?&>Va@0KjN|b(=HXS0{TcN82sC#thUWo`pxrx$Js|Mp1#58s#dXS* zxXzdo);Un@I&X?zJ6RMl9xDkQPm>8*Qz#d(sz$+YWv7z&@&zg$%hsv7jviNa8Tm}r zc>z{s^H|-npVggavW80stGmy5!9CRA9gM*xjQL6Ezp@nP-k0N;1oKzIdqcZs9nSl1 zhUeLabMHGeY03_B3c-DhgYa2G0o%jLZ+oK1ds~jgb8CgvZA+W1^QJ+$i5th|?bjWZ zw_Wq8oXrYWupVVat6^52FvQ9>b6Lf%?*$)Y9$v#7IJ*$9&CuTm%}uaFW6+(u0doP` z72Dyvc4Pm;KKQc_pcP2 z?>Qtk-T9H&WXs{~q{%Xr~-R4UXda2Y5mUe14AQMNKllXilaVoyg?kBr>`fMFtnrN&kE~>7MT* z?Xycr^VL11e)3&XKm3?9_OeKICyQ0LvP5kIOEuTL;OYR5N9Z3#-}i08`vdB|Y!{9P zXt(W04WONM1fRok0@oj$LH=-o$o2}({l6t8)%TQ0`F%rDeBYiF-uET>e?^h(dzmDI zgG}=7Y!cnXAAR#x9B|*_Bmm=}14qF@7KzcO=Ul*?JBc3^u1VX8zORP%0%&(Zy9WKv zgLV@BO!zt6gW`2u|8Wib58lQ#4M}4|=ef?hSfq-@|`rQESB4|%PkNaRiI{?~l zH&KIs;o6Unkw1J6@BJkqUje_%;Z08y3lKBljEml;AnV8gjbH@-K7{}O9xsN_{2qJ- zK7GM4=_7m(i;r*-+|zvzm+<%(zz+&MdcncncwI*AS3>{kUL@#OqwcxTpLQJ? z%q`4AXivP0F}VLM15$^s{Lj$*4Lkz>ZiCPc2~Yt9fJ-G`V5(3boOy}t#`{|$@}8vqv& z2kL>)4iuPw2mjOqkF4c{X2V-50&)g|*YySgAOa)-;hcXJXaR!Pod*_yrC<%%2n5f4 z5S#(m!B^l3_)6F;hT15IlD&s0Z!fdHyhr z=gYu2*bH_9!AV{Op94YVKfnIOzl99o2O#7C0=R+>84PjGRV9?$a!!E>jA z0wC0)8FYbuFdvM7>!H*%0C$fh2{`#*yB{}oCb$g=A3ms>HWJvcOGqqalX z{ns0$Oz~z5nOH&+1@cJh3-@1Aj4?N14 zSRoCjyYX>jTv*ha`2bkPoYUqf}wPvj38_?{AKArflgjXsB;b#}fgR9y zdkwTh31b59Yo>|}25r(crN2~c=%LgV{hfrIB^;X5pqD{6LkbVj4<(89nzme2lR(&pZ+S^t-qCa=pChPy4Puw?l-g!o9ou-;A$>h*tZ;;+?J{1l?5B! 
zM*o3b)Q|HKQv=PIS)94>?OX9koiJL)PiE-==`} z+E&mmy9U}}*G^mQX3=K51+>v_8LhM1MB}!HXqELFwA}i>2$_dy#FE8}%vrnunaq$e zi{~4%c<>+C9b@1jMmwPIMf8&=_BrGEEk_Amx58%~+8EHOiMDju*^~CWhR|;J7~1ZU zLR&m@XrpI|XuW5RXsuU^Xth_5c%|onc$w#@c+6wHc!~Rd$*|ic$pV*ql0oMu(z%W- z?YC#?EL)cLS+iuO)eG25{;9U`_VTAc*4TKvdp8JcTs%MLC8gsYT6Dn2l6FmUrL6%0 zv@tM})&@-ztq#f%tq3X}EZ619aviwwHa_)Qw$+JcS{+$t#y_wx=7BG20PQbBaEc`YOP?s{N5c0`#@Z@K zg|<&IrHx?|X*|N0Rz`-3mPW>jmqeyYMj~^i3nNNph9YWZ2f|zB=7jgj_lGS|=nY+^ z&^={`Vn@gs#kQbN62g}x>O#)AIDD;`3e+Rnn#KUtY zVr?}I>xvlo-Y9umA7enP;_PTyf+sCW2o^0$h?Wc`BuNM2vt;MQ7s<_vtCH`HZC2=x z=~3#KI;h+ly(J;lv)xyRT`(wRjp4LQ>%&FtX>&=T)k}S2kNC!kJO4ISfwbGm5U~`QeiMF z76!3GQQ$M4W?{itUPZ(EL-UO+T=$TR8suPYg=OI?v}ZI^gBE64&|tPR&Cc-?^<{@j zy0c?tIR{5 zMH8u~$XnD=I9bwKI8~;(AW5zvKTEMTuSmHnw_3G4XNFoy)+~*}%wf&EjP+XCsfV;P zQr^)@Px@6eeHyE$#j#pi46CF?v2uDOE2T#~L&(Ga#yt28HMoKPpD)2Ulw%&0VU1mi z^%ed-uT+8h$_%Ni+?LuZJVnjrL6U~DNSWHwX>wJi>5Ang1&FZ-YGc~jF z7HXyEuGOBN{fbU<#!c;{v|qFmr?W<4600R9uxe5qt0cv+a`IGG5)d-UpD_16LJh8= z|F2aF`*7g3s*nMJLC{w%rH&e1no(;-jkT^+SL-jXstJ>pSI5bfRHrHwR^=+?R+gz` zl{cuTmvw7SFB#HGDq5qRP;fveHt#K+nCzdmVlr7XCJk4lrm$*k605{bW97JnX9%YN z@8cMFA2qmw{-3VLIv=b9qhJ=iU?S$(!2Yg+FP9<+Em34y%V}vRXtMt3*y` zm8cX}77%jLyEq2kE=DF=i@DH(>mI=BW{d+KXSPDW9c%4QWh(D9rs7U}D(LhS<#q;% zvpS-r>FtTK)7vr?5?hOu;%C&T#5A|7MK;dU2y0lOIk|3+R&e!I&7g{hnn9(wV!Mdd zgYsE5IEPgxXMzk?7I=as@25Bhu44?&Licbdj)xA619XS**wuynqZe!KS+bPZuS?ne z$RGM$DYf5Ulrk$!oY)sHjq6R5ojS8XKC-7uDXgniC8Tq%YGC_vwMnhJ)qR`aQ1@;4 zQO&1@)qE;h)u)VAeT!ISQUNRb=RM;a%*78-gDa@{tI*#Mz4bFO4qzT=pM^c&bKy7U zmjd8#nyoi3E=*KZR2l>Zrj05P$^Oga4 zu7&XaBk;XrvJ^R{OW|V^Xv&xig^W$2pwTc27)=mOT9PUDSzIFZ9BGts8}5^JSvV@^ zIJ86FVc>N+huJ^K+4r)%T^Cl(Z8%%j!U_%ztT3_u8K2qDwc5|JW;0pVyo+TmI-YS0bN_YJ{1h|~F2OOm z7~UU!9>8NKv}>T9zXslWJ@!9r!nyyg@O?W}$Zm%b+3c_->m436VS5l+Y>y%H?dfES z7Z&5KZDhE40qJktNV@CKlJ1&sNq0GmbVgaMvxp_yL)eosz*4=r&v+Aa;OrpYBS-Nb zxe9XunxkM2`rNt}b72E=pe@*k0t+9q8~Fpgq0KAsokx^N_lN=M9I+;?BkrVeB$(8X z#F5J398x+|OA4>dBDwu5Nd^}+$n5xxWH&R(t;b%BH7t@{$s+k>EK)$51TM`(4WNGr zn!C`~b@_AKL+o6629{_c>fFXB)OzRqDz*TP_7^d 
z#3N|U#~WWW1{i~RdK<#NYdGM3$Ao8uuV6bsn`q-R&OrYtYQ1kG<^uY=0{tD_C0rAR z8tlitNDg8y9>E@r-I0gS$N zn$VTM1MPd@EAS2Y9{ePImdE^Ifj2@=ydh5oB>)Hd)9X;aeX$?qA@qez2JA=ex9q{| zBIemV^t}x=uXq)&2k3X~73{-7pWXk34CrHw0d%#YtM~xdo#4;G_&^u@43J6S#P>g$ zgaR}TfeVNPrC<<1HV#5C#O#ode2219+Z44B+`NSO(UB|Cv9$kMI2e{`2cG{{1|E_y*4fSRLM03E6`b z(98LQH~t+0;y?x{2Gu~wA38xl7z9H8Ahcr~YytbgNpKB30586t;Q7x$(ESp8@eizJ zjZ2!~Z{?5ykQKnc33YjzKltM@3Z#HMPzLHi3+M(yJB0i}XvcCOyx8Ji8+0Cp_KLjWHA+>9<+jP z==4KpKBmQDsI5Tmu^!WOCo+%II8@$667&7jet(6Zz6(AE?|?UfkUDz`yec6GOBEUSA9TTMiij;gO61#s~BBZ|u$a1+{pH zdGQ@`sRzg(zN8XpRnce2K|aPFo!iJ`-ow6>caVeJ#N4=!>te3rI;G1b%@aV#A%qNK??13P^i80x2mh}qk9Ht`5aW9!jEyaH-S9FLfRT;BOX4()VLJMi zk9?&R^{hcHo3O8>1AXg5<}rx=jbaqWp|umcK~5s`co%vtw5N)oBn|( zAb&AeLM^1oU?j+-6p)Rn(_^tdM$#NLok;H^i@AwoKI)_1fHdTYRHuoA2Q!YmeG8x`yel0hx#8HN)2Q%jz{ zl~+NH_3#%a&>JEbIxn3>ugZqeafLWKqL@ktm2+voN(t>%t)^Y7&9pU zv{8LEtykMYa3;Y)1BEto^$ZzN$*dG1H+sxCgZB}@j zF~=I$2ycTrxbLF8GF?cVa z1KNO>)SvPAsUzNsC*o}u?K)$Q&l)t*ru~+dwA0#!w%AUh4fbKQ)*+5oPfVqij=8kV zv4qB)YG{d53ynDS&?2Wn8gg7pgA+H=T!$kx+wKPS+WsKwwq{W$@|boD7PXnNsMQ1* zKZ9LF&vGEVz1;P{TRz@)uYgl{+dG8s?Qu}0tV5(V$PGXs%DEXtwuUai8}R@l3A`;x3Pa;tuyW#jUR2iJP5S(&)(I2Hb$F z-j>C6Xj7flGuRC^a7GQF`3aUEZ%x560~`v*nqm@`JKpj%?rT6R{B3A7(49tt0%$>S z1kDdlpn1V*qB+5NqFKRZ;@;poNq112q%){r+7>u0oe{8BrpbT5OoQJQnOdK3WU9Sb zrqZ3I6|O8PM|;X0SyDEU#R9nf*VApE_o4Se7`#8;@=rvg|9H#YjJB-7K9yyYwP_^O zk`{zJ(?EnD&4~=9S&^}#nUN{t?ucwjM?|r-Euvc565b-y6xJ(SAG%PkcFJnGs>yrg zDuQ2^D-C=gSK`mIMZPRsh#RsOc(Swr?J00$iGYxU30c(L$(RSwzcdXsNWi)-9_KY; zu*Qyp*N9f9!5A}|6FZUm;=HLRK19?RA1!K&OBBzD%aAt37RWTjR?61KG|5#@?Ut{I zp07|EwNjxtVwYk;*agMBDPJk%gs^;e5X)twEm?jnljXzGEbnIs+dTh34L*y&I6(jG zbSynnu*Oe@&q&1j7VR0FrcC`wM%0sRN1Z7i)Rqz`YDtL@Hzp@Y>XOr>HA#6gl}TlC z<%#w3CDS?;3KIqt^W&B)<;3n#%A9&mDLwL@Qd&4Gq=vG5Y6#1v2C?k)0G3Ie^b8>n z`z{FHKNemm6>~8Y^B@E3>vZHFX*j=xHud8k5#8zf)RtjQEtziAnCUO7%M24&XT(Y> zGg72w={d5+X(e)nskI7u)7urZQ|2mVBrjD?OWdZMlJJ^xQtVyj#Hp;97|9CL!dZUW z6qcJ7!m`tXSw=v(4EjMRyg&MUISb3(T#N%)or4-=;~X2l*PSD!)?96B%)_-0dCpXw 
z?@JYVlSQR@QzgZ@Nz#JcOxfI=BDw7BYK4rfR>jne*~-c3qbk!8xriv16<{2LcQEER(C2el^6e|dIDkM8> z%{939Lmk%o4YHKms7qOmxc;KinNk~lDY;>?D6wIxIKDnv5>uBg6IEL(7g5t7A6ng` z7*e@NDX@Hll7H!OCBNcNmHhJmR`knZ1;0#|_e%rQS#DDDGrq?0_daTHxg4(x_1Jd+ zz2&vYKR_>@H#B0ubqoAf8$4gTIwiLwe`vR-_;yc8B~30?zq;dxyvYS4-Eojve;eF_xar%w@mRutCfOjG)NDWrD_1@*>Kz|2&U z-^>EBPfxYfv#UeKy>q^-OZ!?`rx{0NCpLd1JF)(W?8Iu8btq@qi6x+jWgYXM@d;{h z4P$U-2IfKs{9Zeb2lRP1Xv6dBZe%ch`22@Cxc9(-h=S&;lmC1Z@|$l*KJ(qlYkmNE z4n~msU?RB=gYY+B|Z_JAzFzV~6=dtelwc{!#{PGe>? zam<12N4>~y6!)MQok~_qQps`&t^--zNM;CfOct#qqoG&GVBlY5*#B5$Fq6dwouG{+ z1}!WxYI??XjKNuG9-W8xhq<`^1e$}O8~v^yKn4Ttv=Qt98H4Xzj&uL3;knkzk?A_^ z|A(%#0I%vw|MhoH3{43nK!|%n+}+*X-JK)^NYDfb8rs!@L(qM5Nd1oq9zvO2#;8@!4CfF=)5 z)O#?WbNmwMZX_dmFXT6l|E7KTd=~wllo*cHFbs(4J_EG>!YTCkqQ9O#wix|x^lQ-1 zzd=7lKlCO1$p6J2kauYV$~x|gTYBH$()@+vukbtk0lfWxG3B*0L_sC20L1>UXSL{z z@gK%~hui+c?YuvX@2i|U52`@<58ZIve^|lmb@11`UuAI0e^8m_3!q#EmHGeo-> z7QTQ_K;`1>WFy~p=9S8E6?{#8r;|X4flMfYT4)31KP-SHuoC|2KOE-k({L|54*!6k zLH|$be*;SUJ$MV$Z;b7`FtX>B-7VB>6^A#3Lki?W1vEfADE~pl@jrbK^(=~i%MmjD zb68am&^*u4Oy7=0@jbu&HGBwvhgU&)0jjomk`Z-b&8u0Uya#nu@u)Tlf;dohQ3U!n z`49fX?R^-(635Tf#}CxUclaRRkkNicmiq;5@)?=`$NYPr-+q_BejAjw@;y{7T?6F_ z=%(R+Oqx!79qWK=qp2K7?)-`sVl#4nuJ=;%UEhrp5wv%F(IA z^Jt+yyU^;#hgeAYmgBLk!x7t#-VyeCoW}`y9If|h9re@_{oad191nu>BKCpKhVsuq ze7(=B($>7DnhFgtp>E((3(RpoX?J+rT8qXse!HR z+B$&magP75(L>&$W`5<49UYF9eizXej+<|xvoqFjDKkSKewe^?&ouLzjH4FKDXTrL z>PdYDqZvcVQst~RS56z0%AJNaa@?p%jv9B$VdJ@S&}6afHyM#V<2K7Kll`*Y_?&Dp zx+WV9-q&XL@o>0NDY`(Y-h7LG5?;50})*Ni5}Rl^y4 zLbkJ)DgC zcE(v*WpN$P<9!*L_AgnA-?5l|C=0nqXd#}=AfAk3Y$~;7inr&q!?gOE8>eGe))bDM zVy(F3VKGJSw6v8Y*0W{5T@XG?jO=hsm2FPBvc;)HHageHsB^QdbLo~fF7suT%aE*a z8I@t@J+jp4v@CLbR0i$ek@>cNNWTqkGk#3(Okj^p4}Oed%m;G7{zvz1KXU&-*7*LM z&wa@GX0r@(8ZQT2EoGO7i)@?iC!4$?WYjxR*7;=0YM(+G@vV>*z6~<$+ab$*=gMN= zC9=?Wtt{}_CG)&bN}ngLna5kw?f$!TxG=*xG27bX_}T(7wOSK{2kk)FKSlrbaMqTQ ztm`7s59NFw#QDt6P`3M-%ccOHd%(RxtAj&iWk{?Hho;H0&^%cZRwfI>YGoj-Rpy8F zYWhPLY5GD}YvzP(*K`FR*R}^fpl$VkQ`_wOo2JoQr>UO}?mA7KE8xQ@lo$1zCrf}Z 
z`awKPodoKiDG3|3~1YRn+Ae@B#$t*vKXEcs$eDM=yC}EZ?PIQsMBtMy-94>Q{6QnmeL*^tG zXu6UsH0{X^+Sa5_gXX091`UbJ4eJs%8&=03HmZ!h&!{Z=6{FI~UkppabOy!2I)fr& zDe?zjJ<5yvf_i^5iREAx{Sc19?o9T&r=y?3+A7UN7V^xR`5BJVm+37%nW55^87CcC zY0{RNr)kM7(>7+-8PsRA8`Na<8&;(c8I`APG%ig!WL%VV$+#flCFA_KpN;aOb%wbS zI)hwd$q9iVomQbP_k0qq_ksSF$5Imx!FE`eL(WM&3v&#lKX;n+6@?)fpuv=?s(NAy#LQ6b;1leF)3SG;&UKu9k6q4R*sQEQfi}UBNoPa-uX=SxQ}% zvs736NM&`Hlvl?|Np-rWs48DuP+4J+TiIxsUD0inQNGYPwQQ|PQpsMEgrf5%arw`g z#OD5B9Gj&xicNa0CLJgz&!}QNz;;*zOQEL*{W|V_s5h3X z1`8=~aF7!A<`gvsOF?6_|jQ zd~Qo|`xGf?w~^d-H_2}Im(2DENpDY-)V54bQd_Ymp|wUE*V1kf-8|ngqG`k^tYMdN zNZmQ(pqiV;ftBAG1(xcJ0*iEpf%%XF**e3ZES=#;^#7Nr>%XCM7WTqu6MYfp^SZ5# zJ@=hlZ|%l^=$Rntz4#BkPLkB?EeX9L64w(eu{~)L-IFhobE-69-L2Y?u6~2S&J_m! z9XkwtThADJH$P?Q-SDlUSB=ils}f3ehTg?c2n9OBcc_DBDer?F^o4fvZde0LppWl0 zqg~m{TECxb&I=4Bc5tdh586oNpqqpb_)F+Ogai*HO3*;I1S}{Mzxhq#Gp|?U)jzE9 zn7d8u)_Yp(+WmyqrQ;i|ON&nH(x5YNsf8-2)ET%|=nURq+5aSZ_w|r>&%ytImFNy~ zoWu7T=92p_;F|A3a=xYHy~7hEVEJ_M8+H(%VNdZI4i?Yh81WcN6ZfG)aa~p`&P%(+ zamiA#U%Xjt2k#V{1&@pM+^@xYj!t9U0j)Z%O%pWew6^dfb^bSW?w-%wIFEUm@(%O$ z0*;-0uNLhBw9|&!12RJ1yO!(T>nDrTdP{L!?bAq$hp>$cWxl>+|0cP+xX0jos(tSPD?S{=`2%r`iSX{aGA6tSte{Rka64U z#dvF<7;Ro91{?R|FUYIpg-(o?>BM*u4Dy%`hj=lv?G+~ff09({=x{Is#3akf$|&OnbPiF+ThLqu1C+UQBki!2cG$(W_q|+u zKS(CDL|9KDo!@b0C zKU~o=!A@eo$#e#n11EoP_MZ^PBRMoOlgopP`1x)+{S{7#U+KM%Q)sKbD2L$?nIT$d zsM~`aH**|5%=zI2eF4q#yZ`IK_}|AfD0rJ$56}(|;R8L2{&jd9o`9!KScFYwZ-*@t zjwdI)^&ZSL^u|Ap=5z2Oyacbon}CFTFp-6+C4Ptpo&<6HSioX=4i^lmHTg!kYV z(Es}v#~ zRaW~DD2L%I_DJa67^VL?DD5}l6;QvSGgR6(U#Z)6y5@*`xHC|*Fk5Zv}b|^ zC=WuRzNgyAAEF=`a@Y$|icSr++r;~K(*1hTSb#UM6pazesP!nhjJbaH* zG-}Xjri2}|XD|8r0{n=j=!~Ga0pDX6x%yGcdVzdh?Zo&Te?YH$h2up~t#uZZ51}(P z@&_%i&$(2A!GGw?m~smn5^hg#mfnV#24jrUTg1C(zW zIr?hKxP?4@A7wm)-XrL}h2HP~XaB`<6pp|_H~_k-=#Nv{44E+u(dM%!Xq&g`r?23} zJjcG6r|}an|F7wmv-uoZOXlwS?wTQR8N{vP%5Ds6HTkL)qV?Zb@KD~#29@gMHNr@Nc+ zdKMq#G-Kr?dxcK0XXz+D$YFetgS5$hS%cnow2tt<+8OdR4$)V%lHN&N$8jyJxy9I{ zo;l^{jF?glm_mtxnT%6_G1^lZ>DIKWE9LW}Y~hqGo_9{CYfO(Ee&B*1tWcG)GR!9M9RX 
z#+|`w-*lqvH^YB0$A6gNE1PD9$*5(Vtecf4tF3Zn#JWUQSXaxiO|vYs>6Rrn3uK|q zav8AMDD$ij$Xu%n(qnm3x@X`f;(N5?d9>ljwBg6J;>Wa3A_lbn#cA$i7usMpr+Y7Q ze^1s`?yM>J-flZ1*=B1l8}037ouj9$at^|0iIO4LBw6a3C5zn(WuaSz47fGOe76qi zck7p4w`DTNZB)8k_es0U-O}pxq%=ExA&qwIG{g6(!;7iIi>W21S_@*}bpIXYe%FWf zbs)J`5as8%A3tCR-{aPLS?6vlBeSh#*vn0pc>BpBpKuxQjhFep>C*3)Cw+cp(&JYv z-F|J->DMRieoLgqcbzo(?2&r!b5iShL#nv}w9*a7*_mB$jzB!+crj(fq)uyJa;pD( z2p$-xx4R-(-f-L*#(E-{r62LE@Sh+{1KE2Jn5M+R(Pgi^Gj&K?HjbBJHIoYPNLY#dJhRN^5kYG{W#8gUcOrumscWWx62Q}qUt28B%JGDjOr?ds3f79j#f1=3=)M>K)fLO9U!2|Gx zey7bpqz+z)r!OXQO(%)<1dQ@}WgPm%GCx);y>VvJ6>lT$32xGo=qF8y;ZmOzFSSYO zQj?S?RY~Pikyx)OP3+VZCoIqw#;?@o#ckK-#GceO*}u)-tSW0o5_sDRN5hpB`>Vu^|BOl9pdTXXB{b%r6qNiG^ROAUAm9dq=!gV zdW=-0r$||Pj+CUANKtyNCO@rRlbbeAo1MBsn~}WLAT8;HL2~>f21&6W8YD*Pw29$5 zZ9=F{lMn>NlHjk?C@<*MH!M&R3K?Zm70{oW=pEf>!nL$k6W`n4lV+Ijf4;qB0zi$wh z@~1W|QKt=ygJ_*LoLIsmblOkoi?2}b2hq8+knsp>UC06VHw&`!5X)Ub#^VecijX@ ztD7My^$wC$KU)&&10}95Qex`32cs@qBI`;dytZCKYPvN+)r&L%Rij$Jio;r;va4F} z;&-&(`Tx;+<><8DnLsSwDL_1L)86W`@I7c9s3h-(VVDn{XxE}$T2HRgg#XZLByp|y z53TqQt*#Q@>MLQbp%T&>Cqb?064+8G{w+1)+uR}EO@rduxK86ye^BFAds*XJ^|r>f z>^~aUVx7h{A95jEr*+HHXCQ5Wk+;;?ol--gBbFb562&%*hpZ3^dp7R&nl{FOD5+#J+vM*tOgzHjV!f+q!>? 
zZIw=ATc*?4l|T^`@Vs@(|1dfyfbA190)sFInw!b@(9T6WrH8zCE_p9rjK|;|ZI<`}TKxNIhW_dc$qB19b7azLVS^?M$@e7f=Ta$$OWQa}ArwtmV^W=5iaE zvD{TGmivkMaJWnzPLe4@d1AV(Mke9KOjt4`;}&fflYw($Jnwli>HS&8cd=S*hgN9T z$wXp$yqkHUmy{f>b#w4P_9n2QPb zDH)BriQ#B~XxSSh>(hB7I<(qcg%=|u8#&>eV8Xk>g!2u?f{vsbdSMQ9bDuT8c5;9* zIiEk7$FKGCV`%1k<%{WyL+pcDNgb@={)qc|`6y!%6vdz4vhx?ftMDc93IGuY1wk zyn$xpIFItS?q*&(z<5A2@lL%5 zh&Y~3gIatKI`a)G@NxRYL;vLO&{qD(UBG^nlY0pX&CMK_Q@{Pkn4c+Q1)7=n&<^PN zU&e!ZfI2|S^ie#RYiK?OPr#FK6P|+?;k9urh|D<|JK{?O;)SHM=cAbm6g2EdRNRMO zalr=d*WhV*j7``p4Z69MF%HcjG~Lm&M$7DJK7Zv|bY6g$l^#&N zV;;=g9BFQnBNoUSypSpQ5%w%riDT>`RSbUJR(uia;y-Br6TX8lc#6g;nuPFHou@BQ z&#m`ye!5Cue1tg(ZJ(!TgXh@;@+$rCP2OK=y$>J4$M6|^4it|BSic{I>Lx)(a{|~x z5adB0(7AL}*e(A<9d)Ao279o(b#&r58-gGP(xDKlpb>88{e!$#8H>7NyB<{LrmkMA zY*JlWS6Qygw!Z`YzkhQ420n&&;B|Nj^x8HYRlcvV1r>)6ghL|y|NRHOAEV+Jh3&8( zPErl`;yOHuSMU*j#J|Sem~Z*XhwuhG2g<2Y9)#ZWQ2xUVP#%Ojs+im%5TYRka-bAy zpqZ?#6OBH+j{!33rRa>Hqn?Mc17G40UlH-u1%Sjv-z4xa@=-v2%M_BX`wC2@R4-v2TFgW8Aj9&x-wruh%D z-8Z=M|0)^sOAMap>8Q`}PPer$^ICZj=Rx@pdjDe*uf~Dlt>aY6XoH>`-OLZIaI_Mr z)eLgtd_0db+Oig3pb3o*GcoXzfGm99oYN(Z~1# ze^NvGC+uwCs790WBE~-E=~f*m{deI-(NNZjl+glBJL=G#a{1$VgrgNlnNlfJ4mw46 zITa|?q1A#`H$8GbqjVWs>xg0}z4Rot@(>QuKjCTi>38%snw8>%G zWIt`P3om9H{@7+ssBAEZk#&a2vc@P&Rv8z_N|SO~Zc-;h<633uxE@(NexVGGUnL91 zZI^zNW721IMS2Whm2NGLqDJqswxfR=9eTf@|A7UkH!ChPSaXWyc*v4`$DC9C#Bs89 zyoGF>WH0MXXUnQ70kUFhgbYuMm!;-uve-OV7Fv|ZfJL>;pWZC})4Qc_`hd)tzEZj@ zwn>NiF=?IlfHY6}yENYMtJF`%dte_*9ezxm2^i59oLas`|4nUu z8ooD5JZq=lAuBDdWXQ@zmRS4XvxLY3+ZdT=mn?JbvZU9(Q0CZINSA$sbl7)FoBe!g zv0skou|?`_k4TNpWvQC=hE!VqCgn5m9Pnbw@M6mFV#-X3!HiQ5S}%CuL3xsEc~gGa zJR5Cy)>gzb2gny4)=6vbI+3&k1}cYsFMbdcB%8| zml}^DsdC>Wc@Hr@XUiV6l$19TU_KRe>u=@@lCet2>CzE)7VEOePT7Tp8dM=dpOc?7r z7~%C&zBfQTeE}Nj4!A?w1Nq#CAZKaBi>VI@#%qa^>X0O<49S%8kU}X9sg&Z7W+@Em zk^GRwk{i5UvVsmsM!-c$^Lfm((7pCpCeBMCt-N_@ai67Q>%I4>ZUIAV!&)ybFif!ENw8jS}NOFP7| z)P*Iyo)?2Y@w9NPj~yq~apqDHXD6lc?ou4@Cx!80k{=%{xe2L~oscV8@uiXxUoWZg zU6LF(D2cIaBtB-ZCN}DVCOZ6ii3Ce>wA0U<}{HafA_ZQK>H=e$bNMD4N 
zNz?(y-UP}I^}Mc3Hk8t2Gbu{3mi!bK$xZc^?9^b%Oy&NE)Fequ&6bqZVo6G^m4uWI ziAx@k=%m$}$izLG@c6qmp)t>CLL+~a&@j%#AzT>=1Y!xLUX&O03_2H*=?f|3-KmTN zm=9f050xxSOEOsNXHJ%!EKAAEa+GxLlS<7Bl;rFPNz6)+gscpS%PN$ZtZIqMY?Fx0 z`4W~fBEf09G=V91X#x_T75}&Oy=A`#r38VbY`K)t+6UEe_aEIPZ+vs1jY&WTt0P4P_mUc3@`N?0sJu{t3Z?{N0iQU}-2K8e=$9L56-0%^Q7K}7+% zedlGrjci7vO2$Z}^1FZYtr@*oK*;~tE%WC@hxogZKinC6pLF;y|`5Oh*Q-vaj4iT zc4eoV;oTp}~;H_P)3yRc5E|yK-%yhI>|TK z7h~IFF4jG^V%6g+mOZ{QvnNcZ&q=`V$d+lmfmv6Zn05}zr1lMX4aa1B)1xxJ{xg|a z&8oEmN?ExQk7=P!u2H@d=xl}Myq*Ug&;VsT>&t&tbXd1b4R?F&@ zcnqt^{n0oCqm*-L6YT(PP|MdvXs35k2ffrmKXtGG|6vjS!&36zVI$G5;H0}^COCj6 zgILT{>`A*QBY`w7S`6?j7ffVHRW8`fiK+7 zkD!w8=k?*m&8HtMqz;x)2fUrlO720SXvXV=z{kT>CY4!u3Xb3fOzaz&Y)0v1>nX@u z{&>w+CcbmT_A&$iUo<${^FaAAeOz;&M>|kuE72REu6v-7uS*B$i;Ky7hp2-Q>R=81 zV3a!8$nz(+a1Y8hV%W(7Y!`9tHUr|=#yKlp#3yF z0$0e^(cDZOFXzYeC~Mnk>H*CxG-J^7zlsO*5Or{kJs9X{Z`{`6Nb`;*121r-yRm%N z`x75>{ERyJW)`1?<4xi~AN`#+`Hf2c^&*RC+T&;RKZCb;YQzcpBz3#uF1qOj<|ed@ z?q@tu&d^8M2lF`d!A!CbiQgRJ7U%HEX!pfXBzHD6`*&%x)Q z|Mxq`FX0_{8B{K=JP5u2V98PW4hr>J?ZNPXzxofkd|e8)&;tK&9DkDC{hR#m4+hNd zbl6|%C_j-k|G+Ed#(WKbhiBn&xCRe{Ue}zX8QcNNhnNMl^ebfBU3VGNL~FQblE;}dM5 zo)4g>?!I`G+IXA)e;+He(!Ky^;S?wzLcjOJm?K?e3`2D9K+}@$<%EtGT`q`siAE<0 zmn#d60{o6LG-}Xj!i(sjUVACu07G^Nk6Ct%YIv*AwwLWQ5WxUz5f-wnCHlKpC;FRj5c{hV#(rD(8xlgfV$*an8b0OIPM~Y zJxd&?>8B^jxQ~I1;~& z?YmH;XatlOu^ev4pvtfGnFhwDz03Gh7s&fhllPyXe;%bx4$&t2@gMf$P3^)X+kp?V zjX1W_KQ_}pHquW=@j=$%GOU(ibT*>1pVqm6Q}rya#JBhm-2VY;R1bjiBW}m=IodCR zntIhK;0(UhF?^{5>=D|FXSs_xW6XcR6WU1M7^RP{Wvr}`TuNMw=TS+S8t9encph`9 ziKXbMX!hWBoMrUfprt-%$BJ(3jypB7Z=+&Rb?~+UQ$DBRdzfNRzyYSXJ+#So`sZfa zWCLxo7XNk?`*T*XPiUCFv5b9FOBgGQ7_)=4$pTHS%+s{WTura^Y8T;otYP%-CaTlY zN{?!$Pc$;x8qinN-$KRkwF$XDOMok!@=ng+y8m=8!I*PS=hU-)ys@k?Fq4&r)-q&_ z|1i#57ETC~1rsA>-oymypOhwjlXInKa*1@CR!gU8i?o~eNXs3Iq{(zGp2u#fope^J zC-6j?gY`X?Qy!<63!Lf>*|XfRLz~wlRxC4!X_?ssS!Bj@ z7pK`vzl8_)()i1q8DY{jBSt!ACQIAQENQVUlqSncX_(c>K96pxo;8T)v0BP!?vfHd zL%GNtM{(*Gl5fULhwqU$5ypcF&r+I1JD~NbBTEh!>fep>!w9dJI}#1S 
zb8M`o)7C}W?7Z<=f~CnmQW_i*q|Sl;2afDNa4eGw$9gGq?2r=21ybZNBKh_^B-ieg zWZOI@nX^8VjF~#gU=K?AG$5XIyqIsP_gB%n;?6S1lREHXsRhe;J>bEzn0V$8Plxkl zX?3xbCRazPcbhG>?mYXzJzOf?W2M|9MM^z#q}ZcG3O#Bi-=j@(J?2R^&&|zn+a_tQ zCnd${x+FP#DoJ*n1@S!+X8}G;BJq4loj-@xMNabv{aDZVa~_9p zx>Mpju1SpBClce#6$wWmmKYnbBKLR32SVp=boU0)4}z%!m=ALT(dYXOP!nJ*6#-MF zG|*a#f?TA4dzA8mgCr+7QnG^+Br`Z&(t`^mHMmlegPSEWs88a8h9o9%i$wV!mk8fS zB;4yG3HRWNgc}e`II)D=llxP~$Iv*9<}Q}yYr;tfppVxbz-<;%9ZJ3vrj?>FQ^^mv zl-zJf$%>dQ84>=H77-??5wVgSkt&H1c@iJN{)31{i4O0P$nd2S9=1tBLyk#s(8Chs z|DgooeFS*|u>`sDlx`Ptf9m`ZbWWhVEs`a76#W3Wj4mxu9f3Z_!f4i*F%u;-X1b)s z+DS^Rn@mP$lygM`J*k>Hpm5*W2n{3DNwU)V$96Y_!h z;(hq~fj2uJq=iMdA~`LA>IQ$n59`#UtW<@d)Ee zRWJmwQxy-!lX`gt{d>_qfYvBFOA<-PpcQJND4Fu7pwDtJA>B}7GVYM53`=|#2MNpc zkdRD&3CawUz|1)D&qx>Fj6(6rsKN8-klAU2;-0!5zu}NLCtek&xc9_4n$;1$hYLQ8 z3$eITFVCTW9_>9z^aZpAp&Ob~S>r=~2L4ATxkeWE{O7XH&BK4lw~&B*Tk*?x72kXx z@y-txFYc9^ou4A^7&fkXm3SU);*>KW4%ut*8xD$1+7+=*ephS~xMCK|3JD*^o>=Uu zmnSIeX*9Q|GbUjkn(a^rWoYN*PzPwo=acUgvj3sfSiJCJW|z$p_cBLuEt@SaWdY(; z79oz@D`j7rEw&})VpGyAR>kwgvT(J`$los(x%Z2C*4r{Y?N6DWjPF1!Gvipj;=??G z?s2p>XEP?z>w^}khGMj{&`w4>x`gKrl#_Q>k#ph2*wy^Z#9bQQt)R1>JaP7a@P%PR^Wm?+|F>AG#DXs2e+UhTpS|ema zOR|h_$rBU27^5aW52Rs8GMf7wa^~p77%%4TV&(;OMoO8N zpqbXG5vgo6%txy9ckXcI|G?H_+ zu=k*yefQnu-o51B{p8*Y4Cug*YW1Zi0yF( z{r6+Ohv3CXJ=C(k<=581qI!O%j{P4{4FzbYwvzi(=D;4F!7&&AVFCWbApXM=^3G*K zl4=5`bmr+yZZ=e=D=_JeL;-cNvVxxz_|QsD!iR|OQR4cB6A{`y&VB*ds%!yz zU6i+(`Yz||oG!e$KI#BnAF9)3DW3&1%(Z{s&UiKYYhe^N5XVO1*hCzgZK+gu5?I<~ zD-+um>OwtydLu!P?xwI0Aoc}up^f)3wiji9^)+=nJclYH+-`ImC^JJ!GAM5hx`8Xm zd(pI8&pj|3$^B_s7M5%l0N$1bA16CQjOK}S?rAK7sE@@l zUc{66guh3BGmOF-t}zZUCg)QR=yp=(n#IgZ=%%0>K{fivHhv6t3 zhm&v$&cHciPQX)GkXf-9AddUOn1r+NNT`d8!%T+!(Oqxg*Pwk24!|C+aSl-rly^SI zwpDalbaS`T4!dXvG`$Zq7EaI)(3yOeesC9B=iwf>1oy!epqfd5SdeMBATx=@p(SR# z2Mg6m7VL#UWj{v+K7Eb7BY$$T*5S#(c`|m|b;Ua7N9wq3Cqo$B++&;@P7?!q-c+CC z{dh1}xerHaJfd{qIy??H;7Pa%&%?{`zu+DE$wxET=P{f89&sd4m9UuC6zJ=Z$M%0H 
z|3hBjNgC*`+{aKlLZ76pb(ArW^2J}qgF(}ouxFw*N$ETT&w>K%3~OXx+o`PauYWsT{`|EMW%B1RHPxFHn0yq9GOXK&`}U zLHQ5e&<`qW8iLiZ3HHD-P*?e%fKSLM{$&4x{#m4-a8w?^)1W+v>!8ke{3?ns@zdw{>zkm;tvm=-OLv3vA@rI`TLS{hi!h^HR+PyZ zkHwozH5fl228|Rnve_F@Nb{G|^=k1Wn&^fdcoKc+44^Ya3#=ov+acFzlZPnjRU)~c z$d&J+{14?;9f!kk2=tmtThAZF@&o%0K4ag(`(j5;x={{4x?vcZb1WJuXk_Di6f!)@ z(5OM9kt_S{X!PRk3^H_=Qv(~AGWOCUXUW?iBbrbDkEZfIZu=11#$x!d{%^$b8T_5P zcmZGP27UEW_8mM(u6u&Gyn2n?`M0rNy8+#Gum)65zFq!b(N@Ds4e!UPi>uVdJ$Ny9F-}g=KTpsm#~3F^ z@TLyYM-LFkKH}I-96O0)J8^8KT{hD{HZoq;la;UK|JBsR2u{!nCXr!!>JW-*-^K0w zKo;GK;XmlA^1ldb`cc#0Y5cKccrgbVvwP{EyYR<$;0bNRf7s04n2q=#qxc}}h+_@y zvWhrX(m$3nUWRZPl>4w4^@T(?KndpKi1pK>)F|qM-dptCis45z)zo$k)HHk?)M;TC zzSLI6>?Zo>C~czmANNCtNqH5+az2%Cu z(UV&kS!z@_{{__+pM#oe?=|F9V!~;}nA5HSr)9jDHFz;2^v~tA$uinxF>}{Kywe5r zjd}FZeteKVd=Rd(qgRGrEqbl|>qBQLy<`Jk$3eb(iPn0VT`TI-hV)PVskn9g9qqT# zew0)1X*2wfshn0}4X3~5oO+fS8_HshDY`SMPkTHTcRUtf={5+KF2hLaFp8Hp<1}e8 z&XFdQVrej`W{*dU)Qszus&PxD++T! z&628Vg;FuCO3J1+OUbleDV(-M@~3W)9J51`WqQA4aPw#Sgr6jR9KM4Q7=VV{pYq>C z|GYKJ5PRz1f%3y3ujjHXn#1=yXR<7{m?X`7rc=WVd+w!iml{i7_Ll@p#jGePvr3Q> zt8^){%9jGGa>=u5lpHJeA6T*fVAgs`wLBn6Gwzi{^VcL{+V7Hp?~yPWCc=335mL^_ z&^e9nP8aIHmCIgkl$~R@Gv{+haxUVjw;d-n+#6JBXCvkIE>hy)h0hWwg^uBp?-(b! 
zj_f~h%#lo|Qb~7W|AAwdBsngW1c$W}XTM)!Y%fWa)hiOko|vc^>^U%psX#pMQtuC= ze;nPdoMKmaavp~{(8kw|e7}ZxDqRev%w>ucyUvmVH%G~Hl zsUBI9>`^RB*Jx{ggM_MAr3D~h~2LeV$J>oOWb&33AJD^BW1jd z#$hx!deaAesRQVMCLi>@SmW}uVo#0adrguYuNjhs7n9-bDrr96cr8JawI#>#!I*35ju)pcronj0qC|m?-g$NfMu!Z1IXN z!}Dkq_ozN`ja)9y5j(|^#i@PpbK=0h97ntlCog;mVsR#xYt-=x%D5S=W$5%lOBA^l zlt4~2<&Q-_j_dC6T=PvBFM)~E#6Qtmd=s6-Cuz2LB?X9QQn+{|#*15Grnn>)i&H{_ zI3)ClUHmY9!*;QbJ}Xuc&x#fMa;$?`G5WJw^TCI}6M6vs!<2Irtwk}6NAwz@0t(R0 zKsPawyf+E|BZb^M%}BiPVrFN|6!#2!an0a!I5K?1iNVfoYgCPVP+D`Ogy1`Dfd3~R-?5bf$;#ffSn<^kebXI z8|{cRa;^;ghiul`c@xAT-(2kTt;IIqNo?{w#VRjQEb}5|W?r&P&&?C_+-jMc(pa`;3$^FreK|3^y zoGX_-=LO__B}Ou%%v7eA%@Ff4JDFDICT3;6az|Mhdn@8)VriC)$BQv3ZWZIA0WmBX z6>Z)z(PUo}gY?hED20`4B0G5FfOzgiYco1a(wUdgYJhT3K2!!IQ0@q{1JU*@#)By% z@2e#5s~sof>!ylH-7GP#a}>kc+3c$b6iqFisD?MJE}}4vOm6)&>>4WeAd}2PyzwV= zG9AVOqyh09qI^8YLk4na2dIS-$OYv?#h@L6wof_!LlquOExB(4xo@+fXj&&TnOcA~ zFtN9K1B0uT#%$q_o2wb*b12vf2HIZYx=cJD>b;Lb^z$K?wIv!W3TX#ufhxW(h8##L zp$^dY=dC>I@E<7F%oeWww{zXOOGCwtXF@Z>OymUHYsa8sV((+1^-&kS)J0Dzzr!zg zE#btslh`g1*L%c+b~RK0e#|B`mVmk+tPv{tvm&%JD%gWkgAYcTd2G5k^Hx?}>;dV) zgPALw)M0>y$KXUx@>3a9Gnr)Vc{2~Z1tz;Cbn+#6e4jU1xPT77naSrovAxaUL%R)H zpoz6@EpG*#=rvGYwHGLluahZrL>prfUAJD^fM&JettKoa_oi4Z%!u{2@`l0ZV-#)=B!r@f$4Ih+#kaYCj5|jCwAz z+Q!t$v4(oiAEq794d1|6*veQ$kH^)?WQLY@Ob4wKa1u_#Ik+1x09|g(6S+DLKg67c zI`wfQ2r^kHQ5S#Xjrsg!dH5TQdi2l239@#|yI?78K|NQmBL>Qvw3|7J@_A8p4ron3 z%YG=jljh}` zkO_sLvIBK>v=wgqASz>90qa3!vFfV+Idb=F>>+rUEb|XM3H@_O-{kl_-1Z@o#*PKgi&ICjJ z#2`M;O1kSN%6b65;5_;J(+r*OX&wFZKaX%c0DD2XSNifRZT&4(^b~YWD2o~1$1KX> zMB91N9Rn!`p9MnR|1kA&m3FzG{&A5U^8!A|UBq#QI8IR?cXAc}7+L=jCX<7dbU)_h z9%@SY9y>wxnA`g=loxS3hX0`b75p7;QWw|oAFj|>FX4}=XHeXQ|8NHX;UwPF3F6S- z2a?V7R6vYR>b42wM9)bReGRrv*!w}pq|CeaL z25R`-2j{4ZJMm(U;>8@KP4+TQcH=+n!kgMbAKgwITd0rC_+%T2gUl8$qMAIt37sx_ z#Q+*BiDVnO`f++J&%&hlEI?5)ALcu5@1LXa?sEcik*G=@#4YbL6 z#>ra7>uP+ERm8E9IF=K~5OFLej>WXgLi)!5<7GZ=GMDf665Sk1(M8Ybq?8?uq;^yl zZMR_N9`k^nMmf$}u!kZezBU^wEGLN~VpMKg$ 
zKkcEPb~9c&X_IzdweVX_MBG3P)>E^!^xztD|7x@qHMjT$tv7(_P)=yjHz4(h&`bHOHg{+eDB%zmqULNYD=+&Xuj?R1{T8WFeo$sDx zL_bcHUxNP0!?%xLqobzx^Cp}ECvrNT%xNALbNXAr>A2sJ=Poc-=g=mdv`IT{(n^~& z(IySFNj-hEmNu!OO{%Di3Qe(;X=pVJNR&&~24E+|YtvKa$Okv40!AM$-Or^L|riJ@8q4rNBHy z^30u@EO`Eb1m$M zLCYRO>ir`6`_WuyM;$n@yn;4puxCwS%hHpdm021{@k~=Gw45P%v+N{imaAm(8Oj+} zfs$q&E~(aWl5EZX18eplShN4Yx=Es}dnLkZNWx}qm5`YyC1^S`-Lx+xh&?DlrtCYw zhY7}qd4~GG%Zk2;<|=3UfD6lDPPI)OtD)S9bv8dMAii9?36f=RE*bXLlIq~ZeJr!_ zTKpuzF;wClV7Pz@zODk+(>S=T_Kr=i4ono2ZYOeFUxMR>bNn75aNc=H() z-r*AH9WVag8SJkp6mRcZ@nmzphu32KhK=IleoUNP9~CF3PsJJU!v!D4)e4B^0Y{dk z=&wghU8e0~Iotr{PypGG0`U;-$9ms?BKNSEOE6wcP=J#J1bB#lfS>pVgo<}Stat^a zvA?1~+ykn`)xSfW{1%D>_ej}$9~E2AN5t0s6R~q)mE_0@#h$x~iRC`pejoa)(OBTm zx-x(<040zM>5ve}Iy;DKY{6Xj4lxmbycpk5OMDi4@d|a5*`Yq-5gIIRq0!#d zL)bC2QtY`=!8UjhzhS-13OFK`z7NYx?~lb2@59QC6^b)H1hHJ8j(4HI60N!Dw4kFr zphC!kln`>xP}bQjOM@cFy(109GitJUL{AsDXd7{fc4lvfr#SL?P7cu#Vi%n#Hc>fZ z#eGtik*zW#Vu4tMuM_i7?mi59P^S8SBwn%Dxe8wI8XIa@L|(4B3!^ zZX95DNl+~LMm%|6l8Ak(kyvI-mKhn-#UjH-%rjhMYPy%0r3bOMB1R_T#Y{{qlJTka zVv^D;M#(G1AaRdq<1UFd`b{y6_(M!W@g9g}d;lw8wAY|BpE91F;iBcaHG&r-In@-Vi$)!y zQg<-8SQ#X}O2K}kgX^eZuA_T@1^{eg&YlJe$g)~Tj$VB`Pw7t=GMcX!)oTq@i ztJp{k%a~-#EWjF^fx%o#Cn_cIl1wP4QrqZUiz(Pv-uNu>yvUmqLozWWKpfY333*8} zV-lTOD28lE;d^l$!_f9a+Z}BOv@J{6f52OtRB4&WC#Vw%%w$rt117XO25_BfRQ|ZO zh~H_VlP_Sx-^hSJg~0P9bHt#$m<&kg`DPik10lCUB^2;=Cb}sQg?1oS>d9MSg^N`M zdqAk(33cRsjlv)@<_{+UCs=iYYiCmLm<@EYb}F|$hws<&dvlrm*V5sS6W24uL=2_q z7efKLKN`Kz2<4PDpFhjWW=_hd4$$_c%&t|m0lF6GPHN#9813kH!5r$M*96~%HkoVA zL~lg`?L?uxnQZ82>V`Y@(B=M4erF}We~7<(f|!3D^FGG>m>QidqHa5(u9)UTQ|$#x z=@%7i(MUwvBSuR#Fd?IhSK%9d*!5KSbB52OkbiGc=8r=294f z6|f3c1Jx|+jaURu%(0X=5MB2{+5pXI%h~%dg3cNkg^jQo=;|!=^^4%$9QSHj5RT^rG>yeE zah&jEffmKS4;uD31v;{hpWnmD;aj{J^tZq$G4P{yhEE-BTu7Ny=QAFuHs2M@hiF=& zXNs1=&RbekCrJ&PPT>d~gA;HH&cI!8!H|oDQ}8H=<6&?1Z6x4TP}x@(bK&d=3-}LM zz@mQ&4v}N@(I4oxwFAVkjPXGIDi6kcJ7aMVa{*ct{;G2tjdQ?s^FO?oD}4PRJPMD& zlknVRHqP7PNd$25Fq3^6og{Kp;A{NSD}S<>CWZ&ec?hj@n0lbh#hVzelsA%c`l9Pd 
zxu#zrhI@Ix+ZvDXT5%m7zlGAf3D3dH@CLjCAHi3|@smA1NGK0gC;}SzkCWUh^BMQ$ z5W^QL&r*Ja@*gIH%K4{*4LE}r1VJ>UKrWPk%1|0XWlM8FWo?T=`5@|<9J|P-PU0C{ z#V>fBtmbzt8U6E5pW%2Nl=q=Li2FeAKPb;Zc@N`3#iQb|1y}F|wHG7~(jX5?xF4ko zkDvjaR&=`YCHm1?%-)QZcoQ4(CiZc4`z}0#8{E6`Ik}|%9+-zY-V1lbSvUpy=U)g% zg%*rK?F*TKh66stY~DM7JrNN&2=Qp7lb`3|50v1qR8r;j>XEjneo~j$qzD3Qr<|A(2K+tLJeRqEbfd z^O+!IfJcerF#Y2I8S*~*#x4f&4*p^*9d$D`yAj2;pnQlCSP8dd_z~@Q;c0jXE>Rcf z@M2El#T=(ij^YU&p>G@_$2>?J2WXdl;tj&UEmRCY!CP__{RvAo0a~hhn z@K_wANb4pAT5riS2#{REFv&KImQ150_IG5-|7Pz!!?P;7w*N`5g!GW!3+X)}g;XGf z&_fBN7gFePE8t?;UZ9>^ z;8_IkBx~$%!*sA2Q$Myj{GDaV^cX$en&^m3gC?T0fwkyp=qTD7d5~xE6>W`!MVls( zqIHuvk!q46l1(j-^JnN)}tCRHM;$#ilJD@3@#9ucN{O@wJaZ^(kHOt`um zox<)L;GYZUSf-@|9k9J4{A_#Lb3bQaCysTnVxDPXAkxjvMVf`3Xl=>67cIR-vQ>ac zvec5h=DNBFWB1B-lHNczX}l zm-vbphY%6%5GA4HhYDqkr&gMSxoe;pdh`&Z9(lyABbaE>ncNQ;l$Ss247_?+X{3Uxk|` z>kr5Yc{C-DPunk{{E2W4_GHTKML6Qk{2FBNxeZ8We@isQvW@oC7ZK!Q!pOyhcsYw; zFE4T}{vyCTO!#@ni00nO!rMDTczX8`?%w87x^e<2*m zeK_$Z>Dy$i)QsSqp!EN8#)5 zE`0obg|~l*@br&nZAGGR^G_Ep{@uuV6bc8wLBg*2B+<-gsj&9iEv(#c2y2%Y!p4ym zw03-3(~NsLZ9a$kje?`EAMM~z4hm#|)*unYg783Mz98nAL7Kuh*hqMXSP0J$JK+)H zD%?W6g=XTlauN%8F5G?*#C=i3T`}6iF3wok#90dKID6K1xCsk> z&&ezHh@kIRKt~c)G#U4y1yX{H=0O{B*bofSX6-gj+1%`?lo0dr5ktNs6g3PO%Y&bQ*(X zFQJzlAas%=g?4g+&`e@3pVX6z^r4_C3_6PnZNRb4QmH3skyMOX9C?^|)N>R(74T$7 z(+(gFB*7UABKaHww=dita62Uud#11kqqVlsZEGwv(=BN@I(J(fyRC9$3QXhVv^E$s z6Q?R9*D!=iO{ascC5Sss!Dt}&Ay^Y5crIxMJj389;j*5f185BrV#ou62tEhE?G3jJ z+;**r{n`?9WM~PEj)n}P4CHDck);M$96SqKWN~p%F6h~dbNX@aLC6UHpeX~36*oEtn<96d8z(EG z5sO-}j|&R$iM+9#zl;I!Fq+Vy)}nL*?TL`mxCHbB9pOu(zA0R))*^+#%`38mOIN=8 zhu4%U>0w%t4~hXr^Th^1vmq5RWq{$v)87#X_rkbzGG*Wai0@y(u`146$Tj=8_7R%k z&jzeT>CY+opc^*skbxaAUjo|*>da6l+~Bo?*NkHHO5iL56<`1u3@X7;Ee06uQH4Et zUyB&+L~emL8IPl@mtd>KE~5r=Q>1SU-N|+L8*(3_91SIWw}$KHfNou|19fiAHkLXE zXEPSS>p)#C%K6^E{~w$~*-|B@X#dWKOyqO5CIgK=ISLa7J)6b_sq<))dHBU#Dm1&4 z{gcU=)NynA6n*fI07F@0OuJ?yvj@B#*d|fuNbKwfuM5(x;533$V>CSD!6YyROan8) zY%mur0E@s9+GcqZzLY`78V@GQVN4WhvT>QNptN^P(Em#LOf;w!n-Iz(DEnnFBko%y6 
zAM8rtG>E*#5^^OM$;(tGQE(ERH(;FxI&S#0<|Bo*AyoJ(mAbNrV?UF}Ktnxm7e!kBF#G{q zx2>j6V&72g>a~nK5S&J;UfaMlD#1^;!9}o=GS`hVZU|XP8X7>c@ zTiRk!0S1BLtZ*1d?qeD=_l3-y*APSQCWgPlx2nGoRWz>uILY<^*aNnMt)P)##oby! z zfIqp__67I>JS67+ls39Xtb3I*={ zj5Z8Y84OpQ@q!+(=E75o6$ZmKmj7qNvj)qYB!>Q)*uBw%2eMTeJ%wN3Q}w?LRF_lT zUJW~o!91Z)38s{7Lmiwc+mqalA46pb98qw@!I4CMpfx=0h}Ape4L#w?=QdJ7Zy849 zJ(-w&30}JgWgiefH})o#Su6x<`vfl4l@EaRKyl9oQ@{lL;?Mj;6>TyM-x!LI4k1TY ziH^bO;E8&863KbAhBX7OE^zgNtC;c!;;kw(Gl#suCcO6oo}=Ef`|tNpWbyePT&F=D zSOORxsb6FMfnk$U^x#nW2kVCXLm9qNN{*}#d5{wFAVugX#2)$BL#=^e)d5`H;L4+A zD)>}IPGC9~T8#%E=IECVIR}+@=m0u`_u<(GRs(fQ;1?50QE;;i36_L948*S2+HtB+IbjC+JlB?@L z1fM}hfv6w8*6?-Y-=6RllIs}6Uf#jb;Kf^LIrWP}KZC~nLmXQ*)UVUP2jCrJgzb%~ zdlR@pPyX)4ZKta)&pl`uP_ij}*6=w{TQ~T;;q!+t1imQh7Eg<$vZ^fu&h8B@)Pk0b zq9>{+s=^sJ!^jbaG4iRAS3%_;_M>4X{8Ql^Zif9WxR-;Tpo=+k3if4iEZtB`wAD2d zY1-zZmA0)&)o~Wdx*j4?*H2m1LUl*v!9-WU<6 zJ4Xa)Z9wrUde)b08#D3&YIu|H_V=S0BQ`{Y{q z&Y5!}>~uw#orwssvlc=24kFOrP59gU2w#UF)^|h-Z-jN;tW8BIl7UY+WjZjq@mB z?PCmg*(g@ad&u=7e0&AjMAUcH2+XPGeb7%7^%&k;>r zHwa^=)56&PD`7(J!<0OX8CsUo<`du_07t$j^GYxJ07wCGAPR(mAa7zmA7ZZN#2n;e zoc)>zM?c;{>1QwO{M>}CpO3Kd3l>&>(ZbR%NtpX(h^Bs7qKR)GVc2{)G5!pp=T$3o z-H!=fm-|AW+=n4~7-O_7#J*$T?+Z^h9G&540}??DhycNWbdR8_=A2FaiU#>~d4`;%{&fMYE~&px!=#kpRg1r0vL{?u~-9C^Wvi*U372_PDTu`dYt z@!11z=LqKe6mJ>B_y6%)LOYScCCL=g3@*tIfJRNGz~peyl4I>?)GP|DV1OBmzQyR; zgMrZyfrbz?1QPqhQBL{2xU2&lsch9cq$u`rVBc;4#KfT$EG;56^5jfb;ZHDVvDX6(3K0i zbmCY6=M3lkd64Wt*GK5V2Fai$^Et}TjpH=X1|;ye$^k{Tzy^HwhTEB@<#k!Yv@QG@ zJb%y;e%wuT!xlYs8*i{#rrc;~lWa!{#L058g?i8|hxW+M#?XT}Zx)r^#l4^sTGV9OXqR?80Y?iMl?)918v(4-uB(;6G#om&b%I8IKpd2J#`!EN7;&R8cp-lA`N$j3ywY_hk=G2JW)bBcf+sJ2jh<;u)}!lFcmvsS5xY!sW{hy4PY}+ zJUfBn+5`511KA~YWQ|v{Wk262!2MC{tfeJ zl>>PTsQkx&u7POGKm0rQpzPyxgJRqi0&!T)mJoN=SKEoa#64Bfs%DGA$ahYiH0y@s2<1{*s zV~@i)@P1h8$r9|u0$WkK0j6a@ zJ3&-%g!u9>I@E8H93qz4Poz^%1iiE&_EvRQU2i#{=QIp`)#M*0kc$~hn~WjW9W7k2 zuoq?f<9Q(rn^CaDF@z?;(V7;>Ag|IDo?a*@#u@|Q8po#vc*_=)si&A4e|f1F+duIU ze5XMjSOR8%319>m3I^k!1L-II>96Yfiwb^g49p&gKC7LZEiY^w3@bFt0v`ZfT 
zkwd@jjfyOkswZ{2(Hk}r1!W9KqJY4(+ z1fSY)CU3{UYx|*W3_W@tN4KH$8hF`|e^9xHmf#>9%KxSTZWE$hi(5MArP(-5>cYN` z*rEfr$iP3_(I)Bi*S7dZ8hxb=d5~6&VX0&ilF5T4(IyETX^Fx(Y7j$>qA6LeB#OY> z!yEDs-~TK35Qzpg^e+Mv;2o&P^uz#uL#BeDy*~F5U8bZQYb|u)GNxYUaM{4+2%8&R z-qg<@z7Y7L$ZW^+Z!0)eb|wo8m5|jOO4eu^7F)%cN9f@X*f!=L0@C2h!A~#e^TFD1R1dYz#vTc8^j1-gJj`jm?69j zdk7DMBH?CGDO~icg|qH5;iS0-Z@LA3r3Q`P4y?e|)zoz`JcU-wCAi&ZfL7dY6WGRp zDE5b2GPgiasHu(!F)br!y+o}#&_pYY~=EuLmAgu7X?a5c*i&SpJ@ zqbcuSH0Aw^CR0T-;~HUOxJTIN+!QvNKQr<*emk%fJCBFIKRmh3@B>?H0FvyeKU*HN z5d1u_2nK=Z@wZ~_ft7)1ZpH6kTG=tSWXpY<|~D%=|N#?#8Z*_KM4yR)*onb55vZzsbe`Dz2WWXfd7G(AR2^& zV0*$t;LCAu;7KmV-L8pnwX+m1_B{V!??SG{oAn)m!p8c9cc$A{J@!b4@B}g1O$NQ zfOH4Xt`K{Zi*e!^C3_chVdv6JG;?thHZGpR%Eez;x`YdJaxtbZZA257F2c|yPv|=j z5V}s|g^t4#p=GyIXxdy8+Lk{GUGgycXJEiAP(~r6FeIa~$5sk@hsylO zmzT^OSer8*`}c!C+XG*q{1nQM1>qowzkPuha0kxpv-3fIbLRbiTEZ~UK%n`0ipnSHi_v;oC84JH1K&=i`{ zdN__Tg9x2F7RQdm7I7Ff4v}$@AesFgIiAmXm2~u(=-Py%y^WS%(1M2M%%$MT<&ut+ zp9*3?7~oYvtbIZQ+^+m>4>!x=glPovZ#3UOQ%%uQhk?uhN1(b{xxQz!}K7)2QSINYKy%4dG~DT}*ipv405dz$J-5<$!qXOz>)G-s6i6_^fhJ zHdKotko7)7=uk8_EjYArVr-FSM29k^5NmEU3@9D2MF%c!&jsq`D(!l3^Xtob)ttW$ zU6;B31$IJ14Bsb(Gd5DUHgLqVjpXkTI0Mn*!`3B=HlWVtxR_BQ{3&p^25mum&=DP- z(9wl9$)ruXGuZWXrefHF-!W=<$f!pb_V?lVIFc=^(RTq|XlRRu6y|o6-vOk6SY$`S z8HN^rw(iu~J^>$u*9cynKY2UB-4%2PJwY#YWTPV&9r+eGm>m-^cLqL;SrSL?BZHe! 
z0mnv=ms!sFXSo&)1IC77)@`b9OY7LGym;VpWQvR+01Hd3q35J0xFiMw6DLN)taYJ@tVt{i` zr6DI%iAh{EemdvWlmB>zE;RJxd$n{lq~Tuh#<#&2Fq;?6XOE4|sH<)ffq# z+74T%f@s>Np&X2IE0B_!k$S=Yj=bF<1sx0Nk!&q4H*QY{xhD z`jP{ovG?>K-+;mDR&o3e=b?c$DP3vf_8Pq#1h{VOZxX?_ber9x#Xh9LN0x z9Ul^*-o!cI!$IE0Nau;@PoY$;JyH3OML^{rW&xFhP#P5f_uy;rWdk%>`(a4Amdt=1 zh>1Oj%l$zZ^@$-;N~HWYM6elrYuklaC!w;XTD)KVqTAbrjFH^Ls?aeEdkn!p2J?U8yEqqOnH@x)pR>2|4L7-Lb3isw^;i6i zT-38M-CNC84Gn5297@bRg#1G#vF>1eV-Pw9qGKTbF_0LuKapiWqUZ{Iq?{XSDLNXT z1IgodQck|03a%Mgrxx#hn_Jw=h91=g?mux6o;^SfH)=Rg+(Q6E1tqB4kv3%-VqG)x z5WIg99R>820(2ChBM%)p=*UJ#FLd<8S9=ht^Ck*Ep`By37&6xbfI~cHN2%Zx}>>o%k^B1SV51=s%9mIAG9MgdEfnsf@U%K#vG?2nA zI6;ehx(4$b{33>2VGG8LXxbzS--yCDBJt4(#txo3z!JgmMZwn+zBcrp&g{)aVSlVO zp0k#cb2xzN2mi`Fc%wnJ!$hFI3d+~V{)W`w2!0UH=N1Os%k-F%>u88@E!G`q7{X-= zmlce*@HxTfPQ85K3nViX-e47PtmlO_J?If`_=GDXgfsULCo(@ytdnx0J)Gco1>8l& z80udRS8sSbnqd2;OxuCVdxSFu2w_?j#IZmF)*a~S34a}9;iqLTe6?&ub1etqt?4Sf zv{-+j#cxn*g$g$&{;t|7!dbh$aMbQ4?6t~-o#rUKc`nwg18;+$8uAaeXqbbYtEhho zJU!rTXGS<+&XgNOfM5{7K3`M95fcrbU(puch6cjRz(jcHTMBmrTk`}2s51xc+^|m5x-;GxJq(5Z7>A>LO3!l z@B?o33094{mr%9={Ot?8IpzV}%{7G^xfqwGM#9;|OgJ^Q5e}w~Z{sT z)`T85*Z>3pU*OF?ci;+~Srg-Er6uew^@XjKiD+hJC9JLNg_X6du(0+KX4bp|l65gn z_)^%&Iz#AN_Y}IkD@Mn%N@!Wk5?W>(gr>=9p=J0;XzQ}}K!ZF!Hs}k#`ii!FGupux zKd>X#0{(pV0Up4WV@|*U*f!G?*0wsr(#}Yj*_jJd`)0z#-kJ3sp2E=HUl`bj3qAXI zp>3ZgG|9yXyJ9XH%0ztzLHk+)mQzfa9}zS#b07~h1{;*Y-xHp6coXdD2M+iF@CBay z?FO8I1IKNFwY`QgcO?JdWFQQkP06!Z3q5BCq2tVZC|#NhO_vZR#xbB37j^*!RA2}} zBcp&l8k(WO>M@g6G&JEkPTIN{{%&xzaikrb=nFvQe|&&D`&@t{dh9?mK3lNg)LB~? 
zx$3jF#6)O$SPBgf#Cf;?Zx95cK{Dt_A$edB#+-_tTJ)Txu&*f$4XlS5f^G9DTRlFJ zN)a(21o#0j;O5LFz=7j-fF)7F%oX|WTI755gr+x*=|jh9uAB#fYSE4g*nJ~FBKzBe zY|h~>=rskyuA$IVXn8`Rl0unJz@JUI?I}0OjsD<{FM{TL_5d#IcS4H;u;H^g$4oqF zMsL<&_%fLCTTUW?P920ngRw<0C#eNA!J#0Y{plc!bNX@aBy_FD$xox_TkL`c*2I)k zc2CM}i~M-X4hR0go4?(G3&&I*%$BVs`shH3D_b5(FvT)Zz+kEDeT8DsHZ2K?&xsjxDETv1L4Qrb;IDMXl`DVi!EAe zlRVL*Qybw>bh0*dvbNZwEf=RDG%bOB9jIhJH=z-nw*+m+x%LOjKtmL97Gs3qBZ{WV9;(7*s&Qh~m2D5u3*>-2PzZ`OnF!J@6(;05 zXp_M%OwO={dROegj_fPp23^f$t&a1*LKhl(5D5_>7A+Ogt;tQaz?haDk!d7=b-To02P_O(x^eld0UKHbP85@R(5?U&D>^ zW3E9%8EbptYDryV(GmgFvsljQv_U7H!NGh?L9iuIoMoUdP<)kO7#IOYgK=OYm<*0Jj_5)|kssaZdnK!EB)F zxC|)1wLtN00)KMu;`2Um7@P#>!By}9_`;Y)1{KyZe)zE5Xb}~?rlKQr|J5Uj0A^DRs&KbzLRYbNE zxpp3zk6LmF2gQB9$Gc8eWgl_gM$WDQ3&BjF)*?u_6+gX|^BZIE2k3Z+j>qWumf7!j z%#fclXI8(9@|0-eJ9Maba(v6Y`7s@aUy~H~aNN6el-uaIj-y`1IWH0QpC^(%&8HKr zFxZTaWvErpfGBR22T(bP-~Qmg&-P<*8{7gPkbk&AT=g!qU-c}?JH&PBcUdo>;{x`$ zz(DyHF~nI0jZ<{q<3xl<7$^>+V=ocoZk%^J4zz{L!AAD3qgKnv7EQ+%6X8`k2=xw% zA>bK&pM#s=3b+XHSn?hk#KML|1LkBf>?q3>_<$e~0b+24M9ObP`RQ1_6Iq7t#LnuS zpz2wa!Pt8|Ei#{aZ=&nGL*0MD#eb(;zb2+`d=G`nL#X;+2g-#M_ZF}gtN=@}#X{O- z0c|p$*lr%t%3O5JrCsLYAM?0z&Ee*ueiwBH(dRS<@5bLn&B2{ZvGY*sJB601VJJ~= zEBS`u>^HjnD?CVL(;9zuNY(!gs0Zu8A}|wFg9%_P@!e?JWE3&xD13AzI!4egRYX=* z+`xwu+YQA>c*7zs&<1blKxEgQVMV>XYa2>!mk>@1RI$dk`K~|G8i+P~41UWG@^A1e>Aqnop zKfq|!FuYD+tN2TR;_VK)0`7K{r-}FLQ)Uxj0h&>^BX9%WaQMR!!c-y>jySv{1&$0j zdg3j-hZ1jyrG;bYoh|5fD%%?M2h?X}1Jwq@K`F=vid)?-+kjRe1tj60iL^-~F?S+) zkOX4xmW&{*S(4_7l9z=r(jmGI~ z2;)T%wg@CA6iAx{;2Qz>XaGLyCtToR{Q;kRP~weZFOkTecBoUHQ_AQvlCu_IIko=b zJ}s$!oxAa=MAZ)Jwp;{PrZ&?hU8bCR@PjZA#NYlpOvAMZQ#7>T(1U~b9?&LUw22pO z;z@}fF!2@!{tbjL0={^-(m0Zd$|5W_6fc^C>MdCCBhdKkVfJjNz^|qxdGPQGL6K_6 zd$M|kg|rm=9*FoVmQ`q;zg0-tAtRqSaUTiT-;-ef~> zv7*;nFhZF#QkyZdn&J~p87EZ!!2(Qxzc2Og4M&C%zSjgBfDpjr4Z;h!bL(~Cn3ExK zzk#-J(AN|8x~x6WH4%0?mcmwt^#?kRtnY9a);i6Fm2QZz)QJ)1I^-a9ItdeGRQ{cOX{DUDHM$-mr8rlPnwkFK$Oz{B_Xol_Cx&xP{#2Fm3Z=xY=n`jFg zV_jiwq%W)ujfJJ5nXoXl7UqTy!pzW>H6Hv9rcp5KD_RI6<5Zz<+*#-v%HNLcw*$>gHH4L^mauH9 
zE6h#wg_((=FflO^O`2MgYq1kXrY^$3%vwN63USE&`XEf8lAm9VsfwLv`XMZzbWv(GC%(Yq9qAQx3 z^ZrTR7h_;)EcAI!Ntd-TI;@S+w(<~~Jg1~#9nOS|-fi6u^y1>a1WUBM*>Zx`18Dh- z3F9lixa2!`+BygR_HZP^sd7L8fJbc_az75%#9HjP29}nZ!rW3@m|E(JCYE}_z?%Gn zjgio@H5D4_Bs+WH2AU&06fnBjwFNyXq=Jf!qaZX`a)&VgydnP~SQFEi_RfMo9j^Fh z^haCz0`LIN{B4gG8(_t83u`T5W~C!ctaVvmqbu}m^@Nr^4XK>Pi9(#!Mb5w*1OY}j z=hmPb$NO*|cO-i>*d9X51GGrmi0?7Dv>P0)xje>>`8`$f0V@CF$ltc?w?>O)Gupuh z8vr9)oxkm>v}5;Rrl%rU154AQg1wcnRl?#=)1M=P+6xQ8*f` zn2l1u&Tu6=Ft_IN0N@2&fCKxQp~afxR<@eL3^XAZrSGUo?uqwcxZ=?6bSh75;e#zy z6493nd}$9~Uw}{eCJ{Jw0)-4TBPe_^*Py}mYqX%jk=UP0+B-2eV!|*NE&=X*c4D7B zI+~%whI1^~n(#O2P@&_>GYB3Ul*i!N99#G?xCCH}AZ)>NN`mK<8WI^HA#7WM3=Evh z@u3X53pjrtdY)h(2^r6{a(WtN#v)xUsqta!#@~);A>Aiz;k81KDSsPsOxF{Bnwy&g z<)I^p0WJg`Vb~%PTSQ}v7zC?_%haM0^*~9BH1_xA_#g)0xpe+|u0w-A8d!_c1>O|O zRnLO)b5sp$fLys;<$oOD{WAw;#4%kT`2FCfXl_lE2cpo?0(-=vqa`|$aPn0AqLm9< z+9M^BhE8RF4?12y&Y4byccSSroe&MIMaiJtmeeVX)BV_b^0zB|&S=5YC?G zLXLq#a%hj0A6q4$ln&R@F!AcE1>Gy0btICiDnkt9FPx+KnW-V495)2k?s;Dz+2@E-GdcKZQKP zZqECGK!Qj*5f{bQ;c)xF?MR(XJ8>&fyvnv*((py6;v5YA}aH-kF}-(snwuq~o5!maryx2mtI>vW)a7XdE+S1x1|pSJ?VxfdJ)C&63bUGNcj zs6+0uEo~iLeFH^$Q3L3JwhmkBo|L5gQlZGBGJRwN>jjZPVMe@6fSRm#&%Jdi2cdot=}L zUr<<7(x@elj2$;&;-u;+Q>RUzIcxUZdGi-6TD)ZG@|qQ^ zRLM&2?M0ZQrqT_n!KF`wtvEeB|h{6DLodIeYH>g^QOiUwP;1d)Kbtc>mVz z4?g_p<4->Q?A{mmAAI@MqsQMo`S$6v?|=C5r=NfM^|#+&zLx)g|NF1%i+}3>zpVW= zG?mBeDVI0K=S`K1bM0#6w?jLR6ppiIzlBqVYuoFqv;FdRcDy2dczF5!yMHg7OD=hj2^L4 zb&0jAPi&%7Y*oEt7u{m7>KBJp$2d;UIIX(ITdHroP3L%5^^TjWd)%Ra+*KXqb9%@_ z)kVHmedIej$#d09UZ`&J>b30OXK0A(16tay+yMToH~fG7{lDB^LPJASQ%g%5pV8IR z)7Lk^Z;XtMo8UT4O-;@49t%tS$Jz!DvbDp9a3Z`2H^Pr_B;`pBuGHvD|K&^#zJx1f zv{yqZBWbVf-2B4gK4lgC2M!)OeB_vM6RW4rm^F9)!X?XUR<5pHw_y{*r5Y{w?!ze> zeB!@cqR}TBoI-g8Zh>Fm7RRya z)vVzUujkD8>`MNMJLd|vTt8N@|DFAXCok+Qx_EkX(e-1s#UKA&$Z2R;jD|JAW^xM} z_Q!e26Dd)@z11$``>S0FzP{CC_`Uag&%Av#XT`hcbL%c1&)a+YVE(ZqdkW6iZ!LId z>&Bw@|1RWoZv(jm4Yg?47HKOFqTzH()GwFQ+kN|f=Yj{Hb*uX5PS%X~-p#JL@K(;I z6DME6L$s-)+qcDYx+=`#4lz3vnDe$Z=99qmzjC1=gD3%MKTpUvAk^HjmE>SM+G{x0M^ 
zPkp({*Fe^VG?DwFEMK0Bb9wqfV(6z&Qqpeyl%8|`N#}tFzvw=8+ecY5*4@lrQ1f2y z@&#A&R?obczkc%h!n$#1O1A%9$OWEyat#``g&50&(dI8M#yfuXd3@-dACl6pzG&O~ z#CM(g?Rwa4dBp< zhVp0&)9>Gjak~4sMcDOU6Wg5sJuPeh51mRkKkhbU)%~n7i$2Y+p8ZkojHw^w&zbOk z(frXjN)``$uXNepgWcOln%YRcV#8ZS?T>)gBJYxd4}fi5RxM9^*-8@E9ww^=GPI?s94wda&yvvSA( zoKraZ$NZA29|}u{eqUTZ=)2Os{hyZi|GSV|yo5Z+8n{dTde_kIGjCJqP_>)T{q*eX{%hT9)&7A$NO;7pHtR zZhdaA|JIN07W-v$$Bi=BYnhA+nk`#KO_FWmN6U^$!)3Qtm9kfxK{C5-f0>)!R~ED@ zmqqQ%WJ!m=i`R$U#V5BdG%r83GdL_=%<80!QX})%ROsmb7Q$A&{VDoZzk(v+~v-cP`M`~Megm^ z{rA1OWk2odGxEEg1EzhwZP=nOH;rEXMeT%5pRJg@{o|!G>TfNWckuLtS;zKOO+WEh z@sEZ^+x=sQf9&v|edr&5_{Sgq@rVEWz9PqQ&zi+OYbp1v+Hf1WCB{weP6?6w+oi~Z zncaUqlwJP(VDad0_w}9mXiw#``&);vzqeu3_PcAw?Y&b|edN6bQ%@fnH|hNLs=p37 zo)~8~G0rkHtVP2%G}I@D$V2HV@@UuYzZ~yf@$7ivn6HnP&3<^G|H{vI4X*oS%h39d z)>j?5y?XSSD+|Y7-aTgI)w;3cFaA}?322yuhUI}x<+^Zdxjn{J?oSSuN86^nKG~)F zPiL|!o}SGg`_8@Z$>w8*ZM+ufKY*=-9i4?fD8@bUHRMb|IpuD^6LZ}*vl zc}I`d7o6RU@?dB2h{#D4yXjp)TRcP3ZhWZ$1c|0-jw+n5Oo?YwE z{j0lO`+oXK&j~k)H7{MxS$pbq?v6u8au4p_mwRf<&b*6jw-#QlsVlp-WJ~3{e-&~H zG4Db&tPX4<>%uMN-WVr&Dly=fx7#E>xzoPe{fAxpesHhH_;+sgo_FR-_UeOYbGGh0 zp0juJ!Q7*@^?7HO?aaS4Z(GSbv$pm7%aBviu!vt8T!V(K;TCd#jMMA0iT=;uYn|}b zy^PFHzv*0Yjkx>#?Ot<^T+Lpwi+s?gQ#rfV9L*&Ml6P!=eZiUOyGq`w-dS<+uR=~k z!xF}+wSmTRTe!JA7~}B!g#^E+w_3-4{wO{3&bOV)FF)uu`oxD>GwZKqFRQzhy>899 z9C9GJyXPOxCkIk^bke?(lVkUmpZ=?mGd%RWw~+_#_MEouX77b--^p3I4Bwo8E^o_>Qw2M#j~DG7bEMC~5r@kU|5eCY zXsGelmm7l&p6?( zU(H)N<8uDm>I;P%$DS+RQgym?$B;0GfRmdf7+Hym4ZMi!{=jDlL zgD-D}HM{X`MDw>^#l{?!N$J~Ox9hRy=PredzU$F<<~P}u)nDaRjeSrsYUCHiV~5@A zGhy&&Ws~}U(sxS5-TqVmD&$I6O}WKaQyvJ@{Nb%I!&?viZ7=>D*8GT!iP|9(Qfp<~ zj*DM*&YJa0kNoQA*(GD2<&}?kTG(&cwp$9}S~Q-v(eE_`pLcR<=V*(hBtmP&8C88Xmiyo~f5CgXeu z$kc!enGswnJBJp_Zec~TXLzCP6;U9wBl2Z#WS-2A%9RCw8FDydWHs}LIsQ%M(oie8 zriHWIkmN5nw~LjVyS0;>@_NhlH`HDmyMF!Ip=&putbB9)qoL7e|JdOlJN#$=`NtprAM%GP;;hMxlk@ydU18@uSsRg+hJv2^B!&lb+x^6}ioJKw2ZTz{-;!T#Mt7VX_NbjhAK zg&awoHI*1=AsSYoVMDZ|+@9bocc;b3y`3}U-d?#c_7)BNVRyy&Z?{*@c~m#D=E2%= 
z>+h|YyzTCi8TEI_nV+37>*$`L(@$)zns;!^h=u#!6mk?ArmOG%{aC*kY9TkGVJ8~) zqTyhtj8}(x=KXvqfAF&dr4zr|J7E699Ya@tzG>u^PuGmy`_b}AM{X{hcItTbl+)Ws zPC2u2^z0)W#>_kTrjVnFag>I|XjqMg&Cw2WcS3V{piK*Tq+`bK$9v>GKapGc?eUW8 zM@RZDx?ewV-Dg`XcYVBJ_~Bc%V^6%da`NdTv#ZZ-nNWRh?Sz@f)=ZpzwtyNQ}pIk9@=CL=09E*mT%psQgvHk!JTcYjdzLq}nXsZ^l zPq)wb`9in+rKAVnFaP9ZY2EE3ee1998+7>XUBgbE+dBH(q4g6ktY0wU z!t&WuPc550r|R5+UE?mSTRHyXl0{R_ELu4IF!)*HJax|<9 zHIv(;?c^cW)ShY;{o+zayYH@d$$Rus&!L~)?mhk1yLmP5yj8gQ{K=9%Ck~e#KDfW% z$vt}po!h#r>O$?7ahDdZoP2g(&9qZ*3OSy5a}FA6{8$5lh8<`)9Pjn&Tx!&hSKFn3 z^I_+_`}eyKz4LLeX;ieEuvup5$RHIwj9Ynxb8sF@fCafoL;=U@A)~qhFq9gUwwA^uBoTr6mk+8=A&VyzmeP& z+Eng|ZYEF0d;I)P3hOZ1rrrCdL+-7|nU$A5?OA=C*uVaAcJ1b~+1pkh&)K*1Q0|d= ztPz?~-{;JvJp;~--#hyBgnbiEzA0oi8W!M}tNjgSU8srN8*TmSOuXCoH&R0HKWvls z@%Qa>t`c{jy4PdEzFWQKZXp-5=0eWKC1-NC&t;9!v?B$FCLAm|HgbQ(iDCPPo)~su zosm^Bc!{;YurYr+}`s}~pIUB14X7<%_n96#o38#y8jyln&e(14^eFKjUJ}}_u&^Lyhj)tYIfmrXaFL#C+ zu?E8Yhj-$f?mdbNz4cQ{>np#dXB~UixqRoB-K*BI4rAG!?Adc~>dsW>}$TltwGZwxuhO-I&vvIZi6H4xzjzn_X}`uJ9~^9N63L$AI} zY<2o|TKD=NJC$twru*QU2U(-%eVS7}?W6n|6K)sI9d)y0;jrtaO9s7HvAplq{wqqa zRIcuGWyqR0g`DTAEmwQ71|m>f9*ER^ej&>AqX(fbSAS{|dPXKBACPT2*ZtZtcjfop z%IAO6Yw+|hb4E<~qG0T(dqoq6f6{02;E&6u_5Y~vjPg4JW*6TcGPmf~(0OkPx!6TZ zuJ_WEyMi=d9*@v@@P44#d*6n*o_!S^d_cx0Y?ZCrt$N*|`@$F9@@72m)o0>&x&1~z zDI7HX>*67mk4lFR{IYyR--rE1mE9jWrud7>aYdgG8ULn`E1Wgt7T$)jKUh4w5U6+a zp|{2PUxHi?%E*8%GOopHnUcCtX0)FnGczZ??A2@ZuerIye=aPj{IR5X;PbLR{eGw@ zEC0S|gYB$Qwhh<4qX5yv2(X{u;MGbI`x|gO|l&>F>N%hBaR$TZAo? 
ziLujVdddXZC2gea*?y?Z=`>j8cNrjyGW)(R>0bV-tVij~imZ~C{d*PuKB#x$Z-cYn z6mkn14ta>rZ#2`q^2EXL*lTx-?XtQ3Y8mXYKt}mbmGPltWt-?K*)gtCc2682dnNak zIjQ9`zg4L$Y|}>;rxnZ6wuQ1hJzw@~moNLjF=V}icyiH3y!(}z_Q_xE40lOqvo+GQ z*#hbBG*yOqjFB| zxw>tbT%DODSLSt;H5J)%$*@wnVEmxp=gg@3W#+=M&!?@J_-yjpDNiPDp8oatZ8N_b zvwQZvk^AP}9I|iTm4W-_zX|@)&}grJ?C_5r{ywn-(V5c1@PGIi2L{vRqkHS@CMgn4vEgOd0dz+LRxk6~5fmZ}2bchK~Jy z)!6AzmQP*q)#AC!AIw|4=HAQ|>pvt1a%t46tw#s1*uJ;2cJrR08`ixEU;M& zY7GP$)}Uc?oQK@jDpc<5kSurg==yqRLGg=i^V`jXm5Sx;`$xKS8m-pV(rE^L4$^BG|WZAvLM!BL^P9I;#fPF8Y=7C zC(C`^yT06;Tk>;#pJC5-4w(GS)?o|2+AwDI7po?1{&dOo-FFtw-+O8Lg8hfb%|Ecc zYX1Jsqn7XFaNV0g4rg9J84dGT15ty9b!gZY>n``CvKAyGSsu#F{Pl2lpXY~)tDf%f zJMHnF$|VoBRIUGP-PrASR!`n@y=M0Q^YiB&+&5+Jq0M9F9$GhU$?n<-E4RG~WEC2w zpkY25R-j>hgpJ%D>n8WI9_3KGBzdfB<}W9D_xb)r;fQaJmCg9_K>y{R?;5i4!!4tB zTw6b({_Qo>4xFi(b9m3}*+ve3(&AKh&2#s*olS% zDXaxaPkME#OXg4KvPz$x&mZ~dT*<8a$17IcJveCNja|ccyj?eT@3{@t2T!b{O;!kgP>iYCr_tHnVdXK($qhR)jS4(TI zz145yg=2$uoZLTr-@)Bu4(-`deXOo-#);+2XPlU~aPFZw3l`VE3FIj3FdYqx(Xa*$ zbrDu_FB(oH1^#*=E#doX9lJjMs9Wh5pJk2y@J`;W_ueh8xp1*;!$}6hL&q!k?m0B# z(6;^Kk8RvH{lwz+(@)I8H)bqbwD(OQN4x9E8TjSW<_5Ahh&2!qmU4fr^Xt<|0Y6=7 z)AGrO9lG3q*tPVdFM5u8@57u~Z@pKr{Mf~k_5040ZQp*p|K5#9Di8f1_TDNk>a~0S zA3L_X?d}c~0fUf~?(P@{m|>C`1_lNgVrUo|hVJfAKtV*rK*f%2cPA(+2Cn~_=lT5) zMfTbE;5m4|+-LW-Z)V_gt#z&SDQxrW$ZieoN@NJVdg=b(#r2A)wT^>(NTW(*wF0$dvlM=&usipqVMj!*Sa}Fo8`FBRQ3Xg;}BpeQ|N+fEu#5OvPCg%s}j!3mr3et-vC1N{*wGbKF(lN z$zTRz@2uh8eT!b*-nZ$_2Q{V3-?fcTd^I7qy|whJeqt*qxZ@C$j&mS!7l_$mXGw*D zr`=0^Pf#k^$LTfnW1Kq5F@8O{C%pFG0Fi4ZK{{$E+BykpWv0L(`5A*}6fpy_ck`7m zDvD=8)3D>akz*ZZAc|hv^2CoELPd9+VU!WhjEUXPdURWbnEjk zcI^u;`8Po9>WPrGZX%Rzn+Q#^lfFwb5Z87sy83p{=Dr`w3P(XhuW7{4ru4IgTh=>U zR^kgsUgQ%OLGS~T(C@BCl=p4VSk_Ith z!3@N%@q?%2rr&tFeev1PyEk`%(%u$O*D42nv+VEY4oO3{?ol5c=^<~3Z2n6(U+#0V zzt>Z0ApJ2z;Q7ca#QgzR=yKmL!s%XMB78q0GyeXqjWhb+OD*aC zCAX;=y?;5Vs^);MUNV@ONB^|33mJBF;eU2@^ZDdXW)D)R%nvj=?LC7@dF$on_Qsn- zeC@||dKKXFZ-AWT1gOOKbGL3Iyg9RJ;-#1CX7zmAx}*hU)|cZ9Mm8wRCxNzl6qx9Q 
zfVGi7IGXtY(TY9dX2bYNwx|7|I#9mRojitF&ZMs#qRSUwSEtYZq<;exqlU({grQ^W z30EI4nsj1l&8&m4X>kQ?U!4uRwj_d*Oay4`4Fbb`Jg`vbfSo1_oOI|w)bj*4eKL>@ z+`-d`1axB}uuPnRW9kS#X8#7LUPh2)Aa2~5HtzJmqABgLa#kg*TaW`=mnVYEx^R%+ z!UvTd-k>AP0+Zb|u-Z!jI|UDLQgR2EeIy_$yMnul3y@WufU4>U3^jZ3QvWx=frW&B zE=(buef`(?j*20ax4MC`lq*1yC`k=7Sz57@zH+Yj{}rbv;316q?;>1s*@HZQB6VQYX`Amt`MC-frw1T&#*%Fx8Mry=fFDN zNB?I30bX0cJD<+L7o1~)d(7j(SE;=rV+H8wZzMUbF!Y;on7L2J%)J=>MYfg{WSc5L zrh_(Qkj){L;{-`TWDrHOe#WNy3`gehhlC}9fsm@ucYzJU*Zcz!&v=KU?r=JyFEBb{ zPt%S>j}>5|21))P9DSB(`)o+Vta%RRyz@1spukuG^6hjW*WC)TSgw%4r;j9u`wS-} z1P;ZD!v>-XBHswhV_$~Uik=2FC*9(;B=@tMQ+t^$DMwlDqOk%je69~b4U!B*;&#ld zp@w|Sc^B=Mf)XPID7MjsB3ElDpu7Ld@%8#94ha}ai4hJYq{P07%}IO_S)BSrSSh|0 zRFi$qr#iQrS(VqpX~<~vK9D?Czza15;$1{bau}!~OAfOjsG&?_8Ph}l=*|6-8yGw&j)-}invnb=Aw&Em2J>>^rA7Vz#iiZe#ii|>vZ5BBn(RhiQ`%So zHfoUc-D4#=jO~~OK@G(UE1^Pt8&vBnK#ipy)HvDwtRj1UE%)MmEbU}e>RRk6E7loId z%!{u+mYrVPnwnK#jdh2Du&nyr@S?Kph>C)-0zT;V!capZem63=V-7kC z>+BurGS>atW9@)h5az4nZXpkk&_(~WdS{-m^DjJE5mJ7%IJ)LgK~jBlj<~TbIioQ< zTHGj(E~re8DJvQ);ENh0efK2PkcAqGQA3TwGB~J$J2|xXd_QiaJJe_8@V?)H_3Q#E zqHAJ8W zN$)>rJ7yr{Fax20ISiGJBi&lNzw{aEe7Iy`|LT?<>)}me@bwE6(YaoxxTnLr@X!JO zispup+M3$vhVts<=G>yRmZXfd=J=GHnz)qW(y;=5s6o_GY|?(phIQD zw-Z{s1}_?Dzq)H~|M;EOjs;!pI`#U01lg)N=F%<;+%Q;3t?J4i_(jNbs{I1Z3%+8qRl*4#_by`bEQC59$eqs4o z0X}Mw^#0RPL*e#mP$f4D4k#@Aeq4F&z?JXc*t_3=kTn;DULtE zsEEMKK``pzcZXtHNz~}wnO)0iDJ&l=AV3WwN&aBdBq-f61sZn!HPWdtci_D8iia3z&%@@Z}<=~Q4D@pMe7>)Fi0u>#?% zCPEtK4+=L=fEwwE(7tE#K;PcKZ#|V;d0|LN`ULJiI1D-_wckx03kR(|(_h$eb{7*f#|lKRoB)}t$3y96+yk+5 z{I{;%lOEsLG5gY6*;OZgDC{^4s>%(ZtzZ1h#9I8-iYOYir-r|EW(VOO3SZno;q{mr zOn=A-Bj0C7knZ|KIo}D0vAZpdwZ0V_H&#HjVm!`)UQKx`&_#SFwh4^~V)^I`M6PS~-s0d{XG2Blq@pst(% z23iqdVJHB5bH86M*50^-g7uB;L>s2LP`)r--9NM4T!*+G&Y%3r4ub+u+d(09tUwOF zg|uol;aA&w!sE+J$M?TpF|F&zhIx&!ZFvdEZV-d~4iTvC4hP+RfncWY3$|JuaKb$l zL<5`wF{S{;)E($%Bw$()fn(_eK34X>_%^m9!M0-siWd{0X$9f!sYQf~&*w}y_I3I6 zW>`C?1hWTXoWY2P-J8N--*$h{l=TLqJxs7vzxPwC31Bj|_;I8fpWDOUf?so#F 
zrUS6G?7&Cc2K;r#3e+ql3>}?IxO{yop?7G`!~?K+dKs+zTMX+L#KX2_p&+~359HUg zKy@<>bheSfc)J@|O1pxsj0-r(I)k&EBe?Ez0Jq(CK;CT&)IHX~+G`0M`LQz6K8tYW z++@P3yOYPY!OV#jFn?+mESVXPdpSa3(?TBXSn374R(OKqYIjgyO9bt8&S0>?5llAP zgT*FWu-;??cAKriVT&a=Z?OQ^t>)mq)f6b(#>%x56A68nCKC=nm@uyT=d_7AFl&kk z>mZ@9d=?LXCM?)8p90bgNw9mdGbk=~0M%u7uz$G?Xs@sWy%m;VxY8Uk4Z;Quw??aSfuP)CaW-oP+-ba5!JYS%wbH0 z0Q9^_I$gNI?zh>Kh^5 zP!`f`R3O<+4-#0G5XX0d=ty^nOrebk^Sr)=l>NUtObh?L@8Q6g-ra%+te)U&Sc5s| zc~WqSdWJt1z!QD95Bg0>4kJdA6~o+rI%dtY)HXu4fh=TMtAdzl0I75vNaDGU#D`J8 z$A~z?QJK8Y!s5We;OdZffsMjfe(jMDygQ<=Fgs(;&^n`!GkU^~GEWM|0#H$dBm*JI zVZ@<^WSO}jMh$tY8z5g_7V<1rA=lXuvZ!_=>0I}3$pY3FQ4H^6Ose2RWM25&u=40v zg1Y!em{Yrgb&pewmXsc5yQtHvE2^E{6E+rrjvhh0^*$o+o zIR}yta1X~e`5cWH3&6m>qNMjP$v`Bah76fGkh^yYl&GwSG94KxH¨d*kn=?oOYJ znDl{Mp5JS6i11lbwCG`6a_XI!oQxZhWtkU+wb?xZHQDXHwb{+Sjp=oMZ3)%bb{z}A zL=FFc?>`Ci?wP2eVDDlmS6&BI+B>1fMD=H_t;yFKlFMKPjrFG3JLqXnK+OG&@T8lm z3F(&;#aaFFg*m5Vu?`hlmDdB%c=-GkX#ycC~hnO3q6*k=Pl{|r=W&x8QjT% z8mjiKg*vUB&}gjst=Zb-Q?m>4T?2*vtcn$Kzr-*0Mt*4W#jIHIne^10-jv+D?xeDU z15xFL)gk4DrGo0*qL8Mvg3z{vu>fAEAqa^@4XN9vLyipY!`Opa5Ui6nX-bbAG*tQ0 zZfP=b$jS9(ySw+pCPv6V)jqKoN(CvW3nDWy1CrY*78kUo7ZueflopppmKI}FrXVMx zAtNi|VA5CsHl8o(yT{>YA{{m4p@yAJR0n4r zD38joD@iP>EXu+QC@RZLD=JGu9f_ivoCHx*+E@T@^qP{syQKG@i5d!V|6UR+vn4IO7AY28a=yXL%e;>g>&IFIru~uBet`Rn{uEjFsrUkm|sy9S6oz)UY1*4 zSe}woP!W?_R2Gv`l^31TkTDj3iyFd^MErhaZ<`9mxPS4#8Hg3%deyfMT+~&3e&5*e z{&OprtM{GQeV5z?U8iU<2aj@6>JIy5l^?_^LsN8dc71YLdR=Zsd}%?2FuR~4EVD8{ zOk6J>3*d{sI~++y4LPWxOy)012I5zT(z2nmYMWl&*4g{;wW0paw-(O*&m34icU*(o zFL_2coM9zb^!j8LbOq*Tc7zwFw2R8(TQe&n8VV`{QAa>wML|GbUDj9t9-c4h{iovo z#k_4(ph9LkH0_=J{ixEC50}+8JbtdZ=k}n1-lb3GjwjyPF%Lg=324M#d&PBnQo%)U zapqb7+@w>XMKQgxrNW-{a>3EU3g5=Ua&A?50k^z1Yb=02djDu79rrI5;tt4a*{N`F z@2t-!_APjEM|s_y4;s6#4D0IleKWQ17_gx?Ja_ajyYCvFdy_0myTV9|zu=u6evY3n zI1^UncUn}!J)K#~I$m7LXsgI)G}UE|1qej%ABSXan22*A6QNFa%E)2)zuuo!oPYPZ z{Q4_HYP-*Yw$9NJ6WgYbR+O@rcD~sUokEjulVYQ82dAH($2co2@I0GP 
z=y5i?(Bn*XuE()@@mPRh^!^D*&V~t4E;Rv~Ma2F=}e<_@SkQsmH?uNh4zuU2>=g02#2eI#84eBbmi$X6Q%q`-++d2wAJG%u8 zlBnEwWES%^&D--Oi|6*7%XfL|AL#HTSYY!wGT7o#Qi$oJtT6%cOUFSjzLE4l8HlZf zS7$d)xb$M(jNUI&i`qbLT{S4|C;(Lj+(n@g52pG;ur&)DakBCKPIC16N+Hrd)7{9Q zyvU?MuBXd@Kh5ETfMN44oMrho-plN5DtjzIDtiB7)X=b&@afn(Nd{u#sgLVs9)hil zYGCK;0@$-P4QDW7K}#_VjMW3cTH6O4^}WE=m<}Fh6x=`I{)=htI>NDa{^@J)@FT#% z_Irqv)wd{T^Wg-Su>d&>um*w|h(pT>H*QTEcjo=#$sNB|&#Z-w3kpDLMHF=cZvDBnG5c2E(59KA^mr30hKAFqZZJOBpw? zlXC^9-7dI$!U^2v9l%q;4j76yz*e#ZpMB=QS2l%Um9YSIa|kzkClbzIo{{Xq3QL_pb-6uguCN8&mDXUm$`VXhTY$xCGq7G`3U+Ia!Evn-5Z4)i z`#L?KtRD-Pj!z(*xG;&(dUrgb?A`eBVwgHH7V97aSTK!?KNC8vo#g>r=D5I)xeg#V z-v;CtSi!!97NEA!4E8TF1?@$~ptslv3>O=M@e%_tTcQV+OLf6!sW#Y;32?w&K^~i@ z0u%Q~bFr`Ij~*vLaR~(PUjspATY&E@13bDsc>AdUJ3<3kDcZoy*8@hC0nl2EfZAyc zlw&5~amp0j&Y6w4UN-+myk+s(<$=Wqr^lAB9iRM>|7rN|9sQrU2PgbIP|*8(VWyWS zH4_4GzFL6&tT2r=5Mr_w1dg&0Kve`^Up3$gwSbeP2kcxUU{#m^v&jq?hb@5CZ3&bU zRwEu~t-rZlviU-~X*)o?Yxm0ef!$N5M}Gudum&Q@CwZgy=cDHhM!zYrOD+);xh1Hnh%EyS8s z4DQ-bK(Cjoyc$wz z_3re0&g$jgrkx7BL^&OB#O>E9B4nb#R}lHD2I#X1_<&c=~* z&KZ6qr{DKafG3_W>HSOQ{v}zEWX#-)F>{}%v=TEhTcFrz@6RF|t*-^H79Vrz#J3qf zjOR%Lzeh1qA$KE_BW?(DqA!P*#-0zZkM9d=k3Yg|k3Wbtm`0!O@LJ!Kf-2uL{67Iy zJl_}nrX+t5kG1nO%-m<=?!!W?QI=_JfeOPt-zu!MK9##zzAN!`d!EPUJP`8(Zl#2W zT@i_*`eVg$eNly?-iWHi?(hRi2ZIhK)%vw2RPm2Qm+_B>mhevn{RyC<*YraT;aCSr zKn)qV`!El8AC@RC$2`jBpLP1Xztmf3eW-V`dRgP)@vw~LeKX%L@KR=o@Jw1v%!#BF zQMV{JsUxl|xh=LarC!*aS{BrjQY<(WmoLDCN^o!Rp8y7G@JA%Q|0MkTnTdM`3vl;g zx#BXYSKIilS#S5pgJzmhWk&J|d_SDSemgM5J zhNRl`irB{Vd|^|1j<78uQ`i+H7WRbw31H&+0q8fQ@GeqNL-vlpa6c=)mqcOduNKt} zU)y!$J{&R8d~wXy>VBuI$JG`p=WGop@OUXdyt6PYwly~{u`w$xwMLwuUXf8LE=;Z$ zXT~&$)1nU~r$lweBu90J{|R8>`I5eS3~EWmza6R zHJ+Taw!C%1#r^zY3g<)371H!FaH{+8KeZP2BMM+JLyqs_>-Za#31dsaTv@T%46!Sd$%_QJWo=(wH8R+%Aep zJQDpUfQ_Cz7?JEA5Tk}d)KG=_gBFDaUyiA)et%JO`;&+Isy82)oA+OHAoZMa^J?#= z1~eREhgTl(iz{jjNy@2to!p8EPz{;8&UQvA}FYjOyFXwRFp8y{`Ux*~3hMdhf ze;_>>GZ53ibt=q$cR_i{gI6kBZVc)uoq16&ZVl7caaR 
zk{s9{o925qL(J|g&Sadd%cLA?$e=VGO!TZf6!#~{mGa!$80Oi-lduM>|T^8yD}Sryc=O4`{;RJDh00n|u-b zdUz83YGNwoN=};FmC7_?e{(AFSi6YW(Gl|}APDOZ@#z1va0g`BmT}N1J?_iVT@&u# zmYR9-joi|c!%ACl7p8n8=;@UGGPBJ7V(Xgt-jNaUlIR=wgdD_uKo4Ww^^PLn4v2NV z84>SvGf8CsPoBu;X0^!ndS{Gn|Ix@l0inyrLo((M@^A+vW*~mG?jXEABQ^2Lvn{hu zf0A9+2@0DVL3K|#XlvwviE%Q{U_{}&I0OT(G~YL5HuEKoM|tWM;QE*s?C?+!X8j;a zXmLL!-1L5axaq@&P_xHJg8u|WV*Md~Df)j&24W-j-?tEO55(j%?>Eox`nhvi6YSko zj@~~P_fV#Qu}(Btn}+;ywDJ2!a^ifZkmw&76psN8jrfkoa(F9X+q{nOws@7`WBN+$ zWAw7r*Wgv1&!2$!MFhw}4V9}1Kik$49$Z~8?)h(ghRjno>&bF zr{=-(nMqg&iNG0*K-jt58}_cIfyz2EXm2Ee(PkI0*y;#&Qn-6!yDgBUt%1DL0_ZYk zz?L-uUpXTP*rg9)yY(RIk3hpT!quJ$guV-t2_5$*j;sAOeL^10nUVyHrbpmhOaN?{ z%K@o{G>~254hqYNpt`~lv{u=H!D<^YSz`s3Yc0Tbof$Z;Hv!iTM%c790P03PY#!_2 zV22j?|B>sR1j5-q|{STNNSmQN?a+8K_pX{HTG{cQ;{v&~@lY!gtJ zV+6``4M1(KKJ1^T3)=H^KyRKl7|z!Olll9>e7-tZFHi-$KY}Ht$>1!hgW$WPXqaVX z;vOrO;zID+zZ|^G)&YaK1*r5L;NdR|ZjrlzlqwIzd_{1nRs!c1WpL_L0mqZbMOCoB zp$0bh)xi2G9{*bXr_~4bFP4MqLlz%L<$oIfdp}@}&rU9q8U)VLGNfhz8~d9ag@xd) zu^hN2>wxXN6&N&Wpz?MBIb0sxla#Vx)stASAgTg;ETN@@`3ExfST?1P?{C%+K<)Ru$4;dtHfgZ9Cs3KLMWN84oR1-Yvw85=S7u=5Nf$KSaaJgde%ju56H^;|@Lk`ak-`l-1 zdSUz4=&9{{!_feH)ZmWw2L{$bxR_7!L%$iYcL4;cE(d}BdI+$U0zZ#kz-21}J4h9n zv6?_n*8y6gK2WO-fzo0OKG!g!Knc zM6#2EkDfOK_x%fT#yfJ~a){R60Ff3uKhh7=?K(g?M|$mX)%_9aru$9Tdv2FWk6q4@o;#c&y|5b%a6#`+L%4VsL6`** zqK0Vnym3m)@K4SL%;iX9E=K_(C>kT7-Ui=;g3Z7B$J!0~raOP+7LY!$D?Q#Zn>=6B zJ7_Pcrx_2)7g^WcuQ4xp-10o_anHSn{D|01e&ReD;EFv(26{~%`tA^{2SwrRK^)E= zB%ydvN_Z}~wchk4h{~EWMf62Rxf0o@B zaEx(=-@&|q!`@d}jjUUq4Xiul(ZKThBw(Y*3Pc@}JrE+?`Im~j4>Peon5VW5^7MCn z%e7Scl?A6OE2hF>Q*>E9mQ!#fe&#yKfy z;+*xb^SD zq1aLXWj@*VQKpys?G!%qib%*kA06-C7m*&=8&(k99a0(EDQF367aR*~@aqYy;++aA zwX{u_%LQcyz{?mjHQ_dk{^uO6<_**;ips`R?rUjK2q zo89d~ru*eAU*_4=V4o9-(fsbXWI;!Cc4%v4X?SyZLqvnHGonIpG@>Z*cxZlLA3rDX zA~!qmGJ7<@6Eo(%=sCksLp=VjNW;B@Ik@|JAr&o(xJ4>Gw?w zxxgI_pyByE^qb-MZvtu%A zWo*`w0)OAuoY25VacpQ!T53c^N?ufPQbkNbVoPjRTt{qb?D{C@p+>QFYDzds=dru9|3_JY#3o+2cxTKJ3Y;Zt?OhsrL)Y 
ztqBRssEmn9E=!G#FUc206;~$+3)>O`QAa>}S7boSNkKsJ>A=wdFFao$@x7=aVbGEsY{9#*5rr;RTT;R$^!{p)WOX?62;Bx4dIGU1&s!9P(vvC?u1q7y>b6y3FZ&# zWF~z+yl2|$euckpzffF$@y&ke-lv8d?RPEB>aRLFm-V|-^G?yc(|b4p30?fqsKa3q zA#L$7{DYbCJ}sppc5_n#qo!R%FFO)NFYFDW7n~G~2Ke9|2qoSd_b=vd9ETZ*@gq$# z69$g$o_zm~?BAE)D6HuFsIk5Ct-fmGb90l5hxU&7w_PdeS3TLH3toN^XL$m_sSu&x ziP$JkPkJn)yExACXhXdFp~G=*OWGsNp{uh;8G(w#kfp zbygZ@Af#uX8IWIbbXZO5z>w~~$`7Un`LAtk)1SMzi5`0}!tXPXp<8_e+IQ#0&M&j{QfX9FBE|JG7k?I3741I2sVJbUef@83&oTfAK#Vh^>Tyqmmhj zwNv`v?wE7z>)w^Ept7yzr|R-m?|vu(jXh#8&=7&8VfcuHb-*`QXYV132YrA+cYo{c<@`E;Yx^?7 z*YZWOpXu{_KjRm*env05ct#KVy^Zc%~P8~A{$8S@v}hVq@^KpOUTar(@6v-u<>TYQZ7 zG#M0A4F^hSdIL=~-NECYx*yMt3dCXkAsaIgl}iZ2hZYj9U7begd$(}Hk?(7!H^7#; zC9rdu81}3afwGhkw08x7sUjEba2KYF78Ts}+<|7~0&G(U@H4l;hfj-NF*YV6DYk|` zbM5qhRM_kMXtUS;(KRZNG7Ilu9?pTxBV0d?8Hn3c2}g$)PHciz(@SCFoJ^2fECRVz zLQvf32kP6{U?4*WtKAgbf$0XW`-nhMaRjEiEpYc+L4cMS2(^tNUe^H9_4J@XUmI!+ zw4iBJAZG^FKxPqc9K}5l=cf>kJ)1hN1?EpGgJsh*u?`}F&GW-RdZ|C`S;Ye7byU#C zU6{sOiD0$e2^^*E!Bxf|AlCWK3=L&A17NEwv}%>=@w?(u{Z=Oz)_ zZjUEaycs_(3#LsJ!R#raSO?+3%9$+KFq;BWbBQ3kz!4M{*@Eg~E6`kG4th&X!FZW5 zSS&XL+ZB4?v{DC1tF(Z!N&}dyu{pg)1sfB}IFL6QS2_rU(>RNA_|kYn^*sV1`z2wV z2qsPl#X5*D%$vf5CDX{TdWH*ZoN15s2WzZ9n8V&Vrl2&}2-M~ofaZKX(4DUfh6}X8 zbb%&VF4O?qg{t7R2u);1lp*aIX0eft9A7J;MOxX6nJ7nL402OAC}|X@c4;4bc2s4Rroi z1%tnp!FaY3n9o)K>)G;PJ7+Y&7;7NbTc&{H)~Vo%bF3tMjw30~1LFS0;AFlWoQP|{ zfw2K>{WpVkho!-^R~k&t?*x(Uwwct$K2o8Q*z%D`xY?60? 
zb*?m6RqO=IMj5a;Bn##}vS8LP2WGeAz~qS>7`>GP!=YVZ_{qz_xBT z*tG5e>+Zc^eO4YUugin^Lj^EfYk$)UzV>`zgvD(9kLiw{b2q@^_9hU<$EjpIZ-UKC~WI9}QfwGYN=Te~`>!cws%r2YojW`@DYp76V^v75G_f z1RvM!;KkSl3_k^+hARUlNgX_LG)LS^wSSQ6bicc{=?yz~8+>uICK!sw;l zYvV_D?+x$Tf7H8f|5bZ5zz%nryJ7u7GKaxM@9&Rwu|S-47bq@&rPU3m zx8|3e2aV4NBmaKQS5hs6I&Y7m%#2*TNeQ1rY)g+&mlz6v6YHba=b3<$^yKm0lB z!#+W}L!4-%kE|5)5ATyzKhS;jHT`o0Fus zmffTQ)6oDYJl|8|z0r3E;LjrzzZ;R5`;WoeT)gT^h%?v>F*dT_BS`x`hcLAU1O1KO z@r0JIy+!uVSz?zb^g_1>)M}5ro(IXdJbTF3DVNC?JnoQBdORR?lb9d%m;cPv-t<{)CifxTQT0_4#o%!a+vZ+)fb)%z2)E0D z37-A@OnM)$hNv7RJ(gLsFSX z1$pd_fO79PeiQeAf0s{_-vys4pG%xFpBwZd?p<=8_aowHfIFVemUvDi5&mk@SwpM6cHHOz-Bf zV&D3ZT3(IdkYA;s&#%b8-#5?yidVM(O{&=MzFWp9fCp-j_&ou7?-<;-myCZuGV%S7 z`Mdu9R;IXUpjLhD%SL_a2X&Sz*UMcD`wKj6PGqr(U1@wudr}y)MHI)Wk4^Kbj?VWh zkF4SshaU_m5cURS3C{2{1Q)$igRV1@0`7W@20rpk04{0>LC+V58dA4R!u_l`hp}tc zmuiIt?^;w>KRv9w{q_NK<%_kB1}DqNHb)Cs#I|f6r70trS(_T|U6GW`D^AGa=f{@? zX2&)KiDSBhlEY60CWf8&6@^}9#RlK#ls$LsjD5P`_44m8+6yWS+|$biA{xE&zjsoR#{ercVR|?Urw4hK%7z> zl$um8NK7~q92IYAQIC(qeyBK|vTdD>v3JEjukRQCuL1 zORouuN;?!HOzI5@5%uwd<1TUq(bpNH0d)NT_eb9wi5ik|_hAm^4@!4T`dTM9^=-%A znUBvaF1&t4eM?`Tp<-9JweG=o7t7ific47?i;`dE%gQVd=BAWH`HKpZ1EceEgN3=3 zp~1MEG$8YMs9);oK)}WT%qA`~xh{|!R~_ybQJD}JT$UBgFD(o8 zEp7?(E<7IUm3=D6OWe=%O1fyF;mLW~ z$=9CA&Fy=vvf;>W-962h%rz^|*qarcAUS4sQ{9s~*!0*oe~$1#C@-ifE`ZmNA>h=N zgfMFwLuu7Lp_G!7L7oNYc%C^|*q+%}ynY9G;~j)byf=E@Y|I~&ZX$eb+(vkHZ2P!> z9&Vb_|9aP)p4a=5UGiB9DdrT58hYH)3D;IvdA=SQY9X@Ey^{18gEc^~g- z{x+Lt{HB_2@T#4z_p+a+cmD=O|I$5o{foZ`gy{QIk-~+9k%k3?8>glc&fl3v=owlz zu@yG{RRufd=fmz5DX?#2G-yhPfHA(0!&;RM&e~M)FmV6HFm)c`TH1~XY%PAoIGB7( zcQP0*an>DfanTw+?xHnx)k*8sJ%`bN*uM#og&By-xrCcXCJ`>+97gY(8RObVmQJdH zb$=DWws~nFyEFzA)(Svvi!bO)dx7O{DmW^-14)GlR1L}A32Wf%nnQ%XF(eu4L$0w7 zE@an&L#F%Tf~h*(H5m;^nT|D(nS`e;6A71&Pa>SUf_oS~&m3O|izgStYMjH^G$#gj zEDFZE@PU168KAX^0>;}&U?c4U&aw{RvC9S+do936!32VoF!-yi3u!7^P^7vamvO1X z5p@-~id_3GkTaDqR6CJyr4us{eUk{CIEPU`{MYy*tb?S(GMvL$H!B#n&hrJCMGTN% zMg~>fg{iyN2~2PYru9ZEaM~={Ibj0yZHD+zt_wjsv>-}a15#wvumPY9HL^<3DK{EW 
zHlA>;6*CaWClZccoIq&2H=a=ZWy<(;m^CQ|>mUMH@s|&5_?veh z2B5n@4~!RTgT*3Euw9JJ@5L(Mu|x?NOBH~-bPohBlYkMPqiS>uwSbtEMr41^7Yl6n# z8lXK}4GdnKL2B~S9uydLQ$W2#+z0*}dar!<` z`AZ2j{!#$#zxIOuj9p+dLl!J%NQ2$X(ZF@DlfY!_WU$29O&iQ_+RDuUYsJ68QfnTV zn=b+rm!)9HSPuIBD?ul6HE5@-0nNNMu)lmQXfz=0YeD_kI#4@@+(4eJ1C@8{VBhDp zp!5@gwV?P1^iYE-K0{jJv#cHN(XyAF0k#T%gSF;7uryl)X3opNguVg{{Z@f~#2U~| zS_?Wk>p;6~9cb0B2hBF5dp+zwvmP|ABahdE+S~P@GKBnC56Z9}_Wc2Ue0DR(8icK+ z9)vwTC+w#=%FhOS_4#0Hycnz;mxBd$6`1+10h6$GV3e={46-(Ye#u7AtK9^82R4Cj z*JjW;ja=Of+7CCw{@2K-&7kpPGpNIV7{Pyk2N+_0)e`Fu_L5o*&NUOIX8>`}-{7n| z9~=yqfStn%u%)a9EAMq+5wa1?;x~b*cncU8Z3W}%tzgu=4GcTBfx$^BFt{uQy7#3( z=N0lv3bemV{nP@f@5mQz*!DZX7;7Nb=>MHi2MKqYyJNoEWB1?SrZOLhx=X;>b|pBu zuLTG8Mz9Uq0@g9xz$#4&EDCmjd4)8XHDXQd&`zw2$$;?%88Eyf^UL6+>`#M_vfuQ- z%YM-Z*^m0L^SuE`{|+$4^CkYDh;|OeV?D^Kb2vwdCLiSojep3$Gl9J? zOtp=LQMxfBPfg5ipxQ5Ar zOTwNJr%d^ujzx;!?W^|<+qEcvu{omh*{VOZy})V*W-MeCZ~ca4j7zf{lJ zf%5MF8}wEZ?@g7|Akg#jFryTJzB>?Wb3v+0F_*Ijb2(c^cUJK{ z_#I%6=ewczW=LufSO*Ej&t(XHH^Q(s7p}YnB6QaL471$&ML?AM=ucOA$K~yR(9$TQ92J~`j&jz@*{P4{NoYi?b(mr4D$=UpGzo_2j>-9!3dc7*iFsGamh{~&2t z=XZc3o=-;K>xI5sG6NBU-_Z#CZba{%4Y5j#N8&Zte2F#NIuPZw`*kQq^%K)rzRNLorRx!|7Sdr>H>r(ziPX$^K&oXtbF8GlvMHm#H!r3S zn*0uMK@AemDe=9*cqdW#`$DuG>mYJ7e`Y8w_>`lz@@J*!2Cgm?2O@@z4Ja4sVsk9qPOXOzUA&~na=9{DV_!= z<2e@H(R{nZ;X>!uuy|5aaJol*PywYnpo&_~Z>8Y^LRyjEO=^zsJ@-tX$1dsI7j~)M zZ>)a@xZxeJ(C_ilZ$+R65$+vK!~8+E%=FKNduG3>+_&U$wa$i{<>tH2=R2yO%=9qm zN@ZEJC-CfB;zC^Nqhs8vBT~ra!W?RGXgNJUxS5e7IL6ErTxFyM-1bcNe?Us`d+I3S zy|nus;Es2|#`A-aDBSs%guQ>U)TE#JJEsno@1FIdL22Q=Ce8JiYfR)$m)fg!=errS zXVER1(z*7v$w9=5gb4TIxCGDqm`r+hR54Q=QO`;ZKjM`nyyz7la)TZ#xaS@n_}C>f z;JL%^05W=hiRTQ)o^vc}NX6cN_O^+`#XG0Gui5q26P&}ic}RV2f0L1HPqnS;;Zj%q z<^rlkO*Y5AT+AmHriFRn`(UWz#B@fgsF0NySHq5vIn0TPKF5iOxW*KQ-JyhrJai2S ze(Ll)fPy~n7Q04|;abzI;@5&8aqnoktq2R1Q=- z>(!P~%*%>c_Jw&oVs>`0N4hwgnv|Zxj8DyDM9$sjef5ow zy5%+QW`z|DyX;aQVp?I4M?!uCEhZ= zxX}-ZzXRx~LE?KOQAaZN{&Ut7ew1w{yldJv?oscy30JR6%{VvF~Qiy=$Grkh^9*!~Co2s;?Q|r*`dcRZVjBSZjLeXkS*r@XFkr 
zWt;LchYsbX^`Fm4>%N+n)_F4_t?h2ipMZ4y{H2)vn=tcs#J8QbQ(Jf3(`f8qpxxk}NROpE5(8VeW<=C)EQqaGUzt?At|={dvO6nd z&5E3qRnxhN^P6`1{7vHqYB`yY%+lXn}W$$jjB zc=3T4<=A62wwXJIqSJrdD34!oV+@`0w^@2D+^zLcykFh^w9xY1dC`SC%M!A;H>9L) z?MhGBJdzc&aVk4vdT(~v)Wxi@)z?zPSKLg97`_|xCm`$J`wVaqlI3=27TicXF`<0Lab7SpDGs&cISt|!p<#Z+p^Cp(qe_wITnZ~e`3Ux(YJy^ME_ zf05=D`7GZ*}n6k5H5N z@3B@<-%{*DzGgf7eJOSK{M_K>`l;K;@#9KgyARv^Y~CI7wSIfY$L7g1FPl5BJpTkV zvEdqUkk1oza(O3>oWdG}{nuE?mfyV8)gZw#1PUB@4+QUF=!+ENJ20}q4&TS&sTL1G zT2T{2+IPt7@EORLld}VXbi9a2nLf;uI0yohOnm(Gk}gDL|Ks2afeKXPjgAs4q{4a5N^vimfZOkE>n`F%n< ze$lD5z>W7Xgy~tB`;$PCGa59wu@41rAeiv`g0+ArI0|9k31RFxA>sg`qBamCW{JOe zO(9>x2r48EphZd#o6l&&_+l;C^hYkE138IzQT839k`0*uM{nZzj|i##PN5W`2gv|l z=0p&oN8&t!(LeZs8mA}dE^-4yE@v?3b^se5J8GqB+km6^Aqd*W1=%AGM#qo~;$V1392n2VLI0yT z==~G}JrDz(e-Xu>3uBLGM{^Fs8(qwt5AxS$2VcWQ;BCW$*A)4|Ekp>M5=FovR}5?` z#lgB&0<3x^!E#g*EG8wve3KNI&q#sU5#*c{nBI~Cn_A z;{Dk<2jPd=CJ=p45S}Xr8Y}{TOCIob6#%awVQ`NZ1D9+Ga4MGsho;3~-@O>@hNaO- zNu!gJ0qY&eK^d?*BLkNI$bk7188CY<1E$|)ew%{KPZN;-ZUT$H{Q+>nzvaErLCn=b zgwFXObW&k>uQ|k=2ZEdgz&}6)d}77HD?bv3>D^U1k1@a%PK<@n?05_~9_QU)if*xe9H%K)4pcwRG z(WX2QZZ8NSzM>EmB?*D4GT>Jz2R=0l;MJxCp8d+`gH*tMQgzmCv)XUBJ!(IlkE{Q1 zx~%@y@uB)B$G7V59KNf+MhEoL5mcT#g7Tk0?M(YO|IayySgZ$0z}%gLUM$g=2jXo6 zA;wb-qQVzLM3NkY<|;vOrRr>8i~3Lh9?c(q!&={bS8IRq*`V{uYnSdvk0W~T-7o6B zaeJWu!tJg86Sp6_kK94~0Ty-L_5h7r9-zJu;En!aF8|Lth(xTzNJAf#f#=;BhCGmF zDF`X9;*b<91M%^Szhbh~zDJg7eF<;S{TSM*|2}w-@iu78=ykx9@k_sLrq6v3nmzS7 zXa3Opp7~wxw`Mneei&c(1;%AR&_Cw~x@YmC-9q5CLna8v+&g#for(@38}}m*b9Wwk zu{?7@tjiIHEPvVW=`kvwlhd`{ClumlSlS9E^crX|k3xO?{m>>#s?_B=R#ChbSpDV`a zvJ~GrP--dwWwzqqioNANS$D5>1yUVNkNt=LZc8!&fsNuE)Y&xEzZ3=CBX_%dQBp+!hIDTO+}AA@F~_Ob~;)ckaA57w1rd z-;YYXdr*zfWsNc4_d08_4>j&`FUv#J9~Z?L+|A82|0k>1=1O|4!-bS3&S#Q(-A*Qs zcpQ(P@H`y1#p^)qL9cx=7rbX;9(nDE{p7Ye_M78$+%LPev0ybB3ziE3q4@dZF>_{M z_AkW$u0T&$i@&QHbvS-C8}fc^wG@5T;v)N~Awd0hbu{B@S*peP!hE|^c@@sbur}v# zW~b-=^g*9JX)AqqrcC>7OWy6jIq8)DhQz!6Q%P^V*Cc&%8&CS`v?B4R-DuJ;+l9cg zOQwG_XEx@o60CQx!G3#9`2UY3+HBuC4Y=QQn+ZMcc9g!|?yG*KIl}O4ZIb2jifo5N 
zB_*zV3u`=g<+u85%kA;soHG(Qoi!P>E^~YEM8?sORp~c^$I@Q}45z>I8cO@*Hjwts zsXz6*<3d0LuEAX9%oW8mXMEpZgFHG2720fud8?i63}YN? ziMQNepW(2py1;E)MWy$~(ni0j;?BUyqM_hb1*<~G@-~Hyl3H|DypuPgPLtf>oFRoNE2qP#zBxO6OHuw**2uXs;XchR}1 zu7ZaV?fK6GTl3!dHs^fsXwLcQz7P4oAB(vONRU& zR3LB1l_?K4sxV#Mt-*0}tEuSz^-fCL*ZAnIUm0e;dN|HubTG|*s4v&Ix4Sf`v#Ty_ zNk@BRPJ<{a+upD_YDNpX~mZ4lYsLFP9##ne}i-W?(X)oQ0$zZb;<1zMwE0W!MmS_94 z4;2SB_g9D2_qIk=b@#=Vc8$dsc5F<@Yde^jv*cPrcJuwHtcE9{S+y?%vT9%XEd(Uv zKICKWt-(38;n{z`6#0zj|Bvzh|MgS6c=ubGb^k#_!L2j4a_hEvXpe0SG#y$OY1ciG z=-M`(;nTFDAh2enGOT=gQ&bVQpU4{+jnC>^pP1IOFEOS2a$<7l-I(OI$6?9MF9MUB zUj{4$q~bmlVcxCBd32%&Std^2V*US}y&UAi1x|A0oIK0Uzx4U14_V8s+Uu&hY^T3b z&(?68wvF*FjZYgCv#+qj5aI>bu;y2eKK{w+h>d{KvY&1nm% z;bYEfy@z}lZF@s38)jl0E4L?m6mQA)&D~TSl(C^UJb8V4RNT6u*r>_LxUe-d@xiOl z#RZSuiw+rn92Pq8JTSEXMZiKpHjZ71*}DxLz<>bxw1$UV*@5>Dj@?QwZ$2F+Ky9Zt9AV#>!^$pDA{2mspzh@!4 zo-ZOBp35?gKha_zcxWu#ao1L^;ikJr#ozvn!YknxSr_B&Q_iKk#GT3aj67B88~j&G zfdBFSV6P)<*q(An>{>k^n1HpTjS$!Wv8#*rE|Wv=bw4o{q^3{{@^D!`~9C>7Xqr#KXkB> zA0u>fbv>1wnZf*j0{bvtqm#A21t=pwrJ1{bsB$!a)8ni7YATlh*_ z3OoXDq zRx}jc@I4%Ui}Agq*!MDC-UTm+J3^tdE!3)7Vnc80g)keNgH6p7d?FGtqvTL)rOn0T5xYcuCFHK z>=vwn*o*o51cgjoBxC>`L<>5I3ZPT+fCukkh%m>2G~UBdVhaQf4qwn;3b0XQfR`y0=iv{Fu?9kc?h0zGPN2hPkM6++%sDKF#K#9cy)X_a?qkAx*Gr)vZAFNn){`C&- zY%1W#rT}5=vJlU{7}D4!pnzQ*iWlVI9tzoVgpl#mSO;^7kb2yQ0;~l|enm*c4?+Tf z5KmyDxBwf)0iRJD5TsgyD3cjTF&Tp_lOZV53_z8p2bwf(&}G&HLuNHFW5&V*W(=e( zGT^}?1%52z5XvG3;R}M-0cl{14#Gi=865HbS&n+FU}wq>)^?m=;l%|e;oQJT<^kP& zUeK=L184jn|VQHh8L8NAm?~N=@#;o2NXW=fZR`RtN{5Rq=5?- z{{bsiCYW;&b1udO&(fTAS;5hm9qep5!Pwd3>N>#Sglz{Gi>-4_YJq zpt*)0G&UkT`9b{eOf}p=#5cF0Fg6?``yCCT77Xi`*yCBE8)hqSWUeN~AJ13)(2Mz)Ednnm9&91b56)47;E*N^c15CK zQ!55m=%g(B#ld0}nGi=GBmrhK$WaL}xhOGfd`|+aFT}qYeii@B0PznD5POfjWq{~H zfD`5_Ps}xQbr3=5K|;{QgrFA-LN69z#|ysR=!3%12PKPvYrX_HS4o0nixfEYpc7gy zJ!`j0`j_pr%um}LGT&_u$$YaqC-cSfuFMCES2AxczRJ9^0O{uzu=uG3NIkIt$%Oz{ z9N!1?e-QeIxw{xqczLw4mvu@Xus-OW<9_8g4ri2K+22-vX7^J0k^NVt`}UxC#~$Qw*@N6a4j{V_ z;DO@@VD=6}qUY)mur4MMvwxxiC&ZcaVqK09L&ocYaG$ 
z-}v;Zzw{o_cfCd^rE|;mrS^5VubP+LK>dO{sGf5N@?4O~>0cq%@lI?_kCi;kfiHnqZADyE3IwD{7d03_9 zlaMBzhe2I>_X36tZu^fjZu(6dUiY0byy|np=#uvhqqE*Gj86G{H8|l5x<`FM>#!ea zU?*Mmg}~O+R0zY|8;6;1t_C3spNl-KUCYO3BF~8XXO6Ytrz{VNH|e2r&yo{V9wuaI z-Hj>HyBSr-xE9%Nd^x<&x0qbHv6KcZFfiRw%ZkP%5Gcu9lOopZ>={( z{xV-51tx1F!EhoH3>E^feo`SC$4|lRpNn%T!G3!c_&cjwlm4w%pW{uPDc|E7N3lB< zzVcT~BGk|2C+VHc&M`ikQDS~Dt=4L9$`ad|{MB(V`L|795?J;of!RVp7_LDA=DjS;oW=Uft=RT)WbLICP}_wrNWR%Y}dlJnv4zHORrdRfc)99?#ud z#$!Oun;=Y?J9eqJwSbDbEN*}`Xtk-nk?&y$|C#mvMT4%l4iGM zMN2&g3YUBL=1=-8&E4+Xk#oXtN%kGz);af)_0_p4^QV17#;o;1KqT%%3a&x^T;9St zG~&5?hb(#5r$Bi$tW3Q=uEBbG#7ywepp(o@kFVP1j&S{{)_Bv^P3hJv>hm3z*Oa>q zR5iHwRCIWCl@0o~m#p@0DcTaySa>9$zTjp+UBO%Ln!Hc0)w$mst8#wXEd)g4KBVCq z6yh9eF!Q!ZlAlYZ$*Un5a(A^1<>I6o{rI>E|K1UY#ajoxRi}DG^;UPqnvS-oS`W44 zIP^7@xGb%&^=PkM;@wi+=hslVGN876V^CGu!JzWeYe8ituYF64K6sQAesL}?_~Eb+ z5R3bef#Vlr_O8RT|2A>*wNHXP9Tg}4;MwcEIK6W+B)MZ(Xky#h(1ayV zf)bkF_#`xZ@JOip;<6BsjQfy}xwi&$cN;(Xf%X44R@2FaO>}bnAol+|w3xbauQunJ zo#rCLTb$(kHu`9GOocF-CS%NNRwvt)ugrEXT2btgJ5uA5vAoScd1xRgeqc>VO#hD1 z$llW-5j~HCBD>!BMz(+Oh;04rx)6|tdr*YgzhUnDpNG5}#{PqAS;(pFc=nEG&s+Zz zr>s4u!9IG>M5up{y-deWPqn6PL3%ZtqfARTBw80tWjJK5EpSbpsPar$-QpXwsxKgX z<*J~N72ARXN3fa8@`r(egKvBSdp~#tE&c4W5Ri$VzZ|o73)Ua>age{qnaDYG5Jz|8 z**D$+n7SxPR-RK~9Xw^o*LmDVvgxp!QuP6U?UKFWhIzZ=%`1RiK`@!i+$=COOs!*$mdPp2IxJRP<@^l;q#+0|+4tCRD@4~K<-A{@Vwh5YQL zk!$0aJvT6sBQs27=W!ZYdzns#p9+wkC-O`!kF?pV@0$t~-?f*_zU`@ydNW8Z{zjBe z)Za;rkSkfH{+CKDy)M+-xSd;S=X7Sw!S2)+N1MO?a?er+wB^2${* z?xml6#EUSM;OFt0eoxbNJ)aaBxIC^ga(LKjV)J0s%<|qQbMw2$&CG5+G&8;a$<*`; z7@M5~qlJJ5g1?tB|F6P37@P18#BM6tb{zXaTwo#tw^>O0tN^9%_hMSfFBR78pSp{Z zewgybe76@4|L!3k^eu3)@7E|f&#%dfu3vIg96pz;+k9@(wEQ%vW%hBMw#kRR+D7lM zY8k$Lt;KlpOUvLHXf6bF5^`e{YcSRma&!lU%pAfRh|?6Z{3?~SKcJC1;H8v+I4v9V zb~5IEd><)S7+7Epgrk5jcnNz#kf;kpW6#SJDI3U>#uoy}m_mn~5sb()V6(hFoRrst zTk^W_R!-;F&jq=?3?0lQ?!i{f-{>Ib^7jg?gIJ39AZi~|NHK6yvOtI_85T1~gA!{f zXtDc)A*VN3a=GK33%vh<{vm+h8r_3ABng>d!4(6lM0BA;R2x=^X~HHkbvQ1j26vG6 z3vvY=$SJJDI5#?7t}1sKHP(ihwvG|HRw7=NDbEH26HBH 
zuwxPiHzt1YrQwAR8XlrA2)>4h2`tb-SfLBC(qslpJr*!Cp@X3v8|ZnmgJu{9s3viO zVjd^RRdIq$D<>@OK0TR>5b`B8VkDO!&v1{xg`UrW;27*7>Kmb_(g%9Xx zAQr|H$G1cWVS_Hl278HF>#%@@F)NtZuz{fm2k3@yf@T6IsO2mIrHVzM(7Xubx{+mz zKz1e0X^Io1w<3FSZYOYFmyriJuQxcKZ#WlV0}(_B5&Q$@=pU>RJIoaJ=!5LES+GSj zD_B~wftecz7zS~IUfd$k&f)@%GA>YSIpsR&ashT4Xa9DD1}h9bE+S7m<65 zK;|{h>nqL+IIt1~=L8(T#TN!Fm2iA?5RO;};f(ppNrMI3BcYSBKquvlPRf4~7)Nsf zBb^)cig`e%jt8{bkv<;K9N__tH9Vlc0olm|YKM?>+@NxY8f0W=K$Ps9`2YmUC~Lon4u4H-~>A#F0hK=0rM1IFv;fwBXm-R zE&Raf<_Cji{GdOMO!0%>He^3P=$z)C)xOF9Q|me3SItj+pEQ2+y;ld`x5(>10Cw_J zaKVh}i4MRA^R5p%Azv-54>Dp058Fjp8^jGxp}b(9$PYF-f?!oH1Qv}*r|@sH0pVX} zD};ZTP6~fF-7NCeWUt5qPRhp>>w^NZJ}6dj);Ux7r&F=$H-}oWFZN5sKiTz4 ze6SsscxSU(@{P>~$=6mhk}oZfOFg%^CiU3-snmV*Pm*`cXC-c$gZOm|5W8XlqL&u{ zTyT6JJZlfc{5@9#5Q%FMjeagl5ASN3aYC>o4+QuL{P2wu`Qn`__Q5M(@~ubZ;#Y3X z($8JHWS+VV$vt*nDfiH6z5G4LoeFmxjw;-;zpC)J-BX3j_Mha>+k@;G2aq}GfZrMi zkUG8q;Ev<_WA+Zk>^pb%9gq8xgsw74hZXzfaLz{C@O+K*6nq~RCi*HQQR-P>w#*~{ zQu+J7^@?|V+Ldp5_p4m@TA_N?bFJDXkFDzG-4Cjtb-SQ`((R%8F}IJZhuvqD54eNk zK6jAc?E!K#9w0j(@WRg@gt<2ob7wsMOvUFS1HY%4nsn?{!|^r6g8OZftKhTvAn^w= zvC_9A(-p3V7b;%~tya4b(yVbdXsOnzz#;7u0pmKy{HJvf`_1Sc@I9%w*XOq0E}u6# z+kJm&Zoy*1O@5%V(GQe1ECBewd_A_exu#rCvmFKQ zX84I;Pl=Shn3$||CO%i~ub48eqtSIbha%hb_DA#@>`-mHImpnhdwbbsBGq9WO0DR>b1)^~LxvZ0ed9xUEcZDqFTa6O+ zRhfXOXEOyJ`jwbJr*&BE9~KYKEcpF5ZNa6KA?$fq_T^01eWTp8gdCr1=%`-d2dw)NSFtY7Lbv${P{ zWwbR)d#EXq(OaKs(p6Jn(N|S>dn(?yFTl)Oh3nBQK;CxYy9bxClJnzOdoV7~v};tKbK^2A zp^1K1=@mWx%0pcd+P&@Z2AwVGCQF*~EF0^~Y-(%k9V)9koXe_~xfWGSxaXJc^vEea z?U7ad*e$!{t3!6tZ=0+_u*fO^^Z7sw`j>RfyTwQ?FPUA!L+yQ(A1RCd~Mkk2^~CuJBgu7!KBG8H(1g?@uzW>dmq!U0P&Q&{^e>)86Ww z(bngd(mL*u(7eSnuIadEbi+N**!oY-v9&+#VynSAwhAog1Bv(?oAJ#N$9o7g|^MMrHg95Dk zuVo`&+v()mAQjgDv-cDe*@0)D>-Gqcu^DCN!5s{)t}WIgEgM~BYS;Uzl&=lfDx8Qj z$XT6gl0KemnKV{r8#mhE5IMZmIdu7`Tj1bEcfWxn?mm6D-F$mKI{7XAY3JVo*8c5a zH6KXF&tJm)Z~nj4gSl^nife#p@3;n=_E5>nql?JU5jm!%2X#4G_F4$m&NxYy?eJD8 z*cPImwIxP3bz`z&!gRK2^!gIZuyu8|L6e;herraYyv8>;yRSU#;=1Ctv+KwQ2e;+F 
zZQKXI(xV?N<^$Pu+y^G|t&NInfc{~eLXJ#R$*#FI!1pl>on;|Q&q`8SPHVE({AI#h zddyxl@34n-=D|RvoF3x{zqb65$c^ML~N5A_uC?;4C^{lQua*}oOfez6zA_z5OF`=w*{6C%|& zl$lGe8F1!awiZag=pvqY-cKgxT$p0`nK-qe)9G5iCkymEPE<2oj&{H3oam7D$5Qx0*B^q)sSjzuyw5<1?6h*gNd8$q~D>dx?Y16X0 zzFf!r>a?!u<-@wh7w_p9pZ%z9bPBYMj)NAWIUlGez?k5?h)Q{>cu^$-h5g)9$g5JCE`MvWM@_ZL2>iRZb!tqV!V%yin zvX-wK<;`C8DHy+;P&9nLThZXzRR#S=Zx!_J|5DJs0}8sg|I5oxbTA{B|Iq^-LBCgiU^ziyHo3BBuXqxw!7njpEur{u0;veqUVk>jyFQk07r8;lJF#Jvh4t9Sq*b znZZ4nI*h$A@h(O?)?!p*4M5Hl3Q2)QBo>6JVVJc8K!w>0?>e}FG1fp>v)h9+hc&tf zGYH|rmqKtGplsBEGF}ZVxKe`=eihivuM8*omEaM-0=(x_fG_{y0(z+9SckC>9mJ*s zgp8qwnakg`n7{Mx5|V_q7*RkcA?SpBL6qtNGECTmf@Tj|%r?Nl8VGZ`DcGTZa6|v# zhyEdgT@6y$6`_bj7MeJvvGJ=Etml-3y_^zog;N~v{D+g%gd9Kzv2`Ch7~F$ibPx^b zAPR30l7jUp5m<{cR|DY%Y{V7k;Q%5OYmlO1TL-EMsABztHccNGG#xNU|6s?g0&dLm z5Wpf0F)ZScNf(9+x&XA%d0{!78`h(L*!&+3<9(o=y9t@ZJs3m>(So%gCD<1v4etd- zqKEN&OVEE1;(%BK3o*xiF$RA0526%ZkfLa#d(Z%7sw!xrd(fxKfeDio*f5ErcMyO8 zCT@tKVbMB`4ka{vSvd`xmi>p_cqeN503jo|1|3)nQiZi3+4vm9VJ%1?u7T@ELTqt- zbHo@i04mV|R-%b>QNw3d1%!zrh*RW2nj!-V6e&=nh=UGA5DY0iU`1gEXDSQ$Q1Op< zDuFm8@jn1VjS9x9)PG%wi6#vU^=P1H!VDUAETH5`2boYhNG8%jG?xy7m2}{1Mvu{r z3?XAIz%|JNoLi7REWmM$8Q3o}1KT}jV0}#kmTxqm0n@+#Q1IUbsL0&U|BE4xZ;Ji9 z%+Q0FVJ~wN9U3re1${p@ z(28OQ^;C9HDPRZ1YIcxsMY`ERZipRy5VDpXWVW!O4?_N81F64RvC@GRBtEeI68lB} zE&}u~!a)Bl1oTe}02VmD4d#D)!~y3qSD#~tPRa(on3X;HATM?>L@%a~UQ8z!-BdXz zXf!SY^$v7Y{m3XM)(3H7n&g~Snc@7ce2n9l;$@ESijO$H$iHX*Aor8~tt_y=MqVN> z767d0#>V{bjJRUnbwwBBihj=7kdCiUX9GKT4zLX71T%C}#+h7TSc0`mwcNiAmT>>n z@8SNgx19T{-YV`dy6d?=>F(hEsC}6Gz1BtUH<}N)UueAJdZPZ5>ybKe-AC>rw-*5H zaBLUM{~nmXz3`_GIw4kz#r@MdmG_%f0pDkfD!vcqP5kf7 zy7=Fi4GO$69TRwIx=!G^@iu{{#s>u+8=e=qZ}>pqHsig(4aP5ls|?`3gq&vp-?;?< zN6gvoSYPgg=k5OJAA)f$LU2EVaXx>d<^wKR=kJZ#I{i^lzkV6grOvEuj`=*IoZFPQJCPn_TbuMCkp zo<(9e-K!=3c59Kk;<|M4Mb~AE&%2CEpK+d+Ips7XbHee2%u&aiG6x-A$?SFfv3R#5 zNbSUne%l>Ee5)gf%?I01QNRz!55deAg+CK8`zPUZnxsbinV?61A8W?`Jj#jtL4-g5 ztlCXf7GW};gHXW;sNgorM+HTm1aB-Dev^Sti09z zsq!X|uL{%XU#9SXcI!M~@uUYx%?G^jvj^h%5t#GhF?XlpzGPtCf2K0^V}=&<^Hf9D 
zd&zc-t|xd2T!;-5I~AQEbv!at_HcNi{Qj^ir9Gid%DX~3RksHZs%;HgslF*-T4UOO zkH(bW8O;g52b!z>KBBOay0Rdd)y2_D zD+-d;mgi+@4dxW-_GVWZEX`~->`3o6UXnIq+MKe^tTB1Uye{dSc}>C-^XkNJhSiCn zhiF$Ng64c61lK6!%gk#sCoC(|cUYCDow6!Pd1zUZ z`o*Lq6&Q$aaSCY92f}b4;&J@B>|H$PAh^h*Mpkkev*+n<4sx(Zma=21F6(r=CGYAM zXVK9{U+KZRFvZ@QIJM5oRIRqM9KGg}5=KK&tw~K`n^|T4fMsdkDyyQLt=0wECv0*v z?^)+&em2X^1f#r6(9g{P-T6QSu748t`^!PfILVhfc5;0QjhtJ`Opf)llHG%gDVzGW z=#$-MJS#dKMTXkErF&XJ6gwJY)LQG3wHj-(^lGb$3@a>{df*hN&mvx=;oHH)eMqo{IV%m?D}ImpD^TgpscHBiVE z%w4AksN~Qno_nvxzqi-&lNFOnOarS8IJ#C^@wbe+h}RAK%2p1ADwhnzYUKB&=w$cg zFw&Nm7$SVgrhw+U;VvJGzDXBXIX%{H*%tyNINtXXh97zft^V?L0CdytEN z>SF%C*^0Rnv*(HB6tWi`z{V-eJa{i=aHAY`>3UtZ)^+B*brVh^6|1}#7p)9d$Q_GT z%@|G6N*T`5OITK97(G~H8a~iw5!^Rq<=?Z`+Gpur8?Vl*HlFQoEWO))n|dz+BcB#v z%m-5E_JNR3^_abJ4bEWpJB)j<6Z7Y~P1yf$C;Eq-Qk2&1nk;o&O}NT8*$EbI@Q}z} zA0V5yE>*MH|Wm?GAZ~wfsos{1{eF#Ka3Kxe}a%Lcm}-+JxC9}m!jp6FsVJLLMz+PV9(!U z&6_pjBAl|*S2BJ_sBF}>Sf#M7scM0n^R#?7R_J<8w-~rg4H!DEoiMhW*kf$7=8CcP z_!~o;u~`F~QP8s;0iF3kJ|W-g2)W*c*>^eKKfq^T2J_!E=D!gFor_iJ12n$WS> zv0KM%+f^OY&2P0$H_U38t_Ka%b)Y^UC?n*~5?q4;Tmy6v`|%mriVk4S9_&GYcM+OS zF_EIHtR(ZM2uZoA!W91xgC2R^nlt2@D{sJ6KSA#+;iB%B5+s~2W-YcqUn*;Tu2J6N zY@ed(=`~75C-*2Dp17vOIPy-3aR3wz_91&fVLo5*Irtl0*cse|!}tu$paWQscOga& zV)nxfUWM=G$i7J@$uERR>?^2Y2?+(8Xu1EtG$1~r~3LcpYn6ySAO7b!Fr@4_zdnw2e5uG zIvA`4X+Mb$<^tw#bO3SKpEL+Ph!1)YH{cDWK^P9xh2R!l5MKX>W9Y&5;xo7j9mE*cK`ccFQHu^D z4?RpG)}w?x#u|uM|5F2Di}SDqZej{T#1JGY`XEQq0TqfSXi?OFK~)3`sth<#CBd6X z1R}8RA&rR(%4uxSL1Tp#$QJYvNB+ZsEre{_h2H~w2KtX-KUDNEMd)FYZxa%R9>(hh zA&&0|v79A1HX#h45nW&>TEI)xaV{z#f&M{;$bk}(1`Tu%`V_pJfbPK&-GeWM22uD6 zCf#{=y(8r&8 z8dT8IV*(Wu8pztxK+J;%0wFZuNOLI*Byt^jf@6O~e*f=z{D1%be}B%6Z-frQ7(Iy5 z+&nayK-+)@>XtN6a%Kh@e`b(~W(JWoW)LW32EJ-^8ZFGgy%g7HkOqrZ(13H21{|B1 zfPFU;upMIp*2`3&KcoWfJr$UK;(UNIi~K?t^b`3p4;-HYrdR`EHpkptutvu~7oC(D zGiW(7gQ^b;C`7P;Ofm~d=COcy1q+BaA{{Is(#H(KBg`PQ8rNzX=ePs+?hp+VHVt_0 zGtKh6Vfw-Oo#`t(P(QN*(?{e3@_rsTJ_Rf=+gV|qy(PK`bIiMD=z~m*nZeMO1#~@G zKr@66s`2QivgjaRLI=4zI>@#n=*DE0(Ls78%k1KHEWZ|SW%((!pXIy6S>~_ecbGql 
zzG8kS@{RUN2$)|81M@TF=>mYc3TU-x2e8-**1M8HmSisa3>y!dm zK`(|CwA0u?vw-cFdKKFb^(MA&YMpFf)cV;zsgAOJRGDCVud<2tt@2*h*Gi{YUn<;U zeJcNo{!s24>plDe-9~Q81MA<&wRwOQj&F}S*9r5t3;uM)^>9T;=ZyQ|XvPAzj&!i} zVf$qk!T!xSnf;StF2{RD8OIw&9p@{9HqICNy`0bVhdH0%2ByOHyO?rzR|+9x@0 zYu)0!q4kRMiq?0|3tGT=204YC)CA7I<^gu-ADq!YxMTM9M0{~hzW984<9>LcBXqT; ze|7R;`(PKs@!BSy^SM>#qQ{m+Tn{X(x$j!EaNjmx%JYx;5YKh9ao($@)4Z2VcJW>? zInH<1_y*re;}?9#jKA|9G6LQMM!>TV*=+>e^8rWv{O*{0y)k$CWA+chIfddg5~9X5 z8=yz~>|;TH?dihy)GdJXzH2nsE$398>rVN6R~*avFF4iB8O}ai0rqyAhO5$vB*yA&%)cRL1?oz2yV0nfelu`KOb;G|KK$@KIYv}%$`x0 z{bTSMiB_h5j?iLy6=KBlFvyoIS+!_KT(Q&Pm(9E6I7`WV)SS?qODjjM7l3J86L!UBs50oKya$)o}gUuU4f;N z+XHGBZ}D%H-ssmYGwnMpyUu4qZqj?J+-k3*^5dR2WP88o9 znJG0LQ7FAGyh3&|tWkb-NQc78kO9RNLE}opfg6>V1?*QD^uMIi=l5K>$M1(iw;#xM z`GIt&A4tsyU%QTLfc_&2vrZDOMLIwEnI%MC8C#+-8eOBbJgQZBD6&VjKVnp^CwxkMY3Oc^&X98&ZNX18 zmIQxQX$c0!=3tORWSWEiFW@)#9AMsy#mt+ExjUPOe9Ge?4+|END~0^zWU(UUK#>7` zXTA;B#vFHnbyS@IYKhvR z-4uCJr#}3Fc3t>q_1bVysR>8Z91imH`F9^8a19bLZ)M}x=OTwI z4VN0qj+gDvOjGDi&r|M9D^+bzsncjpZqsT?9MGvx zSfyJNw^gqy_PAbo%w4^*=#ScE(V$)$4JwG@d?0A5#@f`nU){Dk+q`3azz9}jAI@t`~(ECt5zBA&UY;hA$DD|u4N zM6Ogbk<;~f=e~(fcH-Tb4b7U&YZ^>BMr$4T2CFhcTJ zt8*)~Dsq~1OS8N63o}O(^7si54cZ2W#LrcZiY6b~W1A5+nAZW?SB#26vH;+5pLx>Im6} zig?AEvNV;7l05a&;xg@`!g}4jf=+|%yyb=&IqQs4v-cS%XI(K)%6!d8%J{95oB^82 z>7X_r2%mcv@K3yKCi1R?Lax@weCPk&VDn}!~vP3((yjCxxwA~=JWXLeFXwo>YaJNZJ{w0&>yjO-X zdB5~xazQI52h`{DKhFaGnU_Z;cW@0ZH)7ss$8+~?3fbI`f9nqNk&!_KYJa~TYiExo zcT1PEU|olgcx79NbZKj}d|`8va&BXmdS-o*c4}R+9ARZk|HX*Mu`(DL0INeFeA^iM1hB0$tKZHTN zo3nJaBxzlxL91J7%wDm=mbYlcT{w5SzeMIxxJ=4GoI*l>no3M>o_a)2xpv6XCcS{p zUIX9uRfe8Rwi~*)oHcZ7dd6^X{GsdK0NNh)pfMjv#(l^~|A1?7u?-zeFXm4C{97=C zubRNjgBhr0TA0+WSEiP)Ghi*8wBpKM<06p0`u`#9J)okx-tX_{PJ^NM-mB78qzD2c zy(7K%-g^@S5fKm+!HS5z_uh>yc8w)|lW2+>O=61C#Au2+|2-Nb=3VRkE!JlZD9k?R zIcM&iz3;hqrfKJ|Nz~6?ooSN3vd}zfq;hQB@N}D~p-%hoWrO2`m#mo(uxPKN-{56O z-v!@J@SXpMgWo)6=ike0fc0n?#^FU1@`pK?iv#d|L-5|v-?kCs01wy)57xF{k(&0K zay5I$3Cnl8%M|YNQ_S5Fu9mSqPCI35ntsBTJmct14w;{`)-q?+Mg@FZ4IU122PqH*$db 
zlOn1(sY4|nSo88uIE%85d&{OC4OUDz60I6@I7Kt^V2*C+fl`COef7q^duN$?@9sDE z*tyEmZTp@vu3Il#x^Di?(q;Xh7A|X_!b1VgJDh<|6991N~nO z4?F-5+KwEe>YRXbuPRXHRbxuM>cC65;wFl@>?e-A6fPfnF+nNtLZ+(Uxgrhk4{NkN z&(73!J3U|D<>X2OrxSY&9gkf$m~i;9!G!&P8I0e{494$b`r~)L&sWHSZs2ur9&_*n z_Cf4}{zm98gMQBm_UKA-sM%e8tY&lZ4|SV!Ox^knIL$Oh^GP2v z7|elF>)zy0+fo0O(C>%-Oz2l#BFec5|Nn)Mrae)i(4P${@MkN|_veW`@1H&S9!~-V zZcm~_E>BXV9DmA}9`{p?jQvluWNm&}C~NioCb_YXPs&<;`<1NugWu%LzG8BwpELRY z@)3>$A0mS|hWWn-9(*0fe-ZRMk%QDD2PwRcYd}6Dip2dvf?vsD4}unX!)JTo{JtBT zz_~DYo+I<+<3mXKwz$4wEXxy`vucqk>kt|Ly+maAYMYe)Ul&Dse?AiFzI-9l`IU*Z z|HB2m20lOzb^!XDp+5xu9_UYpekpR0j8BMSkb?w$gKIGU5+Z+4ph?(o;LME35q^Ih zv&TLJXKpO>H>{*b}b!ogpiwIhRA!dGTnun*!avWRzsbS2 zBZnF}jvNg7tO{1Z~$N<0L350KD-7t9KkUeufc9)aZQ+m#W%1Id0kOkX0!lmytm(BR~#9%o;cWZx8`eKq06D4e!D1tT<*rmV5nZ zZObu>@jPba%46!@JSGzg|DFKvo&(B26LNzNFc%DhA+QSh#zwFk90BLRO>iGP0dV~P z*OvqI$BlV|oi)eCI`EjOGmq(b@tA55kIBaIm?)FSxDvFX9?S&Y(C!CIzzVP)?10ii za2i|#Ux1$g(uV)*YrG}LCg5J~6Ocnl+G2}5$ik7wOeXV~ZUCRDNAsCt8uE=ow51xE z#tgKl6ZAoMA&}H!E!YC~f)h}@4DNs*z)OHM1|a`<-J;*|I2z^%eJ9-4&KXR^_v4Vo z*o@;bD>ptf^W`&x2tLzJMqBduOu2&35Uwd+Q~7| z5snGZag6sVQlRgk^&7sB3W2A{Vtxh~q4!|U$Uj`+wI=~r_)I6X!_f}8lrx{%dGpzr zP(Cw_7cj#t0n;lHFzq@%(`w~2%}&tA|65}b|CRbM?@zULyx&!~@m{JP;Qgk2mitxd zHuqHS8}3Kh=iGNPjQa*z%tP=1;Hstfc&$AdUfToNgXGa2ZE{CWHwok6?7?T_0|m@B zTENDpAtx&k{AE@p_}z56;5U<5f?rK~1ka2I1W%2Z37#0O=KpB8h5x<5e*Sm*r}^LL z-Qqpa`G)tU&U5}<9VYk$+|*@)>i~DseUI0=Q{c6y;yQZ?uk#qp5pT>*&q;_FeFc9z zMF?L`ND}_)kSqMzu1xf!UA^eBZL8=T+c~0#HuFUfY?cV`S+5j+X|+lCxz#@5ow28d zpICk@ylMH3=$hqkqKjjg)Hxhb&Vtk6l*RinFYNL6h4=P{_YD9cxW|16@`oVggnkqG ze@yif{Nfob{Lwv5^zEcf(F2!asjpmWr0%*jOMmJ-Tl%(BpZKQJLh*H{Vez$z>t!xa z+$D3-@wm*n30Gy#On4x3V*D?%N5?bSLtsAw-hJbk%-;Wik0XaRa)%Ij?r`|uDAXd# z7}4q&?)NYU-p@g9{BQjHg!g=+M0cj9N`35|FTOsdT=vS82Dyt8_Pc+swA<~8(hj#*id)^7B0gDJVH4Qs%H&7G{E@wc z!1qSLZ^fV;36d;EpZ-cPr6)1g+`|Y*9zGRLa4R%S>Pl#W%=wTkxii5<3LgYjDIE)H zQa&8mu5uu-M|EGofZ87aH3*02jDpumRK(#OwMf^dUsCnyQIZ+miMQuIigy>Bj|-GO6&o#gG$vK? 
zV051H-l#IwUDImSw@0>UY>k+$xhcF)YeV>A?R8DnZ{rMQy&OmYNL5wgK6;o@$kH9k_WCP$5|S5H%*&9${5R?%Wx5# zNcR;VOpTD=lai>sJt<3VOJb46hJ;G3b@2^4tK-^qSH^bh4aY3dABtXXuq0}u;i75# zjRqqw81;wWGnyCvt3faFm$_j~yC;lk0nO3ia$Jl7vWFyi-YnE2PmZ4D%h6YPGITRn zmChHK)A53dg8g|@#5;3B6!1}~D(f@T)K{nHX^o_n>MT#K)mxgT}waLt+1{;HG zl9=IWSPbf)48I4@_g68W?!x!osKuW9dOjU*5YgTyUE0!Q!&}?nDm`55Bfq3NOl6=l zR%2dys!mU7j{cmIV#AK2YLl6T)6Hh&cUVr(n?JTOcgU(PXOm5J)?u5<%EBzI zr~hSHp3cn5(wRwF8Z#OVZn=)v0+v>J96c?A--7SEiVWZkPW>F5fqUM!V(&rgSl&pB zlhl&wQ{)C3f|dK~qBXi}l67WRW$DkXC^VW;USTr5w86ZgqH>r!7ZERC>m(Fyb!S7bt1&$LtTHa&IE$kX6?CYE?(>2RqY1YgL^%<>k+D$EKdUextjjEeUOe-2{EK2HI z#unCgS?AR(w9T$wZJ$xO$04=if0`5-JFPRC25eIrSJ@}l?{bK*J?9Wx{lFov>V-{QB^wi8$t>b=^MKK? zG}Hk8|3Njz0b_6x{XfwQ-`kH2W)S=CaedHy+^eN~p$&i5fQwX1zqfqDykM2;-YAXo zxk)<3-I)djT?NKDofYO8vm3{x&g!sAoY`+1*EV7oJ!7YRWb;{v@TU9r5e+YFBI?=C+TC>Yi^M(Y3-hv~#;%P{$d&fcAT~ zfo;E81-7y=K{J>|Pzy5~&FgW%T>KO@xQy{Xh5jEz4)Zz(S_#j!2KmEUWtzUmlxkOx z6I85nmo6UhmCs)hrkp(-tDZiTs-3(nM=yR!sbTcuI+KWnGtEN=`z!+&43G7jzujtT z-)SrFx%aHRyMG(wJ%?FL?PO+CXEW2$NS6Hf>XC(Yzm@*cdFIygo`fVO^41%-Sri$koNVp{r^P0#~*g`K{s8L6IU?(iOZSZ#G&_jfO&8WuY(Ji ze;+`9|5}{?N8eW}Ky)QrZgO>_pjxhzx z!%Wfg(EHrP>+p3B1^xZd-va&NeaONNp$72b<)?`OolQy{eK9?~EPhtKYhW<|IuZF%PhnjT){u?6j8qqlT!09Bv> zWPvnt86lG=4r4^fL5P_H2jBt1Kq`>51;IGOo6;!QSmX~9#sD)Lj_Hr%n3@ax^Az~_ z5D*8l;F?Q8J(vl)K|fdwR)95N3)lzVdjGrl{xNvt{r`V62y0{xHt?5Lzyg_!kv+$> zoH!=yfh%YNp&tX%K@py>8nl2;&<6&=GO!A40=vN5Eao=8{|3AO|HqddvJZRsOFMXe zNjuDt#psPkN!>Xn?}vI!gMJFg2Nig>CeRN4TZ?u0w{746I1R2N_jrWdz6MBsu+A;?1FhwnlTv3my98(VCm|Ozdl8s!W6z!x%`nVs|7t&+(Vft2W9X(RrL0_vJrTa>k=}Y;0^qIm_ zx~=$%yROVQ+{Kr>3@#}%-e{OBbls6ZOh)$L2HcTHxSJE3WQSY`ua9xw+#fcf+^=Ke zc~2}ddEc8A^S(8!<{p|&=kA-%;=VHN;l3~#;O>|Vai1EmvGi*{Hs(f0qqywLZl$Q^uE<6A@Kp% zwc>p)JH@-5kBfIYU6i7#a~Ok^_a9GMuOY%E^&AMkPL$bq0A2)`9BkNgF- zh}5FzVFvVVfH{5P<3P83Cv%s5gZLl%#t2XPrb-?2&67UlQzqW;Q!BIAr&)H_)DF2F zQ+wsNc`uUR>^-8eamr?e^>t1xLaor1pm;ig$-)%5D!Wl;09k zp|CNeL2-R>tJ2z_Im)X7`&C8)hg60G)~gQr?@?XicUEnY@8@a*K2KHqr!wXFQ<+kq 
zH&g8W4=e!QKNQ}78vIrQ9+PC~8Tb-D^d|O3oR2f76A6yI1MyRYyW&Emx5mcGZj4El zUl*OLxH_std1P9(%JON`RhLE1Qd<(ytG+ONk;XvSD$V}T9a{53KG2#Q^gpfcz@Ied z1pckwiTq`D08{PoXDXw?$DbjKf_^L?(Yla+Nf*$aG(O!(71G6I1v-&tO#9OvxE<+} zg`3g>q}QfQlUtdbq%fS6rMxt;P<3%ah1y_zy~cvLR?T^_U0S^{13Ep?D|F{f+oC%= z@~Gad@EdwFG^D)Km{x5h(-;j4NBv{)R+>We3-mwB z$2spJ_^o0d9VryjF6__QRAR-gE_N0yFZ7mPTo58RkQbvkFE?3bZg#eMS5~3s?958- z_VfnbwzQf0Eva)2no^b+H6*Vyu1(x$Qk`(gq%!WINk!c61{HBkw>*~V0IkumNX*4J zv>_dyGZ*K)i*e4o9Ot|%ah|ap_q(gW-h)a@Zn(lxu(;e)dO>NRTyOC-#je6cmDvRu z>g{>?nytBII?XwCdX3qw2K8CpMl~4=O)AsZn3kvQF)K|uZ&sB2z^o|gg;7xw(=SY9 zxda_K?Rfrzn#oeVRsOPbDk2nT zmBp#FmZYgS7w2d;7M1AK6;|t4=QkTx=5-pEq zIryN!oK&U{bVtKtFc&Zm&ryTV;C-(`{|uIZhp^n)3RX^6q@~l1X<)iNzqip%sgRBdVNKfR!w=4PGxDOepyMAVR3PXNkLJ+S#IG9^Q`>s78!Y`Ez@$o zut>}P)if=O83BW|Or|%Q*W-{z^j!h2eL(#$LH}e6yarBzZNw6As7;mzXBtpnn=Nn7 z3>VSNW^b96reKAJ#weBA`b720x=gLIngX5T>T>;pss^LniguH%@_Av6O14@i z6`iz9DE!ks)qEaY{;4`TIwvwUE(E`xj0ZZZBe8` z(!vDg_`wXd=miCuk^PlAq4TEe1@?6r_|08xG_`w!@sv47OguYp8+*+9*~oJyGw^I< z`kphG-e~^DT)d0d;Z=;mnYkDT^m`|?*A2sak05`57pz>vqrx@nl()v5vR04hrLA@s zB(3t3iW>=&iCz&aA32<+6uLZ5HE5_@-EUcw*3>1PI-ZLc=}um_UT@OC5q;PG+j=g2 zPjy}9G94Fuu!u_+(;m&EGQ1uz4=;8je?b5DFT-mR+AG&!FAOYE+eX|2atnOlHd#v9 zrcVjmtSNS@GcRh(6hZjrAW_JsC~@GXWLdur+4A1&OO-s=HK@3+ovr4&dZD`0s`VO< zBS$nQ4F6AK{IaJS;}m<+)-2%w zk0KALP}m_83O-~{fd?mXeg}PcQxAj*y!OY5ChyCTcH3Jds{(}?n-X99d>%1CyoHrr&^Y%39{3LQY=gT>ri{KnTOyrOIFh^j2wo+(wrd4Eh zYQEH%57tOo96uywe)NA*W(S^0neJs`lU-m3lNrr5%z?8@Py^`i*?<~s#Xgjsr~!O% z(*c}+hZj$U7mxT5-t!7aP9IB=!yOf}yJJK)cWlV&jx&wDGnFjwgpTU zxp*A@`xN~5Ir#4@xCY`D_8;8k$ojFE#{8sC=06#c=@Tn5dg4g>PdrKYNeF2@Ng(wn z1*G;iK*P`$F z$YGK$5`|qu4L*kd{v7`QKK%a=@c*x5u>U|6*@GVTU6^D4fgSw6D{>ECc<^ZC54nUJ z<->>1$3YMs{1b(I@erZ(??lhodmM%SUg&Rx{<4EOCLP1QIMDYZ^gZ=5GMF2f2cKXM z#Fy9u@io!dZ;6b4B+~m!viCs*Z3XOo(0H9aNF#gT;o1+F6%hJnK_JJ&eI4?7{dR{eOb|;Te(opM-uB zvr~;C< z;6OAAW-oZCD;HY`NLuG zA@~?b@`vZ(|M;@BBxV7BX=Z_@qaB*I#AF=tSMI1Nxx52z?ud)>>448}@u0fYur4e1;Z$ zPd}n1-{UvG1Cr5s2)-T#vo}P129Ik8&u@iCQ)AQvwJ?}KOk*-?g7FYfBPK|}U*&=_ 
zXx2fq1#e~D^%fi_JP0t1W0DeXxMmY zPk{d)uZeabhp<6AtT8t%?J-YXiRn%yrXEIrDaX?b`AqsnwupX~siGfc8tHqPHu_FH zhrW@XM-QbI(>>7$eIeXPcLjUsQ^85P&AUlAdEd}S{1);V%;(*tP^+fv1+=HGQ1=3G?(e#~8Dm~QBqx)KA^o3R(-PLNLPc>)L zZOvY~rMZxeyY_<+u--=xzT-_i-q-|46Z;|_uSnvC0r$GxLqt{TW+ zpgjq`-wpoP9Y0S(Eu1ar_wn}h#MYI*8RJ9uEW+uIc_Mvmo<-L!is`CF6;GgCU!DFi$0D!}m@VAbUYO0_EwIzY6^{MU(DLGN6xLtm(3wE1jL}%boCu;2riz z5N`9R7j2n5Lu%9HPN@x(=S!`1Un;%YZH@Fww_V~Dt|!IIT|N;n zarsHS$oX&apc9iGn8>6Tfc{b7wqVGCM9}BqZ3d5F()27?hVJ{w(kz|*Dc3!j$#%Ij**We^W{%rCm@jJZTAyBL08gTLbSDz``wv6@ z5Tr;a!i?!agahr2bmwi3@E5F)h!m|3Pm~@B%MdRQE09?lS|+zRq)vWeNQ=UP;7-N) zLGu;+0*93525eOB@;|8D>33PV!{?E5yU*`RZBvE5x!WpJ#4L7TgYDZmf-X1(*y6THwL~?Z3tk>^#M$&9ycYg z8wCp#zIm&Uf#-zwC#hKHr(>O;hI`#73u#9R?z@;amR6-X@s_7f5iU*%k{U>kl9`{B zB-fjmDc_w?px7B-uG|qeAg?%BF3$DD?8Ch!(zdkFe7{iS*{ zB4oPKtte`pN}GqaQ$GYVDe(<{|#(i%0YQfFzEr_9qSO~l}PgJPO%TTJx%~!3=DN`@cuGcKdoT*)y(W{%E zzDzGCZIgah%29)iwwI20UTXVCkocL`OUZUo*KygE9q+Cr&yh3GhnsRwzu4+j^iF#pvtyW%M zt4>aCk6vcZ68*I74F)NhhYgd`Zy6@0{iL6m%5)M_m=@4XOlBHyk&J`H|9^=ZTrWcw zR)h1s4e*>z@H&lH<6&RU+y-r$-7tpQ>K*yhYdu7DHGbmisxaB|%2qAtDU!bJuN`Rfeh@(votF;0| zhv!)8-9ZhmV!3x3x(8b@7h2&pz`Pk8&2CYr))q5rY8l6?o$e;AZ1RyVYY34os*je> zuS-(Osm)Z)tSMAatE$vYuAHuuP|>LuTeeU?s&uVkWbpyR@WPJ_!wP;d2+R9hH$0bV z1Fi5Jrui1h>){)`7XF7CT&joXn}IwI%g)WSalW$?dk;FW)}EzIjUC2RGs}*vXm=Hq zw0VmPT7$&7Gp5O9wInE}H>WEnH|47(G?r_`HZ*ES)y>w8s2$J`tzK;qT)EF6pyH}Q zK-u^D0VS_=1B#h;KoQdlEMl5(k&MGV%>SDh`*Y3ke6!$tyKv5TF6Kfn*4lV%=t2I_ zgZ!b#ii)}?^76Vog;{g_rPDjZWm0Cx$tQNCD#frpmIP!MFSd?H(*ZL3&wHj{cik}`97kAc_GrVebF*ey~*+sbF&pgdrDLSyXw^Z z=Co@}ojqU6t7E0MNBbUa_qNO0ZZjThyG{Q~)2)eVxHT~K(R`DKalkyhh_U|w`ukzc zwk|{#wFKUK8TQ_T(jlDtA5x&yAwx`IQuU($5~9(u^r5O$34{G`gFV= z=3pG4zk3mUA2f$oVE@5Noco3!FI)r9vrb6S>oh2Gtr>-{b)b;7Zd~9RAD-XpP=U|t zSkaVKX;L05^Tlo>RkAL_t@0C>_bE&m8c`g#bho1Y;>(KmgO3&L`d=y9&SQ$UeW3Rp zpVi^Ggn4%o^_T3$*)fdc0(2Lz#kuG8xDUn#d=ABCc<*gE_r6n>e0S>7)E$;IWyb{a z+%cIZZ};aWZI9%+Y)j-jZOsu(*itTX*xW2-w`s1l&4yvI)w5Z1$E^BZ+H&}n z*kXvuSS)2S=1bmjqYX9aLnaCRz3BTE^mzo@^Ebi&Z-w{V4(|y+IBg%i_hF7)j?2@; 
zOa$}Daah8W0c;<(?_@)Pj_{RHo@D2A|VFXWJJI)b8&Lyj&)I?V@6ayT3;%x*-V=V*^Ax=I zc|KWQl_9fhnq+d#l#H+0li{^VWN^)o^sYsb?$r#^zEVzFSK3J9(jci_*i0%Po+Xu2 z-;m1jSEO>7aViHHr+k3%-f|Z6@8}3>unsjq--prfK4`a~-(}FwfOgae@c(CV4aP;B z|Nn@{>^6^dzK|l#uT@F?YeQ1~+KQAPI+4P|sU-I>l4QQlAn6BHB>Jk81YayC-rc># zeF77G>nDWre6hjtcOJ$3$|^B>Nut$R6a7J*eT0#{i54PQVW&f^yIa@FM#go%{P1a+&YJFaNR^V=uWk z(Q@d|MN^y6_mVTX4h40O{D{c+HpbvCkpr|XA7cNq^0A(nJ8i`)~e`ith`7#B)yvvp^5%2aACue^?DRgZyD} z#%9Fy@u-2e$lBm9#sm0W@&%IoAqhx4cR8pBt)LV1f&s7utNkI1jV2lNNO!7EJ77+l7;8yOY$HO5CEb; zD#!0PN8CpiA=we~W)H?!c>MA| zFl%^hD_!Ib$N-Gs`*qM3$(SgO!`v912PNPyv(V2X=vG0i5mjzQpJpRtnTw2N02#|N zWGt(pw-tLn4npe!t_=E;ZX+SNi7H*kGe~C1HE?wl%nrJC@cY(kn8v^q?J+_vbZwE* zVQ$KK(koFAz2IV?n+DxH^sNjTOAUHBoxZ}De1S2!gIauwTHL0U(Axy9{iwo+bP@k^ z4i!0z5jq9k55RG7bQEkH^vA*P+bN(O$RDin^H_|DBrh}=N55%Iq9@9}^qqVJJ&;YL zFJ-gnGnpc~En7)9W$Wp>+zh%VH=8cY_0dJSg>+tS1)Y`KM5pBT(+SxR>6q+YIwbd$ z_RHZaG6bqS!49x}6l@~$hl%js6X5$N$iB&99JJ|gYZH2AZcX1AIMP?zp7g0!Fx}LS zp{u&7bWtyt&gqxX8G~v%Y0yL`3}@0&!)`ikxPT5C4$*$Yb+pG|H|^3tMceg0r7ikD z(MG+$X`LS9*61>B6<9e6=7PSvK;M~<{6+dr9)n!ynS%~J9BV>%Ol;}8nJZni^rbUn zBj|*60v)l*q=PmEwBM$j_Sn|ZF54DvhiwP9&9;x*Y_pi#XtRo2Z@rCMYjuoUJ@y7S zGWIcVXw09yrIw7h*pl%V;f6;G&ELV?(06y_A9$-_UU>A9rsw##hvOCL6Dv)+VsA-j z#yirniC%QTDVX*+$IuSv6mF|?4!6ms3 zCczTNgMvj9t_TLkKN9wjdnugfz=XZ_OgIo1&@>7<)69rSRZ-Jb5W&C8$L=oP_R=as-)?UlhF@yZtrdzA{8dDRG)cs7d`d3J~f zJ^G{;cr1~cKY5*WpZi|v9=CJSU9R_}J6)ekcepUAc4sCv(+Ny`2R?f2jlMrlEd*hW z9gKB;2-dj%xaXj^EFJJg{@`y-n*v>EP2f~+Brt@(JRnA}G$2K|C?H!j;9n@!?_VJ` z&#zv(*RM_79mDim-RnVCfENV}Tl4?yz6gS6b$TY>}%hku0$=AlzDOSg{Dpf@HD3?VoQ7MUB zkF4X6T0z)#wY<a#=USnEF&pRDa(Q2> z3^^Li)Th2oYwF5$;yNe(^-)ia~6YGh1%tezhEN+mslDW``srL=Hd75Wda^DvD4SE&DuEQ~`w z#-RvnY_J;NFUl3uygVK1${RzoavixDIUf9`Y(HUrR+v;xW{kKpBU!dAJxi`6tx%yT ztx_pJb-GG!N~c;@@}PP~(rS&=guR-{ahEldV!zc$iup?|DVnJyMlofe^cKl!o)?%0 zpQ8rX@{om00_?Y2L}V=kZum(ll=74V!jSYy^=jfth#+zO7`asS>HT&LAgZbvm`uDpuUDT30H zK&hhQNO68qylhTkntWD4o??1InQ}^gy=qc!yIOqCe2tjw5zVN~U0RXp=e5Gq9%@CT z{GlF^%v2+imUeVHv!*73aL0;W?+vP{njz 
zDxE%t3Y#Wyc}CU^vVBS-6xN!`s6YdpB$$AmKO<_3wRA&#TcAIzYjup+bqn-*~p?g z;W;~S{YATwirSGsw3|_Oy91@qoWx6M^A;qw28rTkOp}gjNsyV=oFNxJy-*>fsY)rZ zp+&{7ey-}&+99 z?akeokG)v$gO<6-AA0Z^ls!t6)?-M?J=T=a?Zm})dGeyV`~{J7!bM@7aZ>s~55*DxjLYEZ=#-*O){xKV|1 zXh9x_e(#3n#(wzD0pwwWxc>nt#P>;q(iA(OMN!CNA_vA%*n&x1$bzZ7!2S@v-~4Ey z&-@h8lzF+*9=+ukJfyhAf+ddFS7 z{;psSoJ7qJqTk!0w{9`AsHHgfy-czf2A&7R!jc3JD^b9(0r@S*H6Y6!$$Pm6c@6n< z9zzj4w`B=@*QHqk=OrZ~$Hfg&;}&&F+Yc@g+YW3NTlb$5TlL)+TlKsYkL_YIV>>~| zJ8og#pKnM00R6q_^A>2Y8pd20!F*f^-?a+kum<_VTI3HK1mwO!jV5g{B9{#|$55a!zLVq_xd--a79w{`N*TZ{m#QFct@V;Ad?t3SE-yRX!?o}h} zy+$;4uQgfjok$jYyvS@%Fq!U&CF9*$WVEZC40g7X-i`&Nvu!QB{-;26$Mc+rz@A=Sfg?7aO%!9+&ivuqn0Bv_@kGlx}56iB93;Dxm@ZRv` zgwGMC2TDX=>*9cGiO_czifg23h2*=e%le`!XID^&SDIp?|%dH5dF4Cv&|nN1A2@-AkdZlnMm*p zG@pYP;3bf>;a~XpUI7>Y2Y|8pHy4_P?+3t&e_^^fzpjPsK?RS}WAQZbnXmH)iQkpv z50M}RNIZ81XaH?s4v_eMN&c`DjDYoE8#oLugRj6-@c;dKg@2P|5)zm`G6x-aTy-Fe zwh*!yvIovUl0Sq2iRaD+C7=d0gLcpb=7EJ^2&@8|z<$u8M{*IsC<{!V| zdkN3~ff*U29Y&}HP{8-Rx9xbFKLp`7V?a760FqirvX~j519T&cm=CQ*=+koOt%Kf9 z=pBQ`6;$DCOqu8ZW*^_!d=tuUk1bVBWw;4M7k$rrKEaDEbh@W2P9-reG?t;(2C!=5%$Q#VG zkOjf>YiObtXp0PL!Aa(Y6PN-*(APLpNZU~3pvYt{wHjL6pmPMd$5r|O>C7?o zKr&Mgf&*axD3~qut&x9N$RLA(2RFgb`lyYT8U3MPOHW14xa+Ss`WJ#4#?r@JDqZL0 z&{bYBUE){L1%5q!$e%%H__OJxpqEYv7SU1u2p#5c!QF!o(O%wV+Rb}FJNUoSRzA{0 zun}w+1#^JDofw%Ee5bWEG8p`9u1+rv^yvq63%W1wK%dIEA#d@cOG*)RP9=d(t7gy% zwR}3NUP_16Yv`ax6YbY%r#%`yv`b@vc4!RKR*g-xS^XewP`ga))R0nPbJ>U*qhWwg zH-61BJW6<-y^KfhAUTwbM>`yF{{<^~dTgvlpKI#T4b3rhL3bjZGVr3KM!|H@B%1b` zCetp{EZSjKNL$S+XtQ}8Z8UG8_2!+l)_fkVHeX6B&DPPd>0TN#IY&!P?$ILS=QLo< zxCKUxn{UXtc?R#mN1?xobDhuz7aZ;Lk{iS5{J3mB8NrXp#3Utf!$7SzU?Wl&*m=I zWBrWRWyN@%V;OJu7{==u1#?B;q0QWJ?ZaevZ*a>EerF;+bJ1R&4vp8RT@$Tov$Hd; zbMdCtuEDgzHJXMdC2>n9Wpaxq6>x*DWxNHhb-ekmE&M*$IsCaU3;5kG!-6@^TLiNw z9v94Vye(*(@I)|UJQFmJV}j`pZ)mb-0tv4#vxKJPmQ(kqB7g9Qz8~J=y|KP@lcF8& z*#F>({K0Di4Nvi)rQZIu&^v-#;2p=CKP8pdJ0*wTGo@J2Fd-jQ1 zJ(h`DCU2CQ?tWOR(e1ia{iN@uYF+*o)i~qMM@~#s<-~+)F0@>^#+FUx&xy5a{`k19sZeucK>`~n_rn|hF_i3 
zbl+B~M&BOkdY{GO+NtZr)!zGMD!s1AlzVFQ^<@QR!ooBu9zBrRw+63 zzEV=iONFFhCYKb%WD^75fo;I_V<`3>U>SM^`X_TR4tW@dT&(fYmU(Fc>P%Cowlq_k zo@P(=sV-bi>J(l@N+7>1IYL;R6elW7N|nk_%n|1%l*nWy)XHVVx5=l*^(rREE>}v7 z-l`NIbxJul@+;-oh!={n;Y>azjLF4>zD5?0vXVUN5$3@s$!~I?;|1{kMOfc~WmuAS zXJdVpr9w^FMpTz=LseN$Tv?_kuQ=0>Uzial%uA0E<)kM|XQgF{(^HFNQ&X$ul2c|V zBqa4H#w9LOicZ*~JT2~|a%9XG$`R4ODMdsvg@|cPJ|dFIy+x8oJ-{5ih1yGUpd%%C zJ(R=mfyJd*y;vzQYooG%#xZ5zv^&baslO`uCNo9fB&Oht4{>T+%2XP~55DTewhZYQcJ?DS1bgy>dQP^33{0(KC}N zc&0OXue5i3i`U~PsKJFYye6TyXFB#IwPLN^hP5_mX@Td&zhyQde`qqGrfCgD-DdLkK^-K3W)Bmn;gZ&6f7BDUtE1u9x+$>X7%W7*v>CzE07t^oZi5;!hM@ z3!f>t<}-QMJSOLw3v%A@AQkxo=HFS={%|wq!c63nvyn^A!Fg}cgzpQn98YeSp}2N! zif%WjX)_%te5Naf&YZ#pwFU74TBq^-W+V!y&d3x^X(^WWm|iD#YnmnF+BhKVT)$Rs zV(nqM3Dy6Tn^5^oc0xImolpu&K=B(sM-8rE3{Il<2ikF5=)$pSE<8^keCIrzcgAC0 zFMRJ@9)-Xt%t<6Ew`m@o1RJA)H7+DS|+xsdB^Qy92;SQjzNEKH|7F#SHXfTT8K;z z)PO7y1$-8Y$!nn&c`P&~_l34JY0#Nm2EE96Fpwq=Orr?{$=tXFc|806O1{nfR)JOD ze8HIB)k4djLqhX8w*}@M&jjW(nb542iOgoa<5~^!X^i`!9*jdjy!Rr^g{8M6QGT7omdRwNFF5cv{Hz$(jrhL-a zSVwB>yGR8WI4Q5$OA0HmlhW|dq_m8Y(h^3>i?9#lA5LTZ52NVZV;lA#>?E?;gLCir3}3awVp2MyP6|hiNbayT$sTqlnM2+reJGqn z2UAJ7zl8YvW)LU2*lX8D9CS`W;64id5|f^xv+p>DU)hJ6Z%4n^L3=TD=Rmg}x&_;j z$3Zs~x?cNn4F>G;m}AHvK0y9(hDhqX2nTp&pa(3$1mFe2G0D<#;KgJ=ix<{u2z-Di zId%&jeuBl(-w;I`_P*gDYQGD8--LcIhj#B)(KYbsCnmJWYf^jhj!vA+>aG~_ClW}8O&Ie(dbj`gZTp2pxj3d9zypU z@E!O8JOR(ZbBxdLsziSoV}Ujv_<$sUae2M}VkI6?T=o&T{hCeCU%m@tfV#IpzYO{r z(4Pi<->Yvj7#lR(_#QGK=qf=|N}~A_cnW?2=nkwLw81NY62r6t9bgSSK@2DcZD0Wy z0jpobG~jQQ&5(Zp!ukH!`GW%7&L-RVk4%;0qC5=kbQya@(gLro803O9=`?;z`cK9 z26`Aa9poR{xCRH0gdBu0KCkzqyd95l{G5P(Wuiv~=u;VVYA_X=@a%2S>V(!@^lJc9 zbQ$u9wb0oKjpN8ZZXl0%4A1@tGJ-dEuDy;&2_O9fGuDFFS4S=2`DFkPZFyY_D?B;^ zPxLVe(=HlX$#Z z?(rLT$h?^)AK+2KiGN^bYS{OogzQ5O`%i#8+9Un87WSyEE9&TjQ4K@R5(lkRXywrV zFgI=?o4SrUdKDSjC1hk5kb``PF*$>L>?G#J3EaC^l7bw>+}w{I?1APkuoLVU1+#>{ z8S)2xA;tvxgC>4f)jda}pnn1ATO{-n(64l8<)L3C(5ghm>#?_F26C3! 
z=y@M@1ucg9YUu31?EC-~xs4g}16uXx>#Mj{L4V~am=*FDO9A#s5i%G&8lpW~%Jj2> z7Jbb(rvLpvw%!9es`Kjp-ZP_u5MX)}2mulj5)u;Xz1LAkz4w9&2+@1*-Spm@0UH~P zZE(fC#<88(iGyRuE$&I2II&}U{=X}c*YCI1w^*Ay8jbEg=RD^rXW!@EF-_4MmQcN9 zm!R7YRz2CLKsTMr^+?}3U3YHPL;YszYQMScEm@*V{nzMX|1CP-f3ME;KcQ3ouIPmG zQ##uBcRJ+!jrQ|kp?zQv*xmP6*!&#OVBnwJi+{VD_2n1`eRxklz2!7QFZcJ*(}M$a zb9j`lk4(|kQ8~Iix>y(4lX8Ac1N%$bbZShOPK;fkV`G==$k+`!Gf`kU55MD$J^`qQ3G|$b)2q^ zpQ0=7p}OD^uQQV}bkZ|l$Gl2)*sDedCpT;VJ-yf+vm|7lwHFmROci7L0Ibz=#eZ`?Y^0q@;#0L(o;osZ0gxTzyLjhmR))d?eZo$=$ zF)Ra8s54l0JfFu>C!gyoIb2ss=NV_o&f1ycqRpwETAwyetJ5O2JS|B}(yXRMsrjb) zsio$*sWq1Flx9m;N~hh-s;0~nU30&IYMi*+_fTWs+MGj zX<=5p=4GXsx-)akU71DZnbt~6hqb}3&Dw6?nlZ^ldX54fxPWvC{qSU|lElRdI6(-sG6!K+#|Amd^#U~jo^;lxO0PoFO>Wn(p zRvh06k4#(8@7qt5x6?>OgK|LUBZVROn&>p`E~R9g>zNn}i*?cXfD zs{=K%#%`weZ$byEXOAs*EUf6K?g|&pteB*>3V*ehhpVYP-c(O#AcstWs}ss@LG$_~f8iuq1C<*WN1>ZQQDSJ2#kRh2MK-6nLQv3)&&d7X5$_j>|1J2hc9W0usQtmVx$Hf_mJZ&pZ6Bzr_Ax4N z_f$!HfQs59RM4Ja%4cck`owDL zUa(*V^~N&v2foKCd+`uuF6KKp7Ee~{;y@)Yid52~L?tY;n&K7~m|_-Gm?IZ7S;FUc z*@ezqY9BObi^H_;BaZ%ER~@I$eARJk$Cr+NGk$UOYqdH0HUEltiNTAs`{Vfg5PTQc zl8+nE!#2?`!CYQ9tfTJb{ggHRl(1%$;?}q;W{sbsR);8Zb*#cyrz&(+u0mFonSxd} zm;zSJG*4Z=#NxYjvz^b9!*-JwJz(dx;1xTsxnJ0McK>4M*=4i$ocSx>B>!*Erybyb zU_H68g?!x3y*NAQm!ON+HQU&G58}636t-=!Lbi=n&^GP?+3GLK!wH9@{xteUc= zP~MwsV1RHjP{Ug=yT9U(Dkc+05e>u!_HnSIB`U ziT`!{y$sLkd&$Q=~-=(Wi&7uftWtPkU+DsQ?W< z6{*1|Q#A18bPYIBqkhLb)%VzPIUU(0$3y4ju>UDJ?EOHF_x?+cJ8kN-9c=5xlkh*h zlm38@=ioeYj2OVR9IlxsX@^s^!x^sspW_(}7pQ&jqxQbcvmYL?ljDN}Wp~v@ma85z zKj2SeG6CM7%0SD&f0@DU5}o_vMsyDxIeVRn_$`v{TN?Wpid@_O;xYIi#P|E~^Ax;$ zPNNT>B^Tjrg0lq93^=3j=N^m)x&F_oM&FR`c}%iAZpS1yfW{mJCIUaeV&qBec(Ry8 zZ(-1(gFi-%YkUdywHFwGKF7u%un`+B_2T-$UJd3P{vL#TGap?HZ`&1~#rXhQ*h9qN zVZICYQTAZmqW*u9eJIbc|KK_9zj#p$u;5iE2KyoC8LlkEycuvp(K?s}-eLeYHo?8Q zlHcLzt5*?Y5fPsM8v_#jw|j9F-!BsDW9J#0FQE!KAfY;Dq+@B`~ zFH`@&#vYJ2;U=_{4(Ko7j+up!GmFv@=pn@9FVoON6455G+He!EjkpOec?3y3EmIE zpTQ^KFW@Witp!cu9`c+<_oBQ&&HNEs!T;wwQD`z7P1pN8j88b)h8g+?*ZJ*$ 
z&?1CAJ$6jx7zt8AJ}_cY4`zT_U_Rb0L6cYo-v)GvZ7}UdlQ@Krr)Zul`1v$Z_#ZTh zZ>XJr)jj^i>pS2NcVQce?lIV?vxoDJ0e zfMEx*>Cwjm_{(TGjXfB-aFxJS16LCfZr2CcaR)p8fE~s@jJNOZ!+3+-c$F&uCGz5V zV(|>0c#1PT&RLBtxd|QxkKBdL6}@4UGwtC>DpU8H2l1QUSbUE*_9e0Sl>I0lqQ%_d z-k^7RPRm<7BjpYBhgY%VCG>|Eu;V#&kY~{9oG^V7kdbj?VmS8H)cdM9>3Xe|(HYJ4zycS@=}|S2-F>9X>U~ z(1AbQWZ*({mgRW9o=n)m9a0D3I?KQ7MC4^M^<%91SC4{t2>yfr2R3)~2ZmT1Lq3mz zMkln__fF2ldNA4Kitgf#pFu=42H#TgEr)L;D$-R`rLLIk*Ix?(Hhlb78f#D0a zZ}>{>VNc4gVSBY>*ePur`jEB^L8%)2vDOd%L2C!2t`1^{*FbvqUG!)$?hKg>XP-=B zozL+Netv$eS&t9tr)$Gqba|AQ&W#Dw$+1y7Ixbm<+_JRat$_U{qPud~&tfr&t?(Dz$z}z1B{d zq198mv~tQKEuXSROMSL$vCm;G^uDC|lb_OD&p)br(m&MY!RX=+CNkRHMbB-PQ)z=> zrobqsR;HLIqFLXDQR`sKG2ekY;P0w?1H84JdxJIyMx(VPYi(erRs~Jhil9<03#!qQ zpk^%!>ePaud72lrLURJQm}UnaFwL5F!PM#hxT$06yQVh3znfa8*i5ZHHd6}^B>yiA z9qR9aZ105kH;GK^DO_d&mlIgyW7ED6XYB|bt*8KYEEQ>W=FJ{W<_+HIwO{vI>I-a+rsynTf@$pn?oP7GzPzIsSox*#m0RC4pSpH zn6K=MqHOkskv6-+2oTntVNGwh18=hziwt+3C} z+~$yLJ?fa9an&(1?PW)6%IA*OAh zOLj@KU1o8YeMZqzht$H&j>!dw922Ks>64K6LZA4YPaWg4esYY@v^m6EK}Ih=As62! z2G7Iw82tCopx$d|jR`jMdT}HB&9SYfdVngbT~u1_p`vO(6;y{PzdA;_)yc}K&N5|H z6_~75mFBd{MoUV?OuM9t#r6s1n;haw4?4z_Ty~5qdd@Mb;1kEl{Qo#a=Ghz~b3slo zKFnrZAO_Fi^Nlvjj+x}+Y{mt!X%<=(w#;BDTHe%GMU5jgy>Wta8-0}B7^KX`NM$r8 zD!tLF)P{UhaznW}vA)3)SJ!D5Q@6-Is&<2YMD>1$u&VnULMxti2r2v6A*AF-hmazh zLr9^`KBS-*cjybRk^@iT^EEgx&ZQh%K)nSv%qJh`pg(l6*6*-acKZ-n+g+887L(fU zujKYHCAGyVp)E~uZMlk>QEG~sQD=^5ZMTHAEU<($ueS?o+Gjtl;i7#&-EDjSnvd-L ztA4ciudvzsmxHoiyh{vTr0s9Pe{~-DxR`Qq8MXg%`X$G?{M?8=1^g~`PJbnKk5YX1 zM8$SbQFM2(qPn9L(VeXD*;xvmU8LZy8dG4`3{$|YdFH7z*IIl#_F8<}FIc>1JY$*M z@@LEB#vd$`>ur|FbvC=nwY_+gw!YoL_zcJWOUcJol%kYGyVh|J2*<9~?1foL-HSc( zOC1!kWQf9+xGH3cr-GLFD{x7e0v5-~e{s6}7U#=%QKftqwrKLgIVR8ft4$vBcAMPi zoHtFF{j_PqtPf2SI({%sXtS9o%m6LDc$GGIlK5YT`S<-g8ZzUxNFXWe*tuk)7Ix@q!U8>vZaljOcOTNBolX#DC1ja$_v*Oev^0~$a9NCu$? 
zsJ*Gv#~w0i(2;%`aD?yOI6Pj?hkexdaIpFuibe0R%Kl)9EC-ro+D`-TrN-QSm_heJ z2E8{B`@W*X{Y=8!NVs0yg#V%Kw8I|S0ggj_e9IB$2+)3%`u`a10Ai1GFV0D7-&54y zXSnu%&Ls2sekAd5z(Vl6H((KRE{>$4!=J6}9cWH1@7?5c>;j4QJOj_Cbo?K&3;uh1 zafA388q9gPPr$Y71akyj-EcLW<{pT%^oMio!Qd3W_i_FIGWx^))V^0mXEu{a&R`gD zWwP-CfdD%m&fz^e{X;bNRTAp{<2d>l4Sol*@39a5Q@wZy-!I|s8MybuxdF}v=h=q> zXBnKCSBSv_#DGwGUZ?)Q!95_H%JFe*cmg~Po^fQ-9mFDVGy|>&ojrgH5wR>tayc^j z7@MD-Be`yKvL|0gpZSs`{0{>&HUPuC^Z0xO?yc}Hg|~x`R>PZnjeCI}CJ*5Bfz$0N zVsM-1fIP<@kQd?Rbd=T%wBWaZdxZ4+p=cuG5#s$=6vVJl&SRn21eWphUUU*p`2PER z?i(&%VF$R6wmpI0yWn00_iX&F=VJwrqQSr$@e~@&v+RR`bJQ#7P^6VZ4v&g?!HOYx3l;#N<0~w2&yY5bXF*3s^}3LEwLcoZ_*egR1L9xdcMj{gQfTUf9U2JRpX zkRLYeu&n?jppC%WXk1|HV`vWlg`tIbfGHpt7&UheClC;3HcXCL;v^^7#hT9z~*f1y%@_Y14g~=1}1?35CbxRu?M3PGy+45G4_Hi z0L#D{un}wr#=R&<&?C;mdX;cIg(mSXy30S%3V!`Q6odEA;C*lh^ynWBqj>dSxbWJD zhc5^R$-sz338(?hpaVUk8?FUtAIsob1Is2PsU7&Z4^84YO=H}PasyxApxXU{X#D)Y zobPdlcff!5Vf5%9PV9lu5P%NRqd$z}I2rWr!T6cy)c%MT^DpZDe^B@T4LiQUjxVv} zFWB)3c6^K-f5wgv*c)?)`;^|JP2Qyne+yOM4HS`AiH4zhya=8L&w{7HQ@}P1-hTag zMa{3iz_7)Lg*&mFinbGhZ;AMo310zR74X!ttD*&t4*cPH7})U}?06kJUPXU+32o|m z+T!$3Qrv z@hb(sYP?H=IVE37o>9z(K^*9 zS;w6+b)@ff_Lr1uf4^$&?boQ?{oA#xf46oFSfp(O)@aLs?b_7;kk-bPV z9U2~{eIw$udt|D1j>^_{o|Ce5bh$QMIR6^H_oq>FGORxn!MZA(`8AfrkqfV&qHdO{HhwgK3BcxFRGiw$d4XViymXZxYhG7`2KsiUyh><5?NcuvmALX^T zHe!ycI%2u0DtxP{BJ8lKEcA-GB;anL8`qQL){3j=JXLVqx|7k?+-A27wg0sk|Z zy-TqBm}ZahdRHpTPGYhsy1(Yej8<2SyEd8&ymQFUyM zDq~wr<*~C(rLjv*B{7@LMbQV%1yPqQ`4P`r^1?r|k%}gAjw#4yjPV!M>QlRRRB2<$Uud1X}RU~Dr zJgG>fNmVLNYBCii&N3AwE;i>UY&7R4?6+jcU9@DzK4Y;)e`v8r{$R<7uvs#~ZRU)y zUKm=`?}@=n_IvlzF?P{_>^5r{0DZ)asRd? 
z$Ji{%(IBc9pOJg-WUv&)|Ht6IQpJ+Dn)PiZxqv;3dA~i!tcI+Cs>vFoiYyP6X8EZ& zD_DhDQJS8WsQj!9|$15v8P1gKerRSHJ((-FfDfw;Yq`Z0NgxocjxSZW~G1+JBqB9@2 zi^{lT7n%0GU1X}w5}6E=dhr2y_d32lh2PiVz1U3c-%9P@!u94xYX4f+`TVY|)JX*; z!<1LzrtA_gWtR9Wqa;k}C9z5^Nl{8kwvtMUlu%r4iYsn4#T3moM-{EML>Am@37>w( zE;R3!T}aM*mXNG}T0$~ymXHjP-U~yEdWD!jj{gtBd%m68zmpOOtZU<59Nufemf{*a zWmR$iMb#LkRe30-%1=pE!Ah)(R(w^G;;J$gQ&pg-$|^-xwwS^zy3L{GE6u@WJ1v2w zr!CWpAF~7${=pJ3{U4TqJewsT7vz9!o8@=p-3!G0G5mi3-m|kAo90j+f;GIJGm~r1 z*j0!vsg39ljUyG078Bd(t?0&SifW8dL}R?d8`Blqkgt%2as|~lXY z*|++n*{AYRvv>KsW}ni3n0<J8q{pN0O=;i$-tp$cywtI+mI3U2pPP}>JvrDDJCB-gZh!gD9Vx%L<27Zjw|wSK)8si3@|u$%&pB3kbQj9KyH*oscgU@4 zvBu5XqOqOFG^YJwxy*Q5E-inTOCwhh8$i9yG^WmGdV&1A+0D2B&jomouVakfz}T>n zx^F%GaxFE_YVHBSo~RYnz02+8v3!swE_cy{W$toYHbvu>1<7?;w8kt=k;~FNjapKv z5ld!h_~Hc`x^R;Q&*wgnxz{wX`?nf2>u(y|$?CZswDscYF4_V9EAVjJh>mWhPi&{| z0`s@A2V)EOA8aNUHc@-?dv7pyqoamz8mwU($7sk#4-MYvt3ew>bkD{(_1|EX^ZF9? zUDv2SYv;&e^;+4l+%L=W%d#waNtT6wk=^{CWIvbH>zrN~+Y=sILp#88dI$MU|IHzk$H~?dV(JaqeCTvTqYX^o1G-tb{cmp zf4un|jy=tV{f{VY|Ao+GLj>-{BgFave80fCPf!N#Kfu^@kg*B0f-+zQQHRi?4paLc z;r@rC=nuz4!}SG2fD2$Da(pU?01W=em~@UZ*c~Q`4&mVb;|zR{V&5N0_ix-I{B$0q#@q?tyO|T=Pyd zCcsw=S2|o#7ugSVnHXH*9t=*?|3UU2T!a5%@CbO+jsd10liqL!J2xg^9|qoV^bsba zr)ZNW8E76Sk#2J0M{hGAe@p`WKu5p^U}!LB;XcT(H{;{t^Xx-`v*Hr>0^QGjC|Ah? 
z_`GhAi-dCUEn@Hlyib8=z_Z|a@Dg~%k%6C>yg3FD+zWjoltm(T+^I&VVBq;58vGqj z`u5vQQh(*M*zwxWx`5xu;NF41E8v~QM{4mi|2i5BoS`?-V4g&Sfp6Gzwi86n)Gf*s%2fko`o*vkbpocbGo^tZ3T-*t+3 z@1b4S5$AdM+zfXK-06>_!4NjT7x+G!S7?JbXoI(CgWtjNdvFJQ06qjCflt8~;2ZEA z_?H>oWiW-yL=X(H!}zi;+Y+z`;JA(8+Mc+(_rjHm$G|u+5qJZm=8gxMpa@ihCeQ)8 z!9uVctOc9FPGIZ>ISwuW^c5PZS7G>$`my(Uq~CG;3VaGa0v`gKow4u2sI!f0{RRwM z+<`xc2B{zql!02%0%n3aU=dgWdhbQ~NxRS_4x&k%qGrEL-FyqJ;5TRmU-j%eF}S|~ ze+GX9{{!9zHiwb?Z|r#(28P~!?Z(jugn|TM#G(LH0ONU}t?dep4Vm_rJ{xeDYCo1SaV%;D3>vzFhz|bO$DBT7&ry=}55EwPT0pn*QHlAP_ z6>1dvM+#MY4$WK)S0!FGphdKD=1$@{hj=c6XE`adj_P_VJbUo(7!kNg>fVCyU8?`T zp@#Ho9*=W;3_JoJ1~zASO-{VB2M&N|P!N}YqmzAyHuf!hFuqhA+EqF}<-$|U8LOzG z8;~Sh(IYzHnS&;=knXtx-`3M*w^OU{hy4^>*U>&+CL*7*5923VsP~(E?&Ek77@5w9DhzaT7Zp!H(<1 z<015it5o+_$c_8hb#We5!niBuBoR7}S4Uwu0uF;icVQa@zrDfViyvh|?{UK4VZ_r7 zUT=Ie_FzQeR}x&Aa7|}_Nhy9+!BCGst#Eac0dvt=mY_wfp(VD$bAW$m(LHX^IO&(?$F=VkUwT_88kvM1#f zcAUVDWAxc0w96s(EbT|T+{@mW-Q>njVzC|OtwdlGf4zbBSVw!TA)8mjy9%tliyr;M z8Gi8LJq&r^WhY}Ow!`Pa=q_XN)024m6U%V;;&s5BrhS%d?Y1k>F1s@Au&>cJ`zCF1 zXxAo(Ioja3L~9+_YqjGpt#CN5W%gIK#O?(xvV5ckmhUy+%r{Vhxu#wi`pZwm;?q(1 z?+U*gQ_NVV72dzp*HNeKhw7-4n-28z*6sm;+Bq;%+Xf|Q^Wb!C7@VVZLyEL!NQG7n zt<#F3ty(s8mX-`#s71rpX#UV0nmhEEx`$j<*Ps_QYv8Bq9QdO;2B5U|??qcb+F~Tb zE7Sj;(Hd)KYTD8PA zPYYd3G~cyK^IRJ>XIzJ7kDI4i<5sBCb*tK452JTnR*TECY8w5K8b|%8hLMai+!NFT zbQv4`e}Vtqsa$4YSii+oca`H=f0iEDvU7r?HjW>rH51*m!rdFKB~S}JA~b(eyyi?w z)$B>xnl-6FGd;`I;aR6P&sMd1&Q`PMQZ;&RQvIX@s`a?28uzDFHQ@tQj{mnR+}Qbs z9#f7UW8j~}{v(zh#@5f<(ZnE@7{sv52xr+7M9s-_T2@ZxdnbLyX#SK*nlojpx_m=5 z(>EHOB}p@UGt}anr)IxmHTqSn-mh7;ezR2Lw^&tv8&u)DPi0fitCY#Nc=8<;d3~=! 
zPoBht4pZpP=nK9j2Jgf58oW;?vb0E{)=Fl16Hg4HSpJ2ut_$d=IRT?IYuZG0pvBAx z3{*>Cgc<|m(OFVe83w zr*fvCfcpS%o=!Cx4d0ig21~D(;J=m5+S1CB3mi;i$;W#uh)s8BA9aKdQ)}2bHHCSp zKFnXWVWFxHk5Oe!s}ER-lpl{^OP69TDf8ODm(PFDKq#nlQrmF zQ^vG^m@?2|($Qdg;Ab-#+g^W1{GW&O(QKB1xvcB*Sc39;6E-bPqRxr7Q*+c{)klv} zO|*w9qkUB#9i-CeNEJuNt1voE1<^Umk1keTOpS74T9p+&N14&9Oc_x-O=*!QO{o!& zn3BVOXG#wJyD2HyW=aYIfnb`=^f7txTX=3~kqi0cLLs?O#JZ+{rFafH5KE7?B$Mjm z`R<8?(JD)rsNw`46~<4~^!PC4#m6cqK1JCHS;|T%lr^DB83`>)OXyZg!U|Jz{B~1f z+zC^B%negq)Z3=G$iJE5!fmG5Fc1ntY^D!s^EZgW)A)Y9m?dx-YrAqvg;Lg5*fXEF zw9+<}sm>}%8KJ@yH|3{zDK}-RvQk2nnG&sxlq996T9uYMT`8#*N=|K3VrrM-Qwn~aG!3aPt2f1 z1FM?p6WG#-ZG{zFYb@!f#NrW(D|S;%v6rHYrz)~IL=nX?3M)=lXmPfJi;ERhT&rnC z?FuMbq^X6QOuhw&O;hr(ntXC!Gx=nHZSu*qnS3%pI!LpbenTESgRc+6ZzN)TUjli z$~JjdERa|E26>hpl1Iq{axZ#S?gd}TBcH2>xi*tW4#>8dUM(RP>KU8hy3j?bIG5Ug zKBXX7&g)sTsW-4Im*2;=m=)O6Uja=cHMPl2eobDQ(&R6nM!t)*F;-rUY4U8$mq%lT zCSv%6`gwAzTc>fg2Q;?&ipEsFtg&TZYHSHB=VDOUix+5v$B6p_aGitq*kbzSGHNcc zj8dRuA$@}Pv%1;$j*WgZ9p%|MSd%(kr9*9;M|wtI*$$6UFI zez}EtVl(}6Be}4FT;ToWHPqhNGPF#yBApcmKh>oPu{h4a{M=Ig!u377|(K`}_%LpyNv1#bIz1_kJ|Ul{BN z1_JCjfE@=W118}Ev2?C1lABoU?WA!Dva!Ks`~BGW2KIde$uA7V*svD;0iQ13OFQt9 z!v}He5aByah(HxEp2-pq0&&Ch80~PJdq7U`42qN3KzLc~f&L`ZaNvqF#N#Y>oQvm} z$9wgBZa$r!fF8dxB?ykS4|A`P9)Y~M8+{pJf0*vc08JhMpDRYI^M%8_#F;jy~W?Y zPXd0A^Vo2KS_kgaaPNn6<7rX{&Q|!!;IqONeTh8~gwW$EdoUj2StK;6-3@pj1&;wv z*|R6*DUQ!r7JK?RuzZ^K@;R|}0z5v%)xCTGT9*k$;x(%KOFWtqf9A5{&1-}D-bU>RJ!oCnU zG>HKAgd|ZsRI+$pzy&;#>r0&a@(q6XIrv^D@Ou~BtMRv+Uo||)J`{M9;SGHP4F=9} z@C}2{={3&JpY~`k?{It<{2sgq2rmU`uNLwp$G?Jq4(Q#yZrsFfBOW&FuwjR7FQ74P zIAb%a|FOWRvkeWxIJyC&<_-k0ARSBx<)9wS0JFe6uoxKCl5y48sNRe#*+w;a7CZ=^ z0e_&<{TnrO?{~5MmE&jNLvRQD9`x!SBRCowgn`ju0x<5$i2z9;8x(;m&svfW-|~l7#VD!XaC1t{vSAg3ht;6y44VP$Krt}Ly$jyg`*wC!+> zT0|K>Rik?}z%@h1X_uqu4~N(rbAa5~i|VnPcXwjlHaysZXPZ#BHp08%|G?I#*Z=R~ z{sb7^-5743Q$KtiL2Hhu&Aj0ZgfEi#C2-bs{K|o=0Io9ps>Yv27~9Cm+3+nwv0M$; zHgu0eMCCqu@RKrBGh-*y68Mc?ZD0{FZ1~ws?d?d-$1wYvGqnc8+#}@1{q&Rb^wm?e 
z$x-yNgX|sKOK#lDo|K)~u?;)6@EqYy*s+29ScitX8ck>=ePucSmSD*uqCX$&=i<>E zvUWB-$>`O_2xF+61`Hd%h5Liy4Czcax4|Q$n2rWBk2u<+DGksO>D{VKGuf4%1SpP{+^yVX5lsk-`a)Xe_-)Y0#p+SrfM z>hwFcIQ>=4>_2I8WTbKEg^g+VFL1s)k*S7h_vU1#!bwcKXK&mJgezCBwtDaCn^Nk4Vwn5t-^9k*}_iC7L<1S{);s)i!dbT1PEZ^T>5-9Jw2< zoUIia~(;g1#HZFz-$be3|x`=8SEi_c6^J=jT11EQQ8d zv~o22AI6N)g0UW&>*|Nj5{%9grB1g5wY#NhhFi8;+zQn+zCsP->s2?tT{Yw9scQUc zRl4m`x!XyVy53On*tb+P=5H$EUZ6sr(Nc&WW8fQ>9e1WMod$E6BZBEYk{ED26hf`V zRI`S-ES!k`;675bJjScT!&@^Z1*pX{Oii9KYVb@_ou^ebo_VVBDp7@3jmo`RRq8cI z#a=7WJGQI9^SJUot}EC5P327ZRypGt{oMdsOg1{q=N_~J{=WeCV{zm`0?VFw))Ubz zyTYh*_+7V;quRZPs@2C;jc76TQ~Xrp8>}i{?tk!&SDA0BN`13c>|3NFzbX}=#pL^S zDc5hAa(uTc%lD|PJ`X8l^6N_X`daCpjQ$?Ly%(PlgSX&%7Ty~vv_l&0kV-o+m?D#Z3EBG5+k#oC^GaFZ(EI_g9Jk6B#TCb68vE(;vXjT-G(% zF^|}^U`KhBlZqmTX?moqaw9#I6*)!L$Uvn>hAS;HPN|WpN{-4=QdF@LqiPi&)vmax zg^G#VpyoF$jV?E2(alr^57+SZo&UR0n6ZG@)2w=q8!L0 z7c!~+X`7M+2j#{MR#u#gGU6sGEsp0u#sw%TE>wweF-nL_R$P3RV&jVx6JM>U_!)|f zpRe$^^$LsKuaKBa3XXa~!4ZE^aM&*j4rRA&2ngm{3T^&8{ysXLd@La!D<}`ZwsP_j zTW0fqV>Z{D)6KFb4^V3INF^t`DKXhg@yULQO%7H}a+IQy6BU_aRYXdG!cwXfn$oJ^ z@{j&m{*hcMiU8qUQ=!do6ZaeNUt+m;vX(vpw$xA( z@pD%xYs^B{`FtjY@AZzi4pXevRnb-tMOvpQ!WyVBYlK3r@d~k~E6B=oD6JI=ur|v- zV~+eX*2p(~k9<-u$~*a4c_)4<@A#kO6UUXJm|na=9y|rdHTW*p(I=WHJDMqlI4*0T z_Q#fbepi5viPIg>Sq3XS&qbkm6BL~1t)M)AP0I^aKwhl;(PI42Vtn(;L5NbH5N9sP0Q{viD4Tj&#Q^ve#$$9C$i8I-2j zQ->}2*b-lER!|B0L&*sFmyDBN$t3xf_{pavSl%U3np~13Pqdgx#U*ktZqUS{E{!i* zA-95Ea-DuoWAmQY*zAwwn)x5OW^ffT9i-Xx0&V^nvA@5S@v)OyV>bP=o3Q~H&qV3u zy=v^r!IoHTnOf6FldA{Iv)V{J&eW)i zpZHvxi1E`)Xu!utqdTYgluthBoDEa8s2AHMZ-X zh9&A>zfJvWPpNP9P4%s~qkd)oQvVWGu4pj>idfsy?hkSPi}T6HCFo%*7#mhHK7skm z>6g4$j6JFRJ`8)jiO-nX_8Q!Ej|O#()ID8p>fhz1ezW}5cUHKZW+ljRX0{w=mdUQO zMV5{QGPP}@fIQlJ5xMbgnd<&3Qw>+os#(FJ$2v)7Re ztJ#CG3O$V92Q6do{Zi^q>~db*N0!Bd2-avY0eGV+1R5Ls7+li;iN2toK@Owmu4SM( zKqq^E#$}b({xvrJ%;0CkF0Kd@`^)%#2JWMq=@Z-Nmte_OKDveL|F~e?NbQSFQ~AsU zFbv!JtTUq_^aDc)nhRh7vTh1s;9Z-*gqwpSbf{G%_(}+t@8P8PVcSat_zU!ypRfxX zX0z8CAB{(LoZ8M9y$iWwH-COF9{~;gOiC+xC$%T=2CiTbum@bs0@$$&J9do$?!XU( 
zgA^K-SnQy&w~<&|AlP&cn{Pw#Dfayb+pu97-+PGf=ioW9mt5G-xBwOtwl+|~>r9R@ z!2cl6!2lx)-}>1Uv?AIWY;-Hcz{;D8Y^w zV?aJ=Wudg1i2=u+zRg9WFTpQ-o|@}8V*);Ifp-bMwc}R>oLTV2z-8PE#4@bsc`O4S zWe>(JF|dLs!PDRw@GN)%ybN9gzqPRMgqYkJ!$nhH3YU0_rxNa4>EgJReIhva(Tgl( zKjZg5=os8P@pmP>UHDoHZyx?7;9D@9Uhs{9YY;pR|K)iFp4Y)|?&2+u?|?smKY@?I z7vS5z=pdum58^{16$i?}0&p+D$zO2b7ej$C^ai8OHZ%z1XlNoHz#l|{6p#Z-Kn-XD z9bh(C0G0yd>hUJ96C40WC3^)tj#lsvRsXkCS6o8`UxSZ;!E9&{uY+FQV<<;MgD@}* zxB+7iMhJ)l8NkqD%0Mk>M$s_tMVXEMv49G4DHXNx{ji(i+Qq(%L)>9-mb(2K_4A9= z?jKRd{z!OwzYq2|91RWPHZXLEUhTus91IPjFBk}1z$6d=j94VWnFUV)zLazRIx4o_ zdr`i|jxVX^KF5yFu;UY|KjV8*-lsZzk2>>RRF1c)px^9KWSAp4+f(2sxB;$%UT#DC zumD2`FzgtPf8&Xv4?YF+*D>fhsc>ZDOA$&!1WBxSEsw?9Yq|7p*jk87O$L165~xCjhA!iL^JFa2NqNA~azqTR-zAMsrzXfc0O z7&=5e-lU;@aN7YtVvJc;mRz>|wlh3vy9gR2Ig8sTcA(w~h!un?|Q^pdS`93WFK zV$qXC=Y3l0Up>8O70fH|;=eY01^4^F=;*YYZX6^> z_hH8#?6?;@c4EhN?AS`XY$89n^8&7=aBa{EKD7*`W-%?aklr|-emW2Cx&H@-4UF79 zUkCPEhS}%(FyvZ{;n)m!fBYOln~jHaGJFB(EMaiP!j%j|2EOIe8w&Bd9KKqZTi}~T zk6j4Y8m!q#rW{A>co22Oc$&+XMC+FxrL`413@~ErbA~hHmdBSE(r$sP!NU^N*@A_3nPF#)j|k`_D{uzj0&e_h1SF7bi0Hx-yUP z{^tG;TGM9;_wtO@;(n9RS$s8jK#*p$CuP>acy$g+QTw1wwGEoCR=&Tdd2p>72e+zz zNVjT-ELZiAt>_&`R6gjcN(a26lK!8oxZh7Ic1HQ_3!E4`@&At#m~Om@!BpD7pShfA z=HO(ec5GS0w6kbvKg}IBQeDHxt8>I;bQYffI4V@lqoUO~IzjcL(^NY;N7XJxs&c7R zg-eslTxO}nWr>PhHmP9rA?1&{qP!6=DR=ni${EJ!&%Ho7=rP&oF@G7$bOZl$)0lok znc~A(&#)ZZ%g>v!W%(qQT;tFm#tu=d>sU37^H9Cp6xF)%{Dtx1>@A5^`S@g&j<>3K zLcWS7l&N4sz49k?D0jj_G>;9+8oysww@b=!eO~EfKULb8pOuCdlZF=}a25{1r^ zpnT6X<$C5Q2Q4Pct5#O8Hf4CtSDM#4rFiaD@+5k-`)wsm{8$O&e^P=Qy8+Q*63}7( zL=22);ywZIgNZDUl38{o(GE=cYgh(x*-~w^PqmM;%J^RGVzih-pGnG}GDW#l0@+u> z_diVG`HNGMmBD>VX}*O@^{rB}Z>y4g=PJQ>jpC;4QLN8-MNfWO(Ow@ZdeVOsjSdry z4ih~Q9iQ{R%+l{B{P(A^#L6HSz_wJ@6bYn=kKCy|B1@<_g03# zztRFiloAl7TVVglo`6cAU8-ccvNxS8^eU53uEUEa~BG&%C7yu$Bja@fD* zjRxb52IGw`^cs2aL^kV9RuDOvo{zv-Cq_8K$Y$ zvGTLJYl_uJK349*u!hUa8YfS*7!T`o^p0vx$Y|I2j78`STjZL0Tw{|T(U`!dW?wC_eguxo@SSdCo!`Pbdj@4=E8}A`Ykh2~sbvoawh&?kOz$J_{6X@{ zAFWCGZ8%6ff`jBsS%|~8eWpEp(SM+QrxUT#q%|=Xrl%c98v%LYwDN# 
zmilM?UH4d7xuV4kLW_Bvc7G853mwc;v+0*}sr~0uI)eqgZtY?Z2DW5iO9XM5Labct zxd)_fpoZ3t)R0;?4XX9jz}l%AP#dcLwQ+K;vC65YSbeG+Zsk$y225SY*n~ZitI?vc$qhS)tf1}#THyo+fe~OF1FI+S2Mqqph{e(( z2AM{Z8^afFL6bR&jgMp7huHP+o_#24ry}EA=hibmZe^ap)LlD~PB_lqj+xu2|3M;u z9t^wyw+d?jFavEd1MJv>9b2$t3wCVr0<_8IIGiCC8>v*rks&?DG)2iv}< z^Am?f=nwdN2F_#ml8<}%lf9VCu@#g8BvwVzlztp1fMLK1u*f5M{tHL`cmNmk9Ivo!o7{1-(e>oOlhV zvEdLlfV1!(hi5k&YdQa%lzQT~U|a?Kg;1IKAPI2XY;1HLl& zGT@1VXDS>M;26RA`x3?;-Q{79oTf*2xykVf@Dz9kJO^F`uR60p8^whx;&CUE;wcZz z03`o=5WV{Xdq%!sA^D?@z`gwpZ2<2~c&jhd7w@Mp!WVcQJ@QfX$XkpD@R*MfHu=vsUSHsOBu{~NS>2BW zhW=pedoYfMCNcr|f-sN(te^mtgF0YTCY@joFs>G_0PDb3um>EaZZV!o`Y4s%Z_o?A zqK>wq8T38}#Mp!J26zcP4|=%`&7nW&2Mis=*n{B(0zouL0okAkRHA*ddiE@AM=RJz?R=67_kQ$(+tkp1qJIB3{~OsU#Y^gdN^9q2Rcy}y5U&}&kA@ppcU+(Ne-d{ouzqh z;PdNf5?|AVy*p$K{lm~cu7dkP@4Y7`j{nsl`rxPW9L{lYdhwUj;E6!{NI?6@pf1is zNhpD%5{`N}T8MTB8bLQa3rWWnq~%7^a~GBNQEKN)Sn)JE!Jpv#i5j~1J``gg#wlQE z5x;Wx+VDMk%s1>u`HWb6K#lo(O+otz#*=9DfFx4Uia&XrwHS^{IO^yg&2*D?be7p@ z9}D4H371jv??wAKMb&+cmU#`0;5+n)Ufsh`&h`Q$d;YHtefw~56o-oA<*RBm|4caosToTuJCOWl7OJ5CahV^rRUskslLKJ24v-_56X z5}EC^*jBQ2Gpw7yMqup4_^%Crh1=*(M%QA7=Pw4pI}*;`Js20M`>7D%i6wr?@L2IF z4~GB8-g|~ebzN=$OT7#AIwOr{B#kucWz>5|NT`AkDhNpk5LNWvd+)u7jxiVv7#Cc? 
zCGK_-mnVK4$Bs+fyAyZ&|Jxdf?Ywz&JscvpYkah*!&a`ucp?0^mHFb5zu0J+)*fU})fx4Nom5r@JER_+7RoJV;-U#fC zlJQnOGS+H}jIo+0T~>Iy*RZl0q5l%zY?bs-?@NQ94{( zq}{bkT3si}NY^k20iR ziNyyj$NEi7_mRwFyuX-fXND)&U$}cquZKp)dWK84SB!LeCrO8QI(tiUrOl^UT6_je zvu~|5`8LTg-wtW;>y#{f82n zdQ+L^S$Z5uA;(By=>wN}29|;vYZ>G3E~5hiq&-k4twBZ^85EDsl8Vm4^B;oqrCw7e zb($)v(KJf6rcH)u$4aGkx>RVFN||P>lms7_VkW<$fY+qR?`tVSiz!5lDMX8T4I58k zPhAUiK8tq9VJ_#miNBX6vjoI9y%Ef(q0TZg)JK|hT4~gUOTEq{wP8szG>qpzgyqPP zFz&youapXXos{WYq{J{riVahx(6C7I^_wLx?5O01-YePK|CVgcKO`rZr*fmkX0u^Z4C4oh8XMGKiVfd;d(V0ChK2n$vEcpp~$xVoo z?1Xs9N=TKA#B51VERwXOA!1HyMC%wO$w?C=F=;Lu!#asiI4E)PmnAm#1&K9%A#rFh zacD4ck^GK=zaPZ^7qNM2D0xmTOJ9~LW7AmabI!T{eJwfegZtvz%q9jePTF$qLDQ|d?pSd@8zNMhy8QMV~)bbon#U8CFS1?mp3GUl2{^ z^P)-rRJ3R?A!slmDcoC(I^K=%&tm^*8~w6_HSTCqqEYCAt@KN{jDkx&TngcuTyBrP z;vrF`0TNjn%6^UrF_gxNzBEO`O0y&sEheP28ogtL1ec7Fppxlm3@gOHh-YyWoEP7` zXT>k)6Y)of@kfUVK!>?jO+2#Py{(--(M7);!`LvEpQC%=z~8-a8U?o+xa8A5i8a<@ zsB#e<*C>Tn1&g+d`(Rc@OEC8k4&old0YeJJe@LbH4jC>!L%PJPvJZ`6xeThvyJa%iJC z{H5c*y#Wm#?C07)TinLJFy&K?(MmQ*i2X zM2)@hy9BqF;PoW}0~~tLAE^6rY#&-jpV-LQun9>8Oxa*@Uk&dUz$qC{5g?FrT~=aW zf!DGU9xLIo5*{nzu`(RQgG_)gmec8$(SS>+z@nW@Lg(T5Jp4X`8yse!KT!TrY#!K3 zpV-bA4Q7L}+wcMJm4nPJ=y4zvcmsRD0u!g*ge?O+?Xrz_*%m~D!ed7Q$6Wp%%6X$1 zba41aD!Jh_jr}bAKVcAp!*cWo%ISyR{@t9i2by3KXam)_jxB$Z#4#KM?866ug)UzJ z2f)D_IK=Ug10CFh0s{#!cpOUtw8_y%{>B%FX^#W_eD`6>_z1_tVJG<(HV#qdcFJ20 zrcq`mbtt1Ti;xd)KNPC=rm2OmO{e%&=#$js&w$wh9EXt3id>v)`oI``T z0~;4^;3CIM;4&c8NntFS$O9a&fkz$DLVVFB;PIlF}Y#1b7+{ZX}8~ z_Jr8-=B zfVvh%WhlKsWlS@{e6SR(A)DMncCGHmp?>ARi~Qwjw2$}E3@lH{eVgNp;3@D3xCSiG zA5r@+9B*I{$6#=CKT0OAl@?QpCQwg?)J*x7Yf=6IkFUtuzJ$l;@c4|(>tiw^_3Qq- zOFY^~IyUmLq3#D-P5BL!-%R=K__Pa6VmunbWPCdV zT|!-pvVwYUB24$v8mG|;uA&vZg3W)hrOR>+Ns z^eCBZKRgbR_aA`AK6V}Kmc``j>rjMtVdn&`b02@Hr}+NBFmJiD?lO)lk`)I=3K&{s zMb)()MH}PJ-`Mq~UTP0UB(~zQW5$2k*eGPbM>(Zep>Wh;XE-%(#a1VJ#5nYbDfEyz z*ji3>Y?dXkTgbpNj~1LmWX;CvETDRnQWq76Pr>Uz*}o3dkaLIFGZcDYH;`dD40}=7 zN}xnDcCx8cA$H2}_z+sZ4r{}yQ!BOaLLcbG-ZX42#Ku})?WPx>!9x$D4ZOprKN3L} 
zl~W!6fxUNt8de@ofTiAA@p58=PFg6X?Gf}TZ?4(h84$)VL?oo!VA=If3 zEusl~?Ra4fiqK^Cd(6QrE6_c*(SwfTC9VS@vPYpms8Qx7n7!mR2ZlJNn)?~jE_gE? z48lG`Eh9U%aHWpEC@Na)h2z5*Y;hkNQ9nOw z7DC-3WSmv3^jIZJw{^O7+VC6<+aeidTOn?j=(1UMY2hxCf-iNU8SdmLVQf&^s1LxyJ@6 zbw4B}Zro+p`FSaF`bdf#f07~xMqqpP9t5MsVgJ5(rqU#)`ef$OM3x${tZU&iGmK?d zAZsdbCmHF5{@|^VVLk?F@QIc>-vo4)RH^pMk}AIfsq`zC3jd)}=HDcx{-dSXf1(un z&y#%rb&}_IKyrL9O19VUCEN2e$@X9;9$HMc8ydErSd60`QfY?_+JWh9XF74fw7)2U z^#s1@!6zd%cI;{KkeZ+XsSXO2A;A$cI5#NOpBmLs_=i}G2|mR~WB{wWfa>5EFOFvjL_4SgW zZ$s}GC#i;6lA>QF$zgjXQFmSvLY|QX&Bv064wHxulNdnmPa8Z){qMx?$s(3oCG>}4 zmU{&(J+oO;q>&=fMmz>ZDk2@FDBMd5A_Cb<5-Qmd5t0=VBN-7%k{+2MX_5J2MvF-? z)=ILm1-+w35{z8`U|cD&k-H@(;+&WaPl+k)BZ)zSi9v&j(USY)gZpz>T4DEiIm^7k zEI+}DQr5PGqz8O9inggIPDm^_ z$$5vewj4rgP)0oBn|6G}?JlGw)mkzVTqHH#Tax30Br#qm3GrP2kPs)a2`LhjkS(Uf zVu?+OC0=fY&p5Z+r#oJKCp?wkf7<%w|XSl6XpNN`S2{lhaXILVdl;c?f?GToZ-O zLnG;zZREUQNh=(h;lTTCaH)k$A#sviVkO}P&gd&%5}F?<+I*d8@*^cUKTZPkQzd|X zG5+~w;>-Pmee&DID}N$71JB~f-6HPUC&exEy11pkEgt3{#4{EB0UhQl{=Pu{Ph#_6 z2kB84V>DRO2?zcj2cI^$4TVb{?L$+GzQSGt%iP7k%uoEvwBl1{5brXRc$FoKM_CqH zM~S$V)`&}Kt2meTqBG1BhvJQ5UwB09@~?`0E?3HC|3jS6VVu!n?xFq9wa_Q9xxa_8 zVLTjq(MLH>9}9dWQ6i1$|ZC{;^ z)=?-n)m36$g(IsdWXNoOIA2TWK1Aobn@0T;PW^%;?q{(=i@A$Bs_T)CVsqal(!;5Y zk$rIBIDIm*!UUv(as2L#Z!$))_a81AiXPNtCw9Z##cH@8Cul)9h(*X`z#T?M8a9|g z1?Q<7s?^UQs8^wV>?e7-#E-}?z~?gx{TWW^57<9Vz5B7bms7VwdCeTgX^wp}kuTtb zoFhe?lR}ju#*lZy$7QrNAG!c<29Q9Y0}Q&O66wqgvh8%XHk{mw6I&>JsdH`0w*|dP0;Pri3MyOTtdHG z!5E3lR)Lw&9Z4xQu}3yB46A6P@bT+bM}fp*|Hi4baAjjQ03$LEP~ zGAtwagqIV1<)5(SH9Qu>V=+7yhk_{HOW}ww7S-ZZ8hk#a)qa%OC*b}&xP8Ec_!Hd7 zH?Y&sNBcI@FQK^-%-8}={%+ia4>sZhK92@k-~m4yz~b2g-jfYhz!A_c8;H@3IuHXg zKsoOb#Oo#zXe)7YKa@yEef3a3pD5*Pcs`HV*0%@=lFp~7!65a5R`=ns^2kTSOJ zV|)fv_Y>-1U?0js`T~eNgbsC>JrKaEAA1T%Zr~Wl;{X@`Qx~}npPZq;oP)=}bvXr= zflzJ4I=7CAftWb;BIke2_kNas?CrwND(uX{Mh|5VKT6l7Y_$)?bdokWO&i>X2E$iL zdfGUy*vLLWUqaa2egU8c}po|}_MgfBRiM_mkHw(Bo&@jH0eb`-( z-TBy@KwVl+(FWMdK1&;5BaHHWF3}fnmVXcBE8M5-+`xkzuY*Uyl3t=CIk#8nq 
zBLf=+)T4s(Ybbvh<+o72%G}1F5%iKPPQ}tJnqVQeR*|=FLHjsJ^PHu=4^!W_2kc&< z^b0`g5XZq$@H5T!YkZF{zT!9TkH~f3#utAf$9#pn|3&iLKcERc%QYZR!Q*%Ec$9W| zn2hck+1-QixS#CrK6uFa0WYt}01mo$RQ^}WS z6AeqzJ~p67>_sb3J3_9*=xuUXi}taR<2tYw+~n{X_TK@o0JR6>5qxn!adAa-*owkV z0{%socu+@u3FdbV9(IVENMeM~hXV3>8BSRzgW>}|>Y-0RaLz^r|H(g9@EP%&c z_AAYX$4ug5Iz0N|F$Erz@yA5krI-FP7K>w047y<0L0@gB#8%X$kwkVgy=?@xZ{j^{ zy=2D_=f*JW$uRH1dd3y|_LgaJIc>5~yr^Fg_QGgsBQ=a;R7j@fxef$>^uVJV9-YKT z2XWd?zir{2W>_`honh3Xfyk_*m((&s)S!N;U1vA(S8P4&g%5&g11(bs*vIS5zRctB znrCN4ZCtR+Jt&BcG4$6?VzdJu?ey7J#*UHjXeLgZ=r6;t(m)OC7{h9;CP=jvN|Dt{ zKG_Y+GmLbPva9HC;Co=XBhstX{c-Hx6OImLWI6=9c)ec7G6XI&nf`iRY^2l9P120JxMxii;bIFCp3VAqWE3MpXo zX`bV4l56*{k7KzJ&GH8>eId-H{>){b z?0<0ekXl!N8R`}yRqo+Z>28vWK?zdkVU|*^5i0g9lp@c;Qs7x9d0s7&PoU`Y+tNs2B~l60|>s7scFuuO?Zi-}`jOpLx! zO!^Lq(sTWTexXE!ZIN)@2{D8`BnHi!5+3}$LTn1OWTytW*HGqk{s?SiQzndB0NN5!wnJ>9xbMbM2U__m#D~mF-BHM zq_JMYjqPGEa{YsGf#@PPNoe>n3DG|wA-XptH1s>sp~2`h>eJ>xWH0p zT_wx%3etmOQUN||p>1k7ub8$;i?x?Hw3wJ^Ux|(m7Grc6`ifB^Oz{$q7Gp5wN?1%8 zT1TCP@=R-OOs{BS=1H(=qXco0c%YG4DB^VqGJGq+dVa0Zv6i8HbxqVM?5pMP_FD4I z8kXJFaNxZjxV7Mu8rr4^pQNSQNEBL31X_$CK2Y@Wp%NA!A-aTE2|0usiHU&0*mjbAT5asA>QbDwx~&tPBUH{yp5Kc8g4Nl+aWU^c8>6q-rHN)gVEsCJ8``@kfjCODz%~bG3M< zHj5X})b>c7E$%7n#4Y)dxF+5$uJNykdn|V>M}zT<=KkuGeFt?u-blY3!5V)gV*^;& z42R+5I&e|j2x{O`SdIol+=QdW1aXa$f0mEJ zquEB$j)cKEJfccs@kuCLyee(Qp~6jUD}2SeB3P^{^b8;-K;TvoWaV_Q@_G^#3ND=j zmJ-xEapqb0JVgTc0R?`~kLj#0@&74KJB-cU6W}lj4wK=)QC+^#Jr2nLE~T84*2?eB zoF7cQxHnjfO}z^PhZhI{p>$vdyxIhiLE{nVH3auioLN1QL3R;|%Xavkf!E{kd7l7- z16s^E%07XegV^1L%QnqqoCXVKkQVdzDBkDtA}PQ($#4nBFaG$&nWClDiG~_P!1w_T z4l{rlz+gXuU>`vcHPM*E$1~8*NBh`9VW;8s7@Xe4xp1i9I&VHcHkCdxi?LxIW8?xj zfO&kScMiqDg{?i34VO5o6UzBsaJIv5(ql)!;EOT%q8ndy9H3kfxjoI!zo2jPAdZg1k~@8O32u#z)>$#EJk>)e13*5dz@LxUs1HH=tcs(Jrguu{sz;Fwi7(Ead%q8h9*)bOz#fn?`~uD)p|}Bjvcr+U@*u$BvD*mJcs-c+ z2;v>IhpOn7a|HjNX!vh&Jo>{e+T8Rq`Vn4 zRX1feQf4t_nSt>rZE&16I6)kOfrbAmj;C+nHjcN0v)~SJ0bB&QTka)B9)QR7SbjJx zV9_`NOh+5Rk@sJ|aZS#TvIm>%usIid6Lz9?JmLkXd5Ps+AcW#0)d-2g=l 
zXf9Vc-UIFf4}fdn5kNHul9m^p`2iIk@5HmAzMOl7bb%!#b{Kv0n#db(h~*v>YX61O zAe?~GM1nv#hzIE)A5?%EFdVd^4RoSvjAeyBNxozC{|y<)-^qf$)Qi9H%@@EE;2O9OC@sKp{mHNMf5pQWgo0@7Bx55B?W2giypjy5mhzh@ zzm@Vk$rHztEleb%p2j|jIr4@qL6=yA)$Qbu{p7G0&?KHBv-x=7+M7ox^JSltP>0ksd~CI@;dKDOLm=1<~4-8|6&f~boDJ0@%-QGN#H=TUwMU7->`)?lL% z8_m>f6y36$x=q09G@@ZXIr}QKf*rKVactd3{`?x=`GMTsawXLYj>~Rv_y*e_g1>;@ zgU8Uv?nfKDOkCW77IPbU{|VaUD6w&vJt;Tug|UY$au=EFPBP5xX54t$9{W{-jTU*o8{y&86o5EpyVEVrY@SoUD(u@!}#czR$8b;`g_E}4B1bt=bJ z6*aG?6`Gk&J76#lThqwZ7ZI@=@XkSc(M3M_FIaw!Qet`XQ7gv}u=gkMJ8(BpLtZ~% z_{Ohx)YA>SzSz`IF9Wus(IMiolY*TL>XnPFB6N>(tX5;M0eda5-qm+47q=Q{DxJ_~UNq4^B)gHkPSzA#K9d(bUKvn_BD{ zuoaEB&3(-Bx34ueL{tiwAs)S{YRFof2vq_qdr9u@RcwF6G! zzwOW;JQ;5N@j(zi06TfT-iv7#Uh1csNsOQ4=qFuhLLCx%;4&3{Jx*5AX3g_2(1eDeoz^pM)Y4am(r2sSQE8PbgRQfq+`2$YZOWzCW~da} zj*xuYPRX^MEZMdTB+G_tFs!)-!|Dnn&3~aXegWA30G?&}^?-rgKbm%6dfN*&nOJT_ zvY!%Oli=6s%M#Gt2Ccb>BsgA?1p5~y!S*vrut9&YM#I)I#bWbp0(~KcSOgoA z$o=DJ2YB_uZ&V0NVti8JWiN&9?vn5BCpm*O>?h&=3m!(v@Q9N%&tx%sW=e`zfh2oz z{eyRdBzU(=oc9EY@t!9puMHCA*)PUH_lVK;Wr=e6Oro5ak*FX4i{evIZKlrCf99m3F zK$(~VY9%Uw>mLHfqcO~p@PKuq_dg_IzE>p7>%TXElrZgBbcR_HqFE!_-~*xwx=VunUlfhsm!d_33GpWPPh{Cc-A`isKoQHm zVwR#D7vvL<*(}9b4mKyVwxex|Xq)tKTZu)Bi4OG^V`!j6goa3XXt)@3F{0NcNf=s; zF04pG!>UB9A0Zk&*FS{Kkif9j5}?~J{vnsdPxFHK2Yn#{=rDo)6TaO%P*Ds8LACx!SV4WC5E*-J>Yn`okaBse-q0;6>jfEMF#iW5Jy z7++H!T1SO=#S9aVm`-sw^@*ElxwuB{5f|fmagKOaTn!(KTNpcO*_(3@ZEzM_N3px7 zmhris^=%y-cs&_DU4!Ak=hbj2o*cXoEA&3|&Eso|S zu{URlotbBGm}|w_+)k1-2`u8v+ej8pBVs(pg83bOxco+}qFL8b_G#*U2)nylNIUq- ziZ(d3!eJyFc)uBL)o{s$YXUwYYY5O{l)S&3bVGlHQfoz@?OfCvx=2=t-?0O_J;95@c~U@lt2M))14poie| zCVb$K%GwtD$FQS*j^4qk8z#U3%!N9)6P2zW#z?r7z|l-Q8SzOF?dAq|L~_7nTjdLa zNs#rx1Q={9a{*)(`3edtpN;mh9)3sR^Z=aRfDat9(I2pXgj4oSWPF;+d^rscU@n+E zh148AL&s7DI3{(l{{Sw2w2>2h*d&R*;R?La69NFfXp983M+3pzKmc)VkksMS+8GqI z7S4y^brnvp!wC+>+;gAu`zJG3O=o<9>UvzZY##H~9LDLHnCYV$e4YuH82l14hP;a+ znG(@AtkDthMF+kZ2g~O*TWHGQ4-WM_O z3KIbwy2$;py?+h;asy)|VY`&$)OFCM>1uhu5I)JA69EF?;*LO0QPe{01jHQ7;&b<`O)#i 
zS)kD-$B2s~opb=&;}9nuc!2MG$oI)PDQ6qyET^1ll+g+5IHmBAWgkk!Vf4s;G?*il zt#I@Pj&W2t4o-qo;5KjuoCSA)JBg9I@W*{dG!NS3YCW&<#TA@(=}ta>l}Y@cy%2iP1p9d84PhI3LUf&Jw1yqs5*pjq7W6w1> z0o;cqhGdR5d996$+UW?dQt?-=a%1~9S=Flj4%RoIbP%-%!yo8?38Vn^3t$OQJG1M_ z4@aO$w6i+zB2OQOE-@LMU6>^wAvYP)p*E6t&1`h+J ze_RG?FT~H#{VfzHFQ7sHh`>%f-%e*Go`;Px^oT0TuSfS7L8j7%CebCY!{arwkXOmT z{zL}&5?SsG1nut$lBdb7euveE`Nl((`T)2D&Vo}w=@7rQulQI~M>pyfKz(%7BN{tN z*vLftC_q;!qwFf`Q&0I#WNxi!1f$8-$D(^oq6ucOA7cS}!77?&8}^RST$gFSKf>l8 zNNEj?JnneRM%2n?W=(Y)pm6BzW|~V>~>@ z!lMTs-SFsyM+ez|J8{b0Ft9h4SJPp&lwP!z$T?1{U4!YnM6_k+nc97(eDJsp`UAu2 z?JkyK{^lN#C44qV?C_`?cGVt?AnK;WRs^kXVsuELZmC3LCUwihUNQD6u~&z^X6$tl zwUdb$t^=VLs5`o-Cmk2j>x$5A3$d^8M_Y!~LF9YB!~jG4exU9HvdW!l){%MKilqo+ zX%~By+KG*kXs1o^XoN>SJnG<41CMHW3?V)SW2u6_O6e=bFfAk!@~KHKEt-vov*^j0 zD4v;&KpEIqcn16Gez13hG9866C4vpSUKYeW4zG!D8O^ewnSRnpY}BHiRx@r?vNwib z*6E=!*h|Eo8GG5-<35xy$$?=OUdo^bX+(~S;uLySG7+DIMwmn-CSqUVai)`duzNdG z+hG&yh-jvFUN1JV1Y(Igp3mF8SOU6OOQoH&lv#U9u@%q1wAM(THTQt9F-o>gtYq3I zNrr8@q}k<3s$H3+*w;vs!$?VR=#e;w=@M(dLSpQ8i^=9JBkvR7J@5nI8cOPXIf^_d zj%h!Uc^s^XC+~(&UnEOG__YMHrotzs9=4L>>>`daeUr&kj^^`L z!`zp^xP-l<*xr-Rny!$1ljEFR;t^h*_+&WkGlV5b0dYcV%ib0T(FeJSF33kh0=fPn zC{#3DrxY96e4)m*u1ZaOo(313uyQ5>kLq%=p9<%R1ZWD8AwD;vMdb zz7i~+;bAf;B1+sN62&z#LtG+@#3^#9I7YUJePplLMa&o5@J(W4I4;&<4~cEa8)B#Z zS{%?}F3|?3v2_UBJFCg{YDfpbY+g^`y^bU{vaI~o`phQXnc6pGgqYH16&42Me_Kao1=jJyeLtM;WfQ}zY-;(E!npQ6ep4nxsEXsZNxgy9okeBzklBvu)NScEfpq-y{Hn{EOGSvn3*=L=~R zoYsv4XP`B#p%Nqw?1>Tf#7Hb0&|z+;?4!f!m)PApin($$9Kfs&VzQln(FzCnRKO(@ zt}#?Cg!b`-i*2C;4djL};0*%#VVKUF7X=8KJUVzTL6+MyJper&6KD4Q3;fzy_NDf5vk=uSiVtN?yVoEHvPe~NO#H!P7X=&^#VJz4^7G6o*q z0YJwfZvtsFW{D-RJE&|sg|=>o>pA%S0nG>w4d@Tp*@w-Y%NVCuz+ok0Ex2k6F?IQ^j@4hZWEkugXeSb zqb=IW{jt4wHDkj%IIPD99H*?|qgD6-K6!l3Wrt#bliynk3d(6d|v)7`}94EaTZ4TLjDgOd>i z@l`j#lI(vE_4!)bW}Or8X3z~`0*YY*$l-M@pV1zxmoPCOVp6)sVE7T0Mt|5wpV+}^ zU;&t*nhR8dY(9$xA-whkwt&P&E&6`}-ywyufN*08e?uFwC(yhYfOT}dBm%#b*YuD5 zw8vh)wCgUM|2E~Y|DW${-2)}i2Ril=wjh5$aY$D-aP&QZ26K?IZ-Q!&zFE-E@d!8y 
zjse^}poyI3csp&Q9^80Qj}}6kT&^a87|((QXP&zaz2x^ym|QV}jaAr~Mfp9HJM1uR zK$&LBGlIYq=wK%)PnD^_SAV0ooaJxA{GXc0C0<_v_kjn&BQ_*ZJ|r>3#*4XV9!+Q> zGx$*z=RNfRiN*V@D6POv4MJ%ljzC=xqXkBwR=#QlUcd^p{KmB?jp!aD$x}z6Np!P9 z?nNhGRZ{v%CM=MxE*1d=Q z5ogJKpQe?*WO%dOS$7V{*+AL<82kl13$6hb*5~lWDRNBpERqA{+k40_c~T#BQO_eu zLtV+nMgcY~&m>t**0~ZM%i*yUe=H(S7od2|rA=nRXgcis@aANqbs~0e?!)NiHKQdl zX0flj(nX-UU_ZXF?7{HDUJ$l))F%=fG1y4NhPoF<7B$VsP6_oHjIE(q9fqw|qN1Bz zeG*LO;*~Yz>wAgZ^LXhG^d`$547CqK;a%)0|33^agHwQ^f;uZn+MaK#Js3XN3#PO% z>=>z240aN+W5!Arb;`q5F+F22(`gOXhvSKM>A-p$XOD#S2s}89h-t)9JyECjUew;e zYuI`c+zZrj(+`w=rV8rJ_|14>*@L0(gQ3Be9y>6Uh6>Z?a5bMM|?#OzIA$BG2&j-`R zhW2x&Mrsd+KlU`((qk)<-Vj5L6RBG&b<3n)`LsqUy`>6!jo53)7SH0qdxf-QKJAf5 zd*tHz9F*5=I4C@0gOu$?404#lvMB%cC|s=5n-{T4G}e+EfKRO5srqk3!ld9}P8^{*sNA zEM8^6ggb;{s|g#O1Cf(}q83N5jV0n@h=~|Rz!*mV7&u(V{zYt`j9^-2TG;|tg|p-c zV|f9WF@DVD9xPXBr!pJvK|z1bBSv!=zcSJ4(iuC_7{gE=u$KapOw=K+!N5B)R$PN& z#WfgKlkgbVV9-ay8O6dGRSm?10lm}!hX<(l`ADW*mKuBGSxbQxyq*)q5)3X~eBKnu z@|dsi;}6Ei-E@v#%Vhl_-Ic!}17>mNM0{=t*$A3UQa&?`v-yt)3tyGVS!hl;m% zi+FjD7Z3KtxO;6BH_v0@>VChtxx6aw&R>WJI?P?v_ck-}n8ng6mt|lcOS~M`mHa&( zUOe7Y8aQV#T=Vcrs*W|b#tOZ~Q38Fq|AG(KKllcTub)nQ{EXu5A1_}1>EaPkAnpNG z;>z_&E&*f3DPXoZ`mYxUzkadzxmO%KUlGSaUx_n1%q9GOin<@nr%x2I>?}r;D`e@* z-{Z1bU*nSod@`72dLC_-icd@ie(Mai6Q3X#@eJ}35AGS`9uy*OXfdvuSaH#q#aWvt zj@nAG*EXRuj1e0W18ergSOpy-X}=;?elLlQ&!=LG4s$1UJwe?MU|T&Za!mz4Lzcs# z1P*X%$2Sf5WH9ID!`F;YVu+KF2y5}uIf+ZChdAl{#8Iae2c1Fe!(zlPEJbYexnixS zF#RylMY28}tb#lXN9|=UUic$iJ|SuPQO;A>610&7O(czGqydc-Y@|}g1~8iU`z$}y8ur2I zB3xL{YMHTve~_~UEQztN?#Z&HmgRpv;}e+9>v6o-4wptguf#8fa7m%)2zUe}v(}Bb z7wb4T5&&;Nu*MQBu{2gJBx4~Q%a>y7z$lzIl}21n1z5aCULbLN4lW;~!~7^`pxTeE zJ|wDJs6n<=g~#gy9n(OgN@7fTp@a%nSGfI!+3Kn-T{I zq+}eN%$JkfNfahq61Ie0aJmCN&*C3AME)qJDf=+Cb~iIVwJ}C=`a)<==QzHFzRCN; z;Zsg^GC0>n`*1m&c;Y|XTwC-FXObZg^bIDPEZQT}00`bpI$I_uWl+Hk3QC_yLoOzu zcEaf#e4asz`3V1T?F04?a>~vQICR5d3>VKop>{3UF8fjhRnH@_KRPLilcn+gUh04IliF^3%XGqW5EK z$5_V532>MM2QUNL6Zl#Se1>#VUHlSHQ99bohjw$oFHD{!#`X;4E;N=WC;AaMOfr?U 
zMnfo`Dk_az4R#tnsP)K5TEF0*C2vm;nbc6O7@#M!1xY z!;PF5g-?Rv!tg6rBe4%B8G$dF9B43C0*AO5hA$d*bZ8SuL#LtP>TqB!1q_`7*UfM} z4ZkNT_)~ny-UI5a_MvR&a$?c49$e0Y2iU zVl*0U(&0uB`v82=jxXAinE3P1J!rs|4meY>ruFbV3BM=c28W^K{w0AI;O{4oI@6Wj?TUl0oL#rO<5c?Fsc zj_cve-D}``6rPX5@e{a_`(tnSN;vS*hPC)$HAVAx8(ga3luIbb!zT=$-f*&?i#-MW z^c%pH8?XW5V>Ucy`vW~-te90m;}S$O#<4J3%EIIzMjnLk2k<2K$FACkvUL+{$c>!F z>#lV)Bkvc16gWk!VoUFzk6?i!#3tiFb|9;Y@2zn0%UGxJOCQt4y*vl27}qZfOA=m8KyDnaN_Pn8-1Vqt{-> zBS2zDc{iY5E9?gc0PZCLWWWPQz%g(FoQB8kAuN=L&$ENkJ-S#dEMc^f zEi3ckWJ@h*9v$RrW6&igkV*EjQk_F~y_D7BdVbm7L;il6JoaH!koV9dEYG36o1@Y{ z)Lw|=;D6=FbOXQI+lFNQOU|Z=Q{onxD3;vbE*ZAclab)%Hg}q=j7CqXE zY2a7U-;p^y0*`C(;OPlm$xwrhMs$g0a`krXbfXpYVrLr7G9TG$HO;kyx}GFNt`jmJ z5I&Z@AKN+J)FN(j_zv5jf;Yj-u9Zg~px$Ane-2>RWNXGdR+Q$ztvpm^&3CG1iargBxWH z8e9kW;EN07m}kgkPKXZmAd>RqC@ls5Wl(-Dd z7FjM!IAbBcm`4lEp>=0rbq2V(7em=+q!`$P@jH(9f%D)LzUXJ)!G3Zoei@|{JvNNg zMLmxsnYyHt7w0fE6_QDoVyBWi)naQnw%V{YhF`g-;0^V3*0p5c$~%|Io&U^Ug`fG< za<^Z#7vn?hy#kc|dw}X*sw>i68EPD;mpdASAGWmEF<>JK8?o3(qCV=^^la=D5Q(MO zs>D_;9%v#eMxha?dtuBV<6OpHJ79Jj5v=xLe8Cx(Js9fv2DY97svE1Wt?V-_QD-Gf z+wg5?Gzc#=hyY3p!H$9HDvCPAlGP_+$4tGlu$50Fl%PjcVy_N;pc#8z*qTh#&L?6v zz^)%{;3_TmHo3Fq9-Qj<61E-&m%vG&hCMa(t^$j|Y_ym@;&UST_Bdi=3_LpFF^YC+ zgGVdqfW2~<)L^ff zzq;u~eMHVuKHWhytGmoR3BKSPmb(!>W{v*f!m#eiknhbL?!`365Wm0)4V`IcEIdZh zCco^#@WEan9uL8i9$Q9wX)N}Vu$P8Ct^?tnEEr`lhNUr%rc#q+S~QUsPGGc%$MbQF zu5oZsc!()hJrnpuFmcEfw;8MmWNE-MWHQ5iH(Z+GSu3`b>Wp0vEcjwCh~61OL~qH4rrC60e%@NhJqh@$l) zX(e^_kO6NhMNdbRhca4*p!S9ki6P8BN}0b$%hZb9!_iFlO!Mo&5?-rE7mkOES{gR` zvjnBB@@ca)dk0CjcEy?(_WZENJs>DOk{Y3eV2^7sthfflx==!_t3+cnLV|3@NTBr$ z39w#;s>EI>?J1^1M`AevsMznT3gTg*af*s-|&>>9%9P`D`k!Mgi4Hs{xZt-%QE*=gm z#lwD|46?mIi#{iVn3>cs1Q(6;No*cSB_7il6Vh2`nOXM1Wo$f4Vti7EPs(+0z$YrD zi}tn=ox7t1IlD=Kv$yy=2coZph>vT8c)P}mr(3FcxbqAO_X=?x)F{q_I>pI-nmD?z z6bIM6;^2IzI66EdPPQM2v-S6KA(nVdAs$&?Y|W-mfCX9f3Aps|ek=Uy@JSh5bMT2u zDY@)JbbdDC=f(XOxK7EDSE>$zQ~Ng zo*409rgZyO&SK*T_VyIAT;(e(OIhx6oLU5jJUHN&I=Gb6R=J7916*R@5(*bDv=|4C 
ztJr85R5Sq~l!Q1E(1=xF&*M0?oVI{V3!l}&Mcq~+k7CVmiKcxt#POgATd~#CS@fR3AJED4;eZA<5L^bF zYN!S+yf=Xb@3<#R27y#KQ$VX9dR5Mz`c#`%7e0eK;PQwQd zA^1V_ot&hu2dl`t>lmLJ;V=viU@EVBc&`mU>iNKx@X6)e1bDFRM|`NJLy9%ph7-xC zJ0CEZajzC`lmnsxK@^`0aH@KcOWZgbah@e%i8=+JN6}*5p|J1eI5hXyGd46aSK+ed zt#AN+ydDevcHSQbpK`clz{7-3w6u>0#oJ_A@unkMf-8d;F=3`X%mk~Mm{1#txHd>q zX;ig&Bc%uJV-9>b!S@7O%p(+rAELgKquAevt!?da=!8QT96%qh$8|7Hj$*DDiKL;5 zsl}D}j4fc?_f<$lK%pItM0@1Z9(levi}uK+J#yjz!JLEhvK!DDx=Gw-!FL0EkHZHJ zNyJ0K57JNhdqy)hjA5Ln$(HxhFD5WXaU75PM)K84c&N*Z;)WAa6z2^Wd)kLdj1O%H z76*bBUzFpEGWtiU4$vN@#6$^BE9MJD7%7|q*L83_2Dj_@@ICnaB!{}_6WHE52@ZX5 zm<9(ho!4Ew*T82b@H10{5nlzt#g*n|!9pVe+DRI030x;S5OJY47}jV&1lmFpi6f0x zjq`@^zSOd=^6nYI#@56~aVc@>VtLle3GVmK^^!zwte#s|=xz~3!Pa2=mz!zC7uq44pX zP5upM7Sl}bVEV6)>b1f&8vu`K-ardX1bCKZz?njzPol61yWxBVu5ZJWd=I-j*27@~ zKHxZQ9dy^=F5WMIPZE3#z;7A(H&q~s_yzduH-F#!Y>5?%5aMI0k7Yr$l(<-0!nt(7 zML1PmlQZW$3#Hen5BI^~&mCJ>H-I_39t)Z_5jH?|BlT?JAm9R6p5v?=fbYO-byT?d z`9@wV@O{~WKeiGZ+am#Sv8{&pXpc=eeZwif|2&I@Z)DF7-U5rjB+v$`K=w}hBF7Mp z9>97RWh(G`05|`}tpf?kUXBX;zyWX&^n;`DI37&m7RN%Rn1uoXe0(kwB_|$!gmXWY zZF^_~FrDvr@^(Eapo}D-ryQR{T#t45MtQ2to3D?k@^9cc#}k06Au9Z$hg{(GWpJ+x zT1N;s6igwR9m@WYiEOCGS@)givo|>5xBpq$*xK1UIyt+zxexO6^7irb4+sj@XhU^j z`tXRzsAyA6Tzo=ea!P7idPZh;PHtX7VNpqGS^40~A=N``YU>*shc%68ZfR|6?-mR3Td;7^lBLU*uUxfy&ARm)Hf`Rrb^DH;yY}qeci`Zm z!$*!DJAU%i=`**VJ$L@XotG}(b>*IW@4NcIgAZMQ_>o6{_xKY}KK;zI&;R}pe|+(! 
zm;dXP|9db@4o-xr=S0)|NXZ1Xl}Ra z|NmC|TUo0XcTgekO3Ztxp!X%}16A0EsJJ%}_(m1^u`2YFh<&pP{wx*!`GkM5ivJ2a zK$YqNb*c*trw@!&onVye1>JOmajG9oQXOF$Jzp?bqwy2D1*AGWCu zv6~*TUv-Io)hABSDQ;7};tsmSMb$6vRvqJhdd4-?H6Bxa<0(4FbELx$_^z)>i84T8}yR5woL6U;9g}tgNkVY;1`cdj|(cM7_6;Ysv(`w%~V z{zOn<5HUm$;TOV)I3kc#Bn^a;C6@jrm(1v zbY8i2-YpN@^1v+*-15LJ58U#=Ef3uCz%38l^1v+*-15LJ58U#=Ef3uCz%38l^1v+* z-15LJ58U#=Ef3uCz%38l^1v+*-15LJ58U#=Ef3uCz%38l^1%P89{B9lPyaKuSFZo* zKcDc?%OC#d6K;Ka%LBJO@PDZXzIx;D|B?CUXExma)qgtU)peuS{pCNM@X5*5CqMuH z+B*}lD2^-uw-G$nD;}#N8V|&H#QTbisNjJhL~cb8L~uF81R@~b;uYcz3L3dY)&msq zLR7)Kx%eU0Q^kp_K;y2Q(H?Q&xc313T6Oq4QR4FQ zUah!pq2e#vWvz5~bobaMssNRoO*x)-NmC~#eyk*sEFf7x+yW2tPlSe@6_pppuK9W_ zHz7d`cogb1v@)Zmj=P%V=v z!n;W*k{>%xf_HPMPu|a&8WjBt&&;i20jq3#D>clhU;wq(4=kF}TT9NE+-Cr|=uXNy z$MO9-YH_2B*#JWCOEUJXnLo?s2Qffi{`5-bTaADulq~RHwLnF|@hnk=KV8_n+S6kA zDiQC#IB|vjaJ~a@yT?>fu--;rac@mwcS1Xi@unp5H1IG>!-KgW)yug~yI8=bkCk#x zD})8`JEbVo5*DBo@}FNa09zs8#sJm|0S5+)-m%yXzK#|KMjFGt<=3)LX!3&?7a$e=nJrNLefGb^uK&3#aa)j!iAg#e zxE32=&F{C!=`#lqb#dMX^-t#}%UBwc?Yi(SUY_RuNE4Ob?UZ7?x7TeNNXGjesnQ5W zALOQz;!Lj-w1zf9pzS^y_*@7yW^_{SLV(*uYuHWzt7+ge0W78gG*U?TF#x=YmpUzA z0FUjAhW2dVOaZa_LGCVF$(rLV**<%S()}NuLEG{5$}#GViFD#j;(qA3y_- z1Mu91D%sQb=3>>+JoPuqfyh;kP83Rdo`ZGZeHn#`ZuVD&Vw{hJ8ftow#6}1pQxc#Z zB|j3NpUrqTNnx5xRgv3LDCg)wOq~G%SWy~~F9BH4z$wC(W;B4Tges;ofU6w^#AMc( zR5;ojU&l~^E*^}D4BxzVS)iALDKi`-IEbl0AeXZL9}CnxdGM57Q|XEDC38%Nch%;M z=?5!9rx%;*DF@d2=P4D{D~0_-lXwbn5wtg)T?c_>q@0@%fsFvLg}`|LOn{^RpNK~{ z-Ws_Qs+dfw=w1wgFG=9zs}SIKvK1sPDjt-H=YWLiB-BtbR4B$j6aoP>hXiO_^V{kQ zQZ%LEd$Nx2C;*Xv4#pGXm7$;pE{xWIWO@Q(0|P+C{nW%F7JyJueqQ>~gL|VxBN&bp z`w*$LAuMozUy$wSp1L}n{@S@)KgJ(3jQ<@zRWhYc(N!grX9mMK2@yNQNPFnJ`;!Xr zFPnfmPC@`W5Kn~9L>{K1pp$@#AW#MR3IJXZz&leD$q52ym7}zJjPegEh7bz4DNqBN zMFNHt@GS|9q=3xs1TlyLkRt)~r-5XC8d;U}86F1S!(5#{c}=T8_G&rTmg!-53J9vR zmTNrTMc-ihycN6CSfgS_ol;Q)S)k--qX<7c>C9ig! 
zGEA>?>!ylk)%=rtBw-EjZZ4e!qKNj$e>p$`kLp#cx$Pu?!1x0Hpa%f(Z&{EDHroB9 z7%#*C8m@ss4rd2|8Y7^>3;@Uj023gv6#%0k@XxL!K{*iRrX-*bHGlyK*Np|X8`E=EWOdDPJZ9SWl9bP%RV1YS&b|yVr_u8tD~Le zG&6InIYCL}wI$3L44%!P!Q54~C1pmB)PU5jt)6HPfaS^~m7gEgkVJ}7p!x(sj*?GR zlKM;usR2=*)C~XxLJb!HK=qUv0LZRi%A!(~3V*L~b%K0`;eu>!B_ycfAg(fme<(E99b)3TJTjd%$Uz6f_MeiKF-7o{rSE@i3%q_% zkbCC%;r;P@V)h<9eg%#WVgei{4(-)R>7sDW`s9+=E1VZpG(6B~R|=b>Jimt2aE1Z` zNZ`pZNO+I{a)E#&0O0U}gE5R`&`(k^j6yhuF$7iA%?2ki0sv?o00uzdSbvh}1%V1M zV$^{EiV-e}!9T4=0X4$-1g?$(P?1UfS!j11Y5C=58bH^k7}>Z7Zir30!CsTgI7BM? zur2Ve_SMVk>bLYoR;I^pTJGoJJk#33)MS#0wRr4PXuxR{^7g5%L9~f@^_ZPA2SwV8FQ& z_zxU4r5z52LDZVKQpFoRsi z;P9a{SIek?8WWWY|Lhj1ymcYtMACu1F_Do`F^5jy0=>-R#V+QCLq6@U)2?Mxyq>&q zn?BRQxObeY!74M`NSi{>9skTJ38cVV6Rsxvf$D>xk3y_P<YK6;@H{HbYDDv>Pk>nu?BbMe=5_mbCylp4|E&U}RUI!9n73d<>&jFx{>Vp7) zLSV!h0x4s_NxTQd9asnTl#djSa$58m4-_EuXg==7acbS$aOR?jv?2;X+0(|DIr^*$ zPrg)63*{v@Wu+S5lMy*gDwZtpTP*PJl54pcX~&WeB_t*!pUeT>2SP7`-0Vz^$BrI0 zsQ;(>eFlt}=KBv&&ELU&#c6{OW<)bc#F8q`j)1@p5_s$c0T@0E7EVs`FmQv=KDFPMQFd!m1;B4bQ92k9{ANSXze)I%zIPvwPz4Or>E); zFtBh9+Y?!qEZG!WieNF9;1? 
zMwb8$#eV8#dAbwkeVYj62<=q7)!VekSjUz7^8ZcA(PhsTo%voBY>nsFUA`H_xw8uI*9&O{?t0o2-P%-&UlZ|v #include #include -#include "./bmp.h" #include "./tjutil.h" #include "./turbojpeg.h" -#define _throw(op, err) { \ - printf("ERROR in line %d while %s:\n%s\n", __LINE__, op, err); \ - retval=-1; goto bailout;} -#define _throwunix(m) _throw(m, strerror(errno)) -#define _throwtj(m) _throw(m, tjGetErrorStr()) -#define _throwbmp(m) _throw(m, bmpgeterr()) +#define _throw(op, err) { \ + printf("ERROR in line %d while %s:\n%s\n", __LINE__, op, err); \ + retval = -1; goto bailout; \ +} +#define _throwunix(m) _throw(m, strerror(errno)) -int flags=TJFLAG_NOREALLOC, componly=0, decomponly=0, doyuv=0, quiet=0, - dotile=0, pf=TJPF_BGR, yuvpad=1, dowrite=1; -char *ext="ppm"; -const char *pixFormatStr[TJ_NUMPF]= -{ - "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY", "", "", "", "", "CMYK" +char tjErrorStr[JMSG_LENGTH_MAX] = "\0", tjErrorMsg[JMSG_LENGTH_MAX] = "\0"; +int tjErrorLine = -1, tjErrorCode = -1; + +#define _throwtjg(m) { \ + printf("ERROR in line %d while %s:\n%s\n", __LINE__, m, \ + tjGetErrorStr2(NULL)); \ + retval = -1; goto bailout; \ +} + +#define _throwtj(m) { \ + int _tjErrorCode = tjGetErrorCode(handle); \ + char *_tjErrorStr = tjGetErrorStr2(handle); \ + \ + if (!(flags & TJFLAG_STOPONWARNING) && _tjErrorCode == TJERR_WARNING) { \ + if (strncmp(tjErrorStr, _tjErrorStr, JMSG_LENGTH_MAX) || \ + strncmp(tjErrorMsg, m, JMSG_LENGTH_MAX) || \ + tjErrorCode != _tjErrorCode || tjErrorLine != __LINE__) { \ + strncpy(tjErrorStr, _tjErrorStr, JMSG_LENGTH_MAX - 1); \ + strncpy(tjErrorMsg, m, JMSG_LENGTH_MAX - 1); \ + tjErrorCode = _tjErrorCode; \ + tjErrorLine = __LINE__; \ + printf("WARNING in line %d while %s:\n%s\n", __LINE__, m, _tjErrorStr); \ + } \ + } else { \ + printf("%s in line %d while %s:\n%s\n", \ + _tjErrorCode == TJERR_WARNING ? 
"WARNING" : "ERROR", __LINE__, m, \ + _tjErrorStr); \ + retval = -1; goto bailout; \ + } \ +} + +int flags = TJFLAG_NOREALLOC, compOnly = 0, decompOnly = 0, doYUV = 0, + quiet = 0, doTile = 0, pf = TJPF_BGR, yuvPad = 1, doWrite = 1; +char *ext = "ppm"; +const char *pixFormatStr[TJ_NUMPF] = { + "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY", "", "", "", "", "CMYK" }; -const char *subNameLong[TJ_NUMSAMP]= -{ - "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1" +const char *subNameLong[TJ_NUMSAMP] = { + "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1" }; -const char *csName[TJ_NUMCS]= -{ - "RGB", "YCbCr", "GRAY", "CMYK", "YCCK" +const char *csName[TJ_NUMCS] = { + "RGB", "YCbCr", "GRAY", "CMYK", "YCCK" }; -const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440", "411"}; -tjscalingfactor *scalingfactors=NULL, sf={1, 1}; int nsf=0; -int xformop=TJXOP_NONE, xformopt=0; -int (*customFilter)(short *, tjregion, tjregion, int, int, tjtransform *); -double benchtime=5.0, warmup=1.0; +const char *subName[TJ_NUMSAMP] = { + "444", "422", "420", "GRAY", "440", "411" +}; +tjscalingfactor *scalingFactors = NULL, sf = { 1, 1 }; +int nsf = 0, xformOp = TJXOP_NONE, xformOpt = 0; +int (*customFilter) (short *, tjregion, tjregion, int, int, tjtransform *); +double benchTime = 5.0, warmup = 1.0; char *formatName(int subsamp, int cs, char *buf) { - if(cs==TJCS_YCbCr) return (char *)subNameLong[subsamp]; - else if(cs==TJCS_YCCK) - { - snprintf(buf, 80, "%s %s", csName[cs], subNameLong[subsamp]); - return buf; - } - else return (char *)csName[cs]; + if (cs == TJCS_YCbCr) + return (char *)subNameLong[subsamp]; + else if (cs == TJCS_YCCK || cs == TJCS_CMYK) { + snprintf(buf, 80, "%s %s", csName[cs], subNameLong[subsamp]); + return buf; + } else + return (char *)csName[cs]; } char *sigfig(double val, int figs, char *buf, int len) { - char format[80]; - int digitsafterdecimal=figs-(int)ceil(log10(fabs(val))); - if(digitsafterdecimal<1) snprintf(format, 80, "%%.0f"); - else 
snprintf(format, 80, "%%.%df", digitsafterdecimal); - snprintf(buf, len, format, val); - return buf; + char format[80]; + int digitsAfterDecimal = figs - (int)ceil(log10(fabs(val))); + + if (digitsAfterDecimal < 1) + snprintf(format, 80, "%%.0f"); + else + snprintf(format, 80, "%%.%df", digitsAfterDecimal); + snprintf(buf, len, format, val); + return buf; } /* Custom DCT filter which produces a negative of the image */ int dummyDCTFilter(short *coeffs, tjregion arrayRegion, tjregion planeRegion, - int componentIndex, int transformIndex, tjtransform *transform) + int componentIndex, int transformIndex, + tjtransform *transform) { - int i; - for(i=0; i0) - { - snprintf(qualstr, 6, "_Q%d", jpegqual); - qualstr[5]=0; - } - - if((handle=tjInitDecompress())==NULL) - _throwtj("executing tjInitDecompress()"); - - if(dstbuf==NULL) - { - if((dstbuf=(unsigned char *)malloc(pitch*scaledh))==NULL) - _throwunix("allocating destination buffer"); - dstbufalloc=1; - } - /* Set the destination buffer to gray so we know whether the decompressor - attempted to write to it */ - memset(dstbuf, 127, pitch*scaledh); - - if(doyuv) - { - int width=dotile? tilew:scaledw; - int height=dotile? 
tileh:scaledh; - int yuvsize=tjBufSizeYUV2(width, yuvpad, height, subsamp); - if((yuvbuf=(unsigned char *)malloc(yuvsize))==NULL) - _throwunix("allocating YUV buffer"); - memset(yuvbuf, 127, yuvsize); - } - - /* Benchmark */ - iter=-1; - elapsed=elapsedDecode=0.; - while(1) - { - int tile=0; - double start=gettime(); - for(row=0, dstptr=dstbuf; row=0) elapsedDecode+=gettime()-startDecode; - } - else - if(tjDecompress2(handle, jpegbuf[tile], jpegsize[tile], dstptr2, - width, pitch, height, pf, flags)==-1) - _throwtj("executing tjDecompress2()"); - } - } - elapsed+=gettime()-start; - if(iter>=0) - { - iter++; - if(elapsed>=benchtime) break; - } - else if(elapsed>=warmup) - { - iter=0; - elapsed=elapsedDecode=0.; - } - } - if(doyuv) elapsed-=elapsedDecode; - - if(tjDestroy(handle)==-1) _throwtj("executing tjDestroy()"); - handle=NULL; - - if(quiet) - { - printf("%-6s%s", - sigfig((double)(w*h)/1000000.*(double)iter/elapsed, 4, tempstr, 1024), - quiet==2? "\n":" "); - if(doyuv) - printf("%s\n", - sigfig((double)(w*h)/1000000.*(double)iter/elapsedDecode, 4, tempstr, - 1024)); - else if(quiet!=2) printf("\n"); - } - else - { - printf("%s --> Frame rate: %f fps\n", - doyuv? 
"Decomp to YUV":"Decompress ", (double)iter/elapsed); - printf(" Throughput: %f Megapixels/sec\n", - (double)(w*h)/1000000.*(double)iter/elapsed); - if(doyuv) - { - printf("YUV Decode --> Frame rate: %f fps\n", - (double)iter/elapsedDecode); - printf(" Throughput: %f Megapixels/sec\n", - (double)(w*h)/1000000.*(double)iter/elapsedDecode); - } - } - - if (!dowrite) goto bailout; - - if(sf.num!=1 || sf.denom!=1) - snprintf(sizestr, 20, "%d_%d", sf.num, sf.denom); - else if(tilew!=w || tileh!=h) - snprintf(sizestr, 20, "%dx%d", tilew, tileh); - else snprintf(sizestr, 20, "full"); - if(decomponly) - snprintf(tempstr, 1024, "%s_%s.%s", filename, sizestr, ext); - else - snprintf(tempstr, 1024, "%s_%s%s_%s.%s", filename, subName[subsamp], - qualstr, sizestr, ext); - - if(savebmp(tempstr, dstbuf, scaledw, scaledh, pf, - (flags&TJFLAG_BOTTOMUP)!=0)==-1) - _throwbmp("saving bitmap"); - ptr=strrchr(tempstr, '.'); - snprintf(ptr, 1024-(ptr-tempstr), "-err.%s", ext); - if(srcbuf && sf.num==1 && sf.denom==1) - { - if(!quiet) printf("Compression error written to %s.\n", tempstr); - if(subsamp==TJ_GRAYSCALE) - { - int index, index2; - for(row=0, index=0; row255) y=255; - if(y<0) y=0; - dstbuf[rindex]=abs(dstbuf[rindex]-y); - dstbuf[gindex]=abs(dstbuf[gindex]-y); - dstbuf[bindex]=abs(dstbuf[bindex]-y); - } - } - } - else - { - for(row=0; row 0) { + snprintf(qualStr, 6, "_Q%d", jpegQual); + qualStr[5] = 0; + } + + if ((handle = tjInitDecompress()) == NULL) + _throwtj("executing tjInitDecompress()"); + + if (dstBuf == NULL) { + if ((dstBuf = (unsigned char *)malloc(pitch * scaledh)) == NULL) + _throwunix("allocating destination buffer"); + dstBufAlloc = 1; + } + /* Set the destination buffer to gray so we know whether the decompressor + attempted to write to it */ + memset(dstBuf, 127, pitch * scaledh); + + if (doYUV) { + int width = doTile ? tilew : scaledw; + int height = doTile ? 
tileh : scaledh; + int yuvSize = tjBufSizeYUV2(width, yuvPad, height, subsamp); + + if ((yuvBuf = (unsigned char *)malloc(yuvSize)) == NULL) + _throwunix("allocating YUV buffer"); + memset(yuvBuf, 127, yuvSize); + } + + /* Benchmark */ + iter = -1; + elapsed = elapsedDecode = 0.; + while (1) { + int tile = 0; + double start = getTime(); + + for (row = 0, dstPtr = dstBuf; row < ntilesh; + row++, dstPtr += pitch * tileh) { + for (col = 0, dstPtr2 = dstPtr; col < ntilesw; + col++, tile++, dstPtr2 += ps * tilew) { + int width = doTile ? min(tilew, w - col * tilew) : scaledw; + int height = doTile ? min(tileh, h - row * tileh) : scaledh; + + if (doYUV) { + double startDecode; + + if (tjDecompressToYUV2(handle, jpegBuf[tile], jpegSize[tile], yuvBuf, + width, yuvPad, height, flags) == -1) + _throwtj("executing tjDecompressToYUV2()"); + startDecode = getTime(); + if (tjDecodeYUV(handle, yuvBuf, yuvPad, subsamp, dstPtr2, width, + pitch, height, pf, flags) == -1) + _throwtj("executing tjDecodeYUV()"); + if (iter >= 0) elapsedDecode += getTime() - startDecode; + } else if (tjDecompress2(handle, jpegBuf[tile], jpegSize[tile], + dstPtr2, width, pitch, height, pf, + flags) == -1) + _throwtj("executing tjDecompress2()"); + } + } + elapsed += getTime() - start; + if (iter >= 0) { + iter++; + if (elapsed >= benchTime) break; + } else if (elapsed >= warmup) { + iter = 0; + elapsed = elapsedDecode = 0.; + } + } + if (doYUV) elapsed -= elapsedDecode; + + if (tjDestroy(handle) == -1) _throwtj("executing tjDestroy()"); + handle = NULL; + + if (quiet) { + printf("%-6s%s", + sigfig((double)(w * h) / 1000000. * (double)iter / elapsed, 4, + tempStr, 1024), + quiet == 2 ? "\n" : " "); + if (doYUV) + printf("%s\n", + sigfig((double)(w * h) / 1000000. * (double)iter / elapsedDecode, + 4, tempStr, 1024)); + else if (quiet != 2) printf("\n"); + } else { + printf("%s --> Frame rate: %f fps\n", + doYUV ? 
"Decomp to YUV" : "Decompress ", (double)iter / elapsed); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w * h) / 1000000. * (double)iter / elapsed); + if (doYUV) { + printf("YUV Decode --> Frame rate: %f fps\n", + (double)iter / elapsedDecode); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w * h) / 1000000. * (double)iter / elapsedDecode); + } + } + + if (!doWrite) goto bailout; + + if (sf.num != 1 || sf.denom != 1) + snprintf(sizeStr, 20, "%d_%d", sf.num, sf.denom); + else if (tilew != w || tileh != h) + snprintf(sizeStr, 20, "%dx%d", tilew, tileh); + else snprintf(sizeStr, 20, "full"); + if (decompOnly) + snprintf(tempStr, 1024, "%s_%s.%s", fileName, sizeStr, ext); + else + snprintf(tempStr, 1024, "%s_%s%s_%s.%s", fileName, subName[subsamp], + qualStr, sizeStr, ext); + + if (tjSaveImage(tempStr, dstBuf, scaledw, 0, scaledh, pf, flags) == -1) + _throwtjg("saving bitmap"); + ptr = strrchr(tempStr, '.'); + snprintf(ptr, 1024 - (ptr - tempStr), "-err.%s", ext); + if (srcBuf && sf.num == 1 && sf.denom == 1) { + if (!quiet) printf("Compression error written to %s.\n", tempStr); + if (subsamp == TJ_GRAYSCALE) { + int index, index2; + + for (row = 0, index = 0; row < h; row++, index += pitch) { + for (col = 0, index2 = index; col < w; col++, index2 += ps) { + int rindex = index2 + tjRedOffset[pf]; + int gindex = index2 + tjGreenOffset[pf]; + int bindex = index2 + tjBlueOffset[pf]; + int y = (int)((double)srcBuf[rindex] * 0.299 + + (double)srcBuf[gindex] * 0.587 + + (double)srcBuf[bindex] * 0.114 + 0.5); + + if (y > 255) y = 255; + if (y < 0) y = 0; + dstBuf[rindex] = abs(dstBuf[rindex] - y); + dstBuf[gindex] = abs(dstBuf[gindex] - y); + dstBuf[bindex] = abs(dstBuf[bindex] - y); + } + } + } else { + for (row = 0; row < h; row++) + for (col = 0; col < w * ps; col++) + dstBuf[pitch * row + col] = + abs(dstBuf[pitch * row + col] - srcBuf[pitch * row + col]); + } + if (tjSaveImage(tempStr, dstBuf, w, 0, h, pf, flags) == -1) + _throwtjg("saving bitmap"); + 
} + +bailout: + if (file) fclose(file); + if (handle) tjDestroy(handle); + if (dstBuf && dstBufAlloc) free(dstBuf); + if (yuvBuf) free(yuvBuf); + return retval; } -int fullTest(unsigned char *srcbuf, int w, int h, int subsamp, int jpegqual, - char *filename) +int fullTest(unsigned char *srcBuf, int w, int h, int subsamp, int jpegQual, + char *fileName) { - char tempstr[1024], tempstr2[80]; - FILE *file=NULL; tjhandle handle=NULL; - unsigned char **jpegbuf=NULL, *yuvbuf=NULL, *tmpbuf=NULL, *srcptr, *srcptr2; - double start, elapsed, elapsedEncode; - int totaljpegsize=0, row, col, i, tilew=w, tileh=h, retval=0; - int iter, yuvsize=0; - unsigned long *jpegsize=NULL; - int ps=tjPixelSize[pf]; - int ntilesw=1, ntilesh=1, pitch=w*ps; - const char *pfStr=pixFormatStr[pf]; - - if((tmpbuf=(unsigned char *)malloc(pitch*h)) == NULL) - _throwunix("allocating temporary image buffer"); - - if(!quiet) - printf(">>>>> %s (%s) <--> JPEG %s Q%d <<<<<\n", pfStr, - (flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down", subNameLong[subsamp], - jpegqual); - - for(tilew=dotile? 8:w, tileh=dotile? 
8:h; ; tilew*=2, tileh*=2) - { - if(tilew>w) tilew=w; - if(tileh>h) tileh=h; - ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh; - - if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *) - *ntilesw*ntilesh))==NULL) - _throwunix("allocating JPEG tile array"); - memset(jpegbuf, 0, sizeof(unsigned char *)*ntilesw*ntilesh); - if((jpegsize=(unsigned long *)malloc(sizeof(unsigned long) - *ntilesw*ntilesh))==NULL) - _throwunix("allocating JPEG size array"); - memset(jpegsize, 0, sizeof(unsigned long)*ntilesw*ntilesh); - - if((flags&TJFLAG_NOREALLOC)!=0) - for(i=0; i=0) elapsedEncode+=gettime()-startEncode; - if(tjCompressFromYUV(handle, yuvbuf, width, yuvpad, height, - subsamp, &jpegbuf[tile], &jpegsize[tile], jpegqual, flags)==-1) - _throwtj("executing tjCompressFromYUV()"); - } - else - { - if(tjCompress2(handle, srcptr2, width, pitch, height, pf, - &jpegbuf[tile], &jpegsize[tile], subsamp, jpegqual, flags)==-1) - _throwtj("executing tjCompress2()"); - } - totaljpegsize+=jpegsize[tile]; - } - } - elapsed+=gettime()-start; - if(iter>=0) - { - iter++; - if(elapsed>=benchtime) break; - } - else if(elapsed>=warmup) - { - iter=0; - elapsed=elapsedEncode=0.; - } - } - if(doyuv) elapsed-=elapsedEncode; - - if(tjDestroy(handle)==-1) _throwtj("executing tjDestroy()"); - handle=NULL; - - if(quiet==1) printf("%-5d %-5d ", tilew, tileh); - if(quiet) - { - if(doyuv) - printf("%-6s%s", - sigfig((double)(w*h)/1000000.*(double)iter/elapsedEncode, 4, tempstr, - 1024), quiet==2? "\n":" "); - printf("%-6s%s", - sigfig((double)(w*h)/1000000.*(double)iter/elapsed, 4, tempstr, 1024), - quiet==2? "\n":" "); - printf("%-6s%s", - sigfig((double)(w*h*ps)/(double)totaljpegsize, 4, tempstr2, 80), - quiet==2? "\n":" "); - } - else - { - printf("\n%s size: %d x %d\n", dotile? 
"Tile":"Image", tilew, - tileh); - if(doyuv) - { - printf("Encode YUV --> Frame rate: %f fps\n", - (double)iter/elapsedEncode); - printf(" Output image size: %d bytes\n", yuvsize); - printf(" Compression ratio: %f:1\n", - (double)(w*h*ps)/(double)yuvsize); - printf(" Throughput: %f Megapixels/sec\n", - (double)(w*h)/1000000.*(double)iter/elapsedEncode); - printf(" Output bit stream: %f Megabits/sec\n", - (double)yuvsize*8./1000000.*(double)iter/elapsedEncode); - } - printf("%s --> Frame rate: %f fps\n", - doyuv? "Comp from YUV":"Compress ", (double)iter/elapsed); - printf(" Output image size: %d bytes\n", - totaljpegsize); - printf(" Compression ratio: %f:1\n", - (double)(w*h*ps)/(double)totaljpegsize); - printf(" Throughput: %f Megapixels/sec\n", - (double)(w*h)/1000000.*(double)iter/elapsed); - printf(" Output bit stream: %f Megabits/sec\n", - (double)totaljpegsize*8./1000000.*(double)iter/elapsed); - } - if(tilew==w && tileh==h && dowrite) - { - snprintf(tempstr, 1024, "%s_%s_Q%d.jpg", filename, subName[subsamp], - jpegqual); - if((file=fopen(tempstr, "wb"))==NULL) - _throwunix("opening reference image"); - if(fwrite(jpegbuf[0], jpegsize[0], 1, file)!=1) - _throwunix("writing reference image"); - fclose(file); file=NULL; - if(!quiet) printf("Reference image written to %s\n", tempstr); - } - - /* Decompression test */ - if(!componly) - { - if(decomp(srcbuf, jpegbuf, jpegsize, tmpbuf, w, h, subsamp, jpegqual, - filename, tilew, tileh)==-1) - goto bailout; - } - - for(i=0; i>>>> %s (%s) <--> JPEG %s Q%d <<<<<\n", pfStr, + (flags & TJFLAG_BOTTOMUP) ? "Bottom-up" : "Top-down", + subNameLong[subsamp], jpegQual); + + for (tilew = doTile ? 8 : w, tileh = doTile ? 
8 : h; ; + tilew *= 2, tileh *= 2) { + if (tilew > w) tilew = w; + if (tileh > h) tileh = h; + ntilesw = (w + tilew - 1) / tilew; + ntilesh = (h + tileh - 1) / tileh; + + if ((jpegBuf = (unsigned char **)malloc(sizeof(unsigned char *) * + ntilesw * ntilesh)) == NULL) + _throwunix("allocating JPEG tile array"); + memset(jpegBuf, 0, sizeof(unsigned char *) * ntilesw * ntilesh); + if ((jpegSize = (unsigned long *)malloc(sizeof(unsigned long) * + ntilesw * ntilesh)) == NULL) + _throwunix("allocating JPEG size array"); + memset(jpegSize, 0, sizeof(unsigned long) * ntilesw * ntilesh); + + if ((flags & TJFLAG_NOREALLOC) != 0) + for (i = 0; i < ntilesw * ntilesh; i++) { + if ((jpegBuf[i] = (unsigned char *) + tjAlloc(tjBufSize(tilew, tileh, subsamp))) == NULL) + _throwunix("allocating JPEG tiles"); + } + + /* Compression test */ + if (quiet == 1) + printf("%-4s (%s) %-5s %-3d ", pfStr, + (flags & TJFLAG_BOTTOMUP) ? "BU" : "TD", subNameLong[subsamp], + jpegQual); + for (i = 0; i < h; i++) + memcpy(&tmpBuf[pitch * i], &srcBuf[w * ps * i], w * ps); + if ((handle = tjInitCompress()) == NULL) + _throwtj("executing tjInitCompress()"); + + if (doYUV) { + yuvSize = tjBufSizeYUV2(tilew, yuvPad, tileh, subsamp); + if ((yuvBuf = (unsigned char *)malloc(yuvSize)) == NULL) + _throwunix("allocating YUV buffer"); + memset(yuvBuf, 127, yuvSize); + } + + /* Benchmark */ + iter = -1; + elapsed = elapsedEncode = 0.; + while (1) { + int tile = 0; + + totalJpegSize = 0; + start = getTime(); + for (row = 0, srcPtr = srcBuf; row < ntilesh; + row++, srcPtr += pitch * tileh) { + for (col = 0, srcPtr2 = srcPtr; col < ntilesw; + col++, tile++, srcPtr2 += ps * tilew) { + int width = min(tilew, w - col * tilew); + int height = min(tileh, h - row * tileh); + + if (doYUV) { + double startEncode = getTime(); + + if (tjEncodeYUV3(handle, srcPtr2, width, pitch, height, pf, yuvBuf, + yuvPad, subsamp, flags) == -1) + _throwtj("executing tjEncodeYUV3()"); + if (iter >= 0) elapsedEncode += getTime() - 
startEncode; + if (tjCompressFromYUV(handle, yuvBuf, width, yuvPad, height, + subsamp, &jpegBuf[tile], &jpegSize[tile], + jpegQual, flags) == -1) + _throwtj("executing tjCompressFromYUV()"); + } else { + if (tjCompress2(handle, srcPtr2, width, pitch, height, pf, + &jpegBuf[tile], &jpegSize[tile], subsamp, jpegQual, + flags) == -1) + _throwtj("executing tjCompress2()"); + } + totalJpegSize += jpegSize[tile]; + } + } + elapsed += getTime() - start; + if (iter >= 0) { + iter++; + if (elapsed >= benchTime) break; + } else if (elapsed >= warmup) { + iter = 0; + elapsed = elapsedEncode = 0.; + } + } + if (doYUV) elapsed -= elapsedEncode; + + if (tjDestroy(handle) == -1) _throwtj("executing tjDestroy()"); + handle = NULL; + + if (quiet == 1) printf("%-5d %-5d ", tilew, tileh); + if (quiet) { + if (doYUV) + printf("%-6s%s", + sigfig((double)(w * h) / 1000000. * + (double)iter / elapsedEncode, 4, tempStr, 1024), + quiet == 2 ? "\n" : " "); + printf("%-6s%s", + sigfig((double)(w * h) / 1000000. * (double)iter / elapsed, 4, + tempStr, 1024), + quiet == 2 ? "\n" : " "); + printf("%-6s%s", + sigfig((double)(w * h * ps) / (double)totalJpegSize, 4, tempStr2, + 80), + quiet == 2 ? "\n" : " "); + } else { + printf("\n%s size: %d x %d\n", doTile ? "Tile" : "Image", tilew, tileh); + if (doYUV) { + printf("Encode YUV --> Frame rate: %f fps\n", + (double)iter / elapsedEncode); + printf(" Output image size: %d bytes\n", yuvSize); + printf(" Compression ratio: %f:1\n", + (double)(w * h * ps) / (double)yuvSize); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w * h) / 1000000. * (double)iter / elapsedEncode); + printf(" Output bit stream: %f Megabits/sec\n", + (double)yuvSize * 8. / 1000000. * (double)iter / elapsedEncode); + } + printf("%s --> Frame rate: %f fps\n", + doYUV ? 
"Comp from YUV" : "Compress ", + (double)iter / elapsed); + printf(" Output image size: %d bytes\n", + totalJpegSize); + printf(" Compression ratio: %f:1\n", + (double)(w * h * ps) / (double)totalJpegSize); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w * h) / 1000000. * (double)iter / elapsed); + printf(" Output bit stream: %f Megabits/sec\n", + (double)totalJpegSize * 8. / 1000000. * (double)iter / elapsed); + } + if (tilew == w && tileh == h && doWrite) { + snprintf(tempStr, 1024, "%s_%s_Q%d.jpg", fileName, subName[subsamp], + jpegQual); + if ((file = fopen(tempStr, "wb")) == NULL) + _throwunix("opening reference image"); + if (fwrite(jpegBuf[0], jpegSize[0], 1, file) != 1) + _throwunix("writing reference image"); + fclose(file); file = NULL; + if (!quiet) printf("Reference image written to %s\n", tempStr); + } + + /* Decompression test */ + if (!compOnly) { + if (decomp(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual, + fileName, tilew, tileh) == -1) + goto bailout; + } + + for (i = 0; i < ntilesw * ntilesh; i++) { + if (jpegBuf[i]) tjFree(jpegBuf[i]); + jpegBuf[i] = NULL; + } + free(jpegBuf); jpegBuf = NULL; + free(jpegSize); jpegSize = NULL; + if (doYUV) { + free(yuvBuf); yuvBuf = NULL; + } + + if (tilew == w && tileh == h) break; + } + +bailout: + if (file) { fclose(file); file = NULL; } + if (jpegBuf) { + for (i = 0; i < ntilesw * ntilesh; i++) { + if (jpegBuf[i]) tjFree(jpegBuf[i]); + jpegBuf[i] = NULL; + } + free(jpegBuf); jpegBuf = NULL; + } + if (yuvBuf) { free(yuvBuf); yuvBuf = NULL; } + if (jpegSize) { free(jpegSize); jpegSize = NULL; } + if (tmpBuf) { free(tmpBuf); tmpBuf = NULL; } + if (handle) { tjDestroy(handle); handle = NULL; } + return retval; } -int decompTest(char *filename) +int decompTest(char *fileName) { - FILE *file=NULL; tjhandle handle=NULL; - unsigned char **jpegbuf=NULL, *srcbuf=NULL; - unsigned long *jpegsize=NULL, srcsize, totaljpegsize; - tjtransform *t=NULL; - int w=0, h=0, subsamp=-1, cs=-1, _w, _h, _tilew, 
_tileh, - _ntilesw, _ntilesh, _subsamp; - char *temp=NULL, tempstr[80], tempstr2[80]; - int row, col, i, iter, tilew, tileh, ntilesw=1, ntilesh=1, retval=0; - double start, elapsed; - int ps=tjPixelSize[pf], tile, decompsrc=0; - - if((file=fopen(filename, "rb"))==NULL) - _throwunix("opening file"); - if(fseek(file, 0, SEEK_END)<0 || (srcsize=ftell(file))==(unsigned long)-1) - _throwunix("determining file size"); - if((srcbuf=(unsigned char *)malloc(srcsize))==NULL) - _throwunix("allocating memory"); - if(fseek(file, 0, SEEK_SET)<0) - _throwunix("setting file position"); - if(fread(srcbuf, srcsize, 1, file)<1) - _throwunix("reading JPEG data"); - fclose(file); file=NULL; - - temp=strrchr(filename, '.'); - if(temp!=NULL) *temp='\0'; - - if((handle=tjInitTransform())==NULL) - _throwtj("executing tjInitTransform()"); - if(tjDecompressHeader3(handle, srcbuf, srcsize, &w, &h, &subsamp, &cs)==-1) - _throwtj("executing tjDecompressHeader3()"); - if(cs==TJCS_YCCK || cs==TJCS_CMYK) - { - pf=TJPF_CMYK; ps=tjPixelSize[pf]; - } - - if(quiet==1) - { - printf("All performance values in Mpixels/sec\n\n"); - printf("Bitmap JPEG JPEG %s %s Xform Comp Decomp ", - dotile? "Tile ":"Image", dotile? "Tile ":"Image"); - if(doyuv) printf("Decode"); - printf("\n"); - printf("Format CS Subsamp Width Height Perf Ratio Perf "); - if(doyuv) printf("Perf"); - printf("\n\n"); - } - else if(!quiet) - printf(">>>>> JPEG %s --> %s (%s) <<<<<\n", - formatName(subsamp, cs, tempstr), pixFormatStr[pf], - (flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down"); - - for(tilew=dotile? 16:w, tileh=dotile? 
16:h; ; tilew*=2, tileh*=2) - { - if(tilew>w) tilew=w; - if(tileh>h) tileh=h; - ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh; - - if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *) - *ntilesw*ntilesh))==NULL) - _throwunix("allocating JPEG tile array"); - memset(jpegbuf, 0, sizeof(unsigned char *)*ntilesw*ntilesh); - if((jpegsize=(unsigned long *)malloc(sizeof(unsigned long) - *ntilesw*ntilesh))==NULL) - _throwunix("allocating JPEG size array"); - memset(jpegsize, 0, sizeof(unsigned long)*ntilesw*ntilesh); - - if((flags&TJFLAG_NOREALLOC)!=0 || !dotile) - for(i=0; i %d x %d", TJSCALED(_w, sf), TJSCALED(_h, sf)); - printf("\n"); - } - else if(quiet==1) - { - printf("%-4s (%s) %-5s %-5s ", pixFormatStr[pf], - (flags&TJFLAG_BOTTOMUP)? "BU":"TD", csName[cs], subNameLong[subsamp]); - printf("%-5d %-5d ", tilew, tileh); - } - - _subsamp=subsamp; - if(dotile || xformop!=TJXOP_NONE || xformopt!=0 || customFilter) - { - if((t=(tjtransform *)malloc(sizeof(tjtransform)*ntilesw*ntilesh)) - ==NULL) - _throwunix("allocating image transform array"); - - if(xformop==TJXOP_TRANSPOSE || xformop==TJXOP_TRANSVERSE - || xformop==TJXOP_ROT90 || xformop==TJXOP_ROT270) - { - _w=h; _h=w; _tilew=tileh; _tileh=tilew; - } - - if(xformopt&TJXOPT_GRAY) _subsamp=TJ_GRAYSCALE; - if(xformop==TJXOP_HFLIP || xformop==TJXOP_ROT180) - _w=_w-(_w%tjMCUWidth[_subsamp]); - if(xformop==TJXOP_VFLIP || xformop==TJXOP_ROT180) - _h=_h-(_h%tjMCUHeight[_subsamp]); - if(xformop==TJXOP_TRANSVERSE || xformop==TJXOP_ROT90) - _w=_w-(_w%tjMCUHeight[_subsamp]); - if(xformop==TJXOP_TRANSVERSE || xformop==TJXOP_ROT270) - _h=_h-(_h%tjMCUWidth[_subsamp]); - _ntilesw=(_w+_tilew-1)/_tilew; - _ntilesh=(_h+_tileh-1)/_tileh; - - if(xformop==TJXOP_TRANSPOSE || xformop==TJXOP_TRANSVERSE - || xformop==TJXOP_ROT90 || xformop==TJXOP_ROT270) - { - if(_subsamp==TJSAMP_422) _subsamp=TJSAMP_440; - else if(_subsamp==TJSAMP_440) _subsamp=TJSAMP_422; - } - - for(row=0, tile=0; row<_ntilesh; row++) - { - for(col=0; 
col<_ntilesw; col++, tile++) - { - t[tile].r.w=min(_tilew, _w-col*_tilew); - t[tile].r.h=min(_tileh, _h-row*_tileh); - t[tile].r.x=col*_tilew; - t[tile].r.y=row*_tileh; - t[tile].op=xformop; - t[tile].options=xformopt|TJXOPT_TRIM; - t[tile].customFilter=customFilter; - if(t[tile].options&TJXOPT_NOOUTPUT && jpegbuf[tile]) - { - tjFree(jpegbuf[tile]); jpegbuf[tile]=NULL; - } - } - } - - iter=-1; - elapsed=0.; - while(1) - { - start=gettime(); - if(tjTransform(handle, srcbuf, srcsize, _ntilesw*_ntilesh, jpegbuf, - jpegsize, t, flags)==-1) - _throwtj("executing tjTransform()"); - elapsed+=gettime()-start; - if(iter>=0) - { - iter++; - if(elapsed>=benchtime) break; - } - else if(elapsed>=warmup) - { - iter=0; - elapsed=0.; - } - } - - free(t); t=NULL; - - for(tile=0, totaljpegsize=0; tile<_ntilesw*_ntilesh; tile++) - totaljpegsize+=jpegsize[tile]; - - if(quiet) - { - printf("%-6s%s%-6s%s", - sigfig((double)(w*h)/1000000./elapsed, 4, tempstr, 80), - quiet==2? "\n":" ", - sigfig((double)(w*h*ps)/(double)totaljpegsize, 4, tempstr2, 80), - quiet==2? "\n":" "); - } - else if(!quiet) - { - printf("Transform --> Frame rate: %f fps\n", 1.0/elapsed); - printf(" Output image size: %lu bytes\n", totaljpegsize); - printf(" Compression ratio: %f:1\n", - (double)(w*h*ps)/(double)totaljpegsize); - printf(" Throughput: %f Megapixels/sec\n", - (double)(w*h)/1000000./elapsed); - printf(" Output bit stream: %f Megabits/sec\n", - (double)totaljpegsize*8./1000000./elapsed); - } - } - else - { - if(quiet==1) printf("N/A N/A "); - tjFree(jpegbuf[0]); - jpegbuf[0]=NULL; - decompsrc=1; - } - - if(w==tilew) _tilew=_w; - if(h==tileh) _tileh=_h; - if(!(xformopt&TJXOPT_NOOUTPUT)) - { - if(decomp(NULL, decompsrc? &srcbuf:jpegbuf, decompsrc? 
&srcsize:jpegsize, - NULL, _w, _h, _subsamp, 0, filename, _tilew, _tileh)==-1) - goto bailout; - } - else if(quiet==1) printf("N/A\n"); - - for(i=0; i>>>> JPEG %s --> %s (%s) <<<<<\n", + formatName(subsamp, cs, tempStr), pixFormatStr[pf], + (flags & TJFLAG_BOTTOMUP) ? "Bottom-up" : "Top-down"); + + for (tilew = doTile ? 16 : w, tileh = doTile ? 16 : h; ; + tilew *= 2, tileh *= 2) { + if (tilew > w) tilew = w; + if (tileh > h) tileh = h; + ntilesw = (w + tilew - 1) / tilew; + ntilesh = (h + tileh - 1) / tileh; + + if ((jpegBuf = (unsigned char **)malloc(sizeof(unsigned char *) * + ntilesw * ntilesh)) == NULL) + _throwunix("allocating JPEG tile array"); + memset(jpegBuf, 0, sizeof(unsigned char *) * ntilesw * ntilesh); + if ((jpegSize = (unsigned long *)malloc(sizeof(unsigned long) * + ntilesw * ntilesh)) == NULL) + _throwunix("allocating JPEG size array"); + memset(jpegSize, 0, sizeof(unsigned long) * ntilesw * ntilesh); + + if ((flags & TJFLAG_NOREALLOC) != 0 || !doTile) + for (i = 0; i < ntilesw * ntilesh; i++) { + if ((jpegBuf[i] = (unsigned char *) + tjAlloc(tjBufSize(tilew, tileh, subsamp))) == NULL) + _throwunix("allocating JPEG tiles"); + } + + tw = w; th = h; ttilew = tilew; ttileh = tileh; + if (!quiet) { + printf("\n%s size: %d x %d", doTile ? "Tile" : "Image", ttilew, ttileh); + if (sf.num != 1 || sf.denom != 1) + printf(" --> %d x %d", TJSCALED(tw, sf), TJSCALED(th, sf)); + printf("\n"); + } else if (quiet == 1) { + printf("%-4s (%s) %-5s %-5s ", pixFormatStr[pf], + (flags & TJFLAG_BOTTOMUP) ? 
"BU" : "TD", csName[cs], + subNameLong[subsamp]); + printf("%-5d %-5d ", tilew, tileh); + } + + tsubsamp = subsamp; + if (doTile || xformOp != TJXOP_NONE || xformOpt != 0 || customFilter) { + if ((t = (tjtransform *)malloc(sizeof(tjtransform) * ntilesw * + ntilesh)) == NULL) + _throwunix("allocating image transform array"); + + if (xformOp == TJXOP_TRANSPOSE || xformOp == TJXOP_TRANSVERSE || + xformOp == TJXOP_ROT90 || xformOp == TJXOP_ROT270) { + tw = h; th = w; ttilew = tileh; ttileh = tilew; + } + + if (xformOpt & TJXOPT_GRAY) tsubsamp = TJ_GRAYSCALE; + if (xformOp == TJXOP_HFLIP || xformOp == TJXOP_ROT180) + tw = tw - (tw % tjMCUWidth[tsubsamp]); + if (xformOp == TJXOP_VFLIP || xformOp == TJXOP_ROT180) + th = th - (th % tjMCUHeight[tsubsamp]); + if (xformOp == TJXOP_TRANSVERSE || xformOp == TJXOP_ROT90) + tw = tw - (tw % tjMCUHeight[tsubsamp]); + if (xformOp == TJXOP_TRANSVERSE || xformOp == TJXOP_ROT270) + th = th - (th % tjMCUWidth[tsubsamp]); + tntilesw = (tw + ttilew - 1) / ttilew; + tntilesh = (th + ttileh - 1) / ttileh; + + if (xformOp == TJXOP_TRANSPOSE || xformOp == TJXOP_TRANSVERSE || + xformOp == TJXOP_ROT90 || xformOp == TJXOP_ROT270) { + if (tsubsamp == TJSAMP_422) tsubsamp = TJSAMP_440; + else if (tsubsamp == TJSAMP_440) tsubsamp = TJSAMP_422; + } + + for (row = 0, tile = 0; row < tntilesh; row++) { + for (col = 0; col < tntilesw; col++, tile++) { + t[tile].r.w = min(ttilew, tw - col * ttilew); + t[tile].r.h = min(ttileh, th - row * ttileh); + t[tile].r.x = col * ttilew; + t[tile].r.y = row * ttileh; + t[tile].op = xformOp; + t[tile].options = xformOpt | TJXOPT_TRIM; + t[tile].customFilter = customFilter; + if (t[tile].options & TJXOPT_NOOUTPUT && jpegBuf[tile]) { + tjFree(jpegBuf[tile]); jpegBuf[tile] = NULL; + } + } + } + + iter = -1; + elapsed = 0.; + while (1) { + start = getTime(); + if (tjTransform(handle, srcBuf, srcSize, tntilesw * tntilesh, jpegBuf, + jpegSize, t, flags) == -1) + _throwtj("executing tjTransform()"); + elapsed += getTime() 
- start; + if (iter >= 0) { + iter++; + if (elapsed >= benchTime) break; + } else if (elapsed >= warmup) { + iter = 0; + elapsed = 0.; + } + } + + free(t); t = NULL; + + for (tile = 0, totalJpegSize = 0; tile < tntilesw * tntilesh; tile++) + totalJpegSize += jpegSize[tile]; + + if (quiet) { + printf("%-6s%s%-6s%s", + sigfig((double)(w * h) / 1000000. / elapsed, 4, tempStr, 80), + quiet == 2 ? "\n" : " ", + sigfig((double)(w * h * ps) / (double)totalJpegSize, 4, + tempStr2, 80), + quiet == 2 ? "\n" : " "); + } else if (!quiet) { + printf("Transform --> Frame rate: %f fps\n", + 1.0 / elapsed); + printf(" Output image size: %lu bytes\n", + totalJpegSize); + printf(" Compression ratio: %f:1\n", + (double)(w * h * ps) / (double)totalJpegSize); + printf(" Throughput: %f Megapixels/sec\n", + (double)(w * h) / 1000000. / elapsed); + printf(" Output bit stream: %f Megabits/sec\n", + (double)totalJpegSize * 8. / 1000000. / elapsed); + } + } else { + if (quiet == 1) printf("N/A N/A "); + tjFree(jpegBuf[0]); + jpegBuf[0] = NULL; + decompsrc = 1; + } + + if (w == tilew) ttilew = tw; + if (h == tileh) ttileh = th; + if (!(xformOpt & TJXOPT_NOOUTPUT)) { + if (decomp(NULL, decompsrc ? &srcBuf : jpegBuf, + decompsrc ? 
&srcSize : jpegSize, NULL, tw, th, tsubsamp, 0, + fileName, ttilew, ttileh) == -1) + goto bailout; + } else if (quiet == 1) printf("N/A\n"); + + for (i = 0; i < ntilesw * ntilesh; i++) { + tjFree(jpegBuf[i]); jpegBuf[i] = NULL; + } + free(jpegBuf); jpegBuf = NULL; + if (jpegSize) { free(jpegSize); jpegSize = NULL; } + + if (tilew == w && tileh == h) break; + } + +bailout: + if (file) { fclose(file); file = NULL; } + if (jpegBuf) { + for (i = 0; i < ntilesw * ntilesh; i++) { + if (jpegBuf[i]) tjFree(jpegBuf[i]); + jpegBuf[i] = NULL; + } + free(jpegBuf); jpegBuf = NULL; + } + if (jpegSize) { free(jpegSize); jpegSize = NULL; } + if (srcBuf) { free(srcBuf); srcBuf = NULL; } + if (t) { free(t); t = NULL; } + if (handle) { tjDestroy(handle); handle = NULL; } + return retval; } -void usage(char *progname) +void usage(char *progName) { - int i; - printf("USAGE: %s\n", progname); - printf(" [options]\n\n"); - printf(" %s\n", progname); - printf(" [options]\n\n"); - printf("Options:\n\n"); - printf("-alloc = Dynamically allocate JPEG image buffers\n"); - printf("-bmp = Generate output images in Windows Bitmap format (default = PPM)\n"); - printf("-bottomup = Test bottom-up compression/decompression\n"); - printf("-tile = Test performance of the codec when the image is encoded as separate\n"); - printf(" tiles of varying sizes.\n"); - printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n"); - printf(" Test the specified color conversion path in the codec (default = BGR)\n"); - printf("-cmyk = Indirectly test YCCK JPEG compression/decompression (the source\n"); - printf(" and destination bitmaps are still RGB. 
The conversion is done\n"); - printf(" internally prior to compression or after decompression.)\n"); - printf("-fastupsample = Use the fastest chrominance upsampling algorithm available in\n"); - printf(" the underlying codec\n"); - printf("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying\n"); - printf(" codec\n"); - printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n"); - printf(" underlying codec\n"); - printf("-subsamp = When testing JPEG compression, this option specifies the level\n"); - printf(" of chrominance subsampling to use ( = 444, 422, 440, 420, 411, or\n"); - printf(" GRAY). The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in\n"); - printf(" sequence.\n"); - printf("-quiet = Output results in tabular rather than verbose format\n"); - printf("-yuv = Test YUV encoding/decoding functions\n"); - printf("-yuvpad

      = If testing YUV encoding/decoding, this specifies the number of\n"); - printf(" bytes to which each row of each plane in the intermediate YUV image is\n"); - printf(" padded (default = 1)\n"); - printf("-scale M/N = Scale down the width/height of the decompressed JPEG image by a\n"); - printf(" factor of M/N (M/N = "); - for(i=0; i2) - { - if(i!=nsf-1) printf(", "); - if(i==nsf-2) printf("or "); - } - if(i%8==0 && i!=0) printf("\n "); - } - printf(")\n"); - printf("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =\n"); - printf(" Perform the corresponding lossless transform prior to\n"); - printf(" decompression (these options are mutually exclusive)\n"); - printf("-grayscale = Perform lossless grayscale conversion prior to decompression\n"); - printf(" test (can be combined with the other transforms above)\n"); - printf("-benchtime = Run each benchmark for at least seconds (default = 5.0)\n"); - printf("-warmup = Run each benchmark for seconds (default = 1.0) prior to\n"); - printf(" starting the timer, in order to prime the caches and thus improve the\n"); - printf(" consistency of the results.\n"); - printf("-componly = Stop after running compression tests. Do not test decompression.\n"); - printf("-nowrite = Do not write reference or output images (improves consistency of\n"); - printf(" performance measurements.)\n\n"); - printf("NOTE: If the quality is specified as a range (e.g. 
90-100), a separate\n"); - printf("test will be performed for all quality values in the range.\n\n"); - exit(1); + int i; + + printf("USAGE: %s\n", progName); + printf(" [options]\n\n"); + printf(" %s\n", progName); + printf(" [options]\n\n"); + printf("Options:\n\n"); + printf("-alloc = Dynamically allocate JPEG image buffers\n"); + printf("-bmp = Generate output images in Windows Bitmap format (default = PPM)\n"); + printf("-bottomup = Test bottom-up compression/decompression\n"); + printf("-tile = Test performance of the codec when the image is encoded as separate\n"); + printf(" tiles of varying sizes.\n"); + printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n"); + printf(" Test the specified color conversion path in the codec (default = BGR)\n"); + printf("-cmyk = Indirectly test YCCK JPEG compression/decompression (the source\n"); + printf(" and destination bitmaps are still RGB. The conversion is done\n"); + printf(" internally prior to compression or after decompression.)\n"); + printf("-fastupsample = Use the fastest chrominance upsampling algorithm available in\n"); + printf(" the underlying codec\n"); + printf("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying\n"); + printf(" codec\n"); + printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n"); + printf(" underlying codec\n"); + printf("-progressive = Use progressive entropy coding in JPEG images generated by\n"); + printf(" compression and transform operations.\n"); + printf("-subsamp = When testing JPEG compression, this option specifies the level\n"); + printf(" of chrominance subsampling to use ( = 444, 422, 440, 420, 411, or\n"); + printf(" GRAY). The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in\n"); + printf(" sequence.\n"); + printf("-quiet = Output results in tabular rather than verbose format\n"); + printf("-yuv = Test YUV encoding/decoding functions\n"); + printf("-yuvpad

      = If testing YUV encoding/decoding, this specifies the number of\n"); + printf(" bytes to which each row of each plane in the intermediate YUV image is\n"); + printf(" padded (default = 1)\n"); + printf("-scale M/N = Scale down the width/height of the decompressed JPEG image by a\n"); + printf(" factor of M/N (M/N = "); + for (i = 0; i < nsf; i++) { + printf("%d/%d", scalingFactors[i].num, scalingFactors[i].denom); + if (nsf == 2 && i != nsf - 1) printf(" or "); + else if (nsf > 2) { + if (i != nsf - 1) printf(", "); + if (i == nsf - 2) printf("or "); + } + if (i % 8 == 0 && i != 0) printf("\n "); + } + printf(")\n"); + printf("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =\n"); + printf(" Perform the corresponding lossless transform prior to\n"); + printf(" decompression (these options are mutually exclusive)\n"); + printf("-grayscale = Perform lossless grayscale conversion prior to decompression\n"); + printf(" test (can be combined with the other transforms above)\n"); + printf("-copynone = Do not copy any extra markers (including EXIF and ICC profile data)\n"); + printf(" when transforming the image.\n"); + printf("-benchtime = Run each benchmark for at least seconds (default = 5.0)\n"); + printf("-warmup = Run each benchmark for seconds (default = 1.0) prior to\n"); + printf(" starting the timer, in order to prime the caches and thus improve the\n"); + printf(" consistency of the results.\n"); + printf("-componly = Stop after running compression tests. Do not test decompression.\n"); + printf("-nowrite = Do not write reference or output images (improves consistency of\n"); + printf(" performance measurements.)\n"); + printf("-stoponwarning = Immediately discontinue the current\n"); + printf(" compression/decompression/transform operation if the underlying codec\n"); + printf(" throws a warning (non-fatal error)\n\n"); + printf("NOTE: If the quality is specified as a range (e.g. 
90-100), a separate\n"); + printf("test will be performed for all quality values in the range.\n\n"); + exit(1); } int main(int argc, char *argv[]) { - unsigned char *srcbuf=NULL; int w=0, h=0, i, j; - int minqual=-1, maxqual=-1; char *temp; - int minarg=2, retval=0, subsamp=-1; - - if((scalingfactors=tjGetScalingFactors(&nsf))==NULL || nsf==0) - _throwtj("executing tjGetScalingFactors()"); - - if(argc100) - { - puts("ERROR: Quality must be between 1 and 100."); - exit(1); - } - if((temp=strchr(argv[2], '-'))!=NULL && strlen(temp)>1 - && sscanf(&temp[1], "%d", &maxqual)==1 && maxqual>minqual && maxqual>=1 - && maxqual<=100) {} - else maxqual=minqual; - } - - if(argc>minarg) - { - for(i=minarg; i0.0) benchtime=temp; - else usage(argv[0]); - } - else if(!strcasecmp(argv[i], "-warmup") && i=0.0) warmup=temp; - else usage(argv[0]); - printf("Warmup time = %.1f seconds\n\n", warmup); - } - else if(!strcasecmp(argv[i], "-alloc")) flags&=(~TJFLAG_NOREALLOC); - else if(!strcasecmp(argv[i], "-bmp")) ext="bmp"; - else if(!strcasecmp(argv[i], "-yuv")) - { - printf("Testing YUV planar encoding/decoding\n\n"); - doyuv=1; - } - else if(!strcasecmp(argv[i], "-yuvpad") && i=1) yuvpad=temp; - } - else if(!strcasecmp(argv[i], "-subsamp") && i=0 && subsamp=minqual; i--) - fullTest(srcbuf, w, h, subsamp, i, argv[1]); - printf("\n"); - } - else - { - if(pf!=TJPF_CMYK) - { - for(i=maxqual; i>=minqual; i--) - fullTest(srcbuf, w, h, TJSAMP_GRAY, i, argv[1]); - printf("\n"); - } - for(i=maxqual; i>=minqual; i--) - fullTest(srcbuf, w, h, TJSAMP_420, i, argv[1]); - printf("\n"); - for(i=maxqual; i>=minqual; i--) - fullTest(srcbuf, w, h, TJSAMP_422, i, argv[1]); - printf("\n"); - for(i=maxqual; i>=minqual; i--) - fullTest(srcbuf, w, h, TJSAMP_444, i, argv[1]); - printf("\n"); - } - - bailout: - if(srcbuf) free(srcbuf); - return retval; + unsigned char *srcBuf = NULL; + int w = 0, h = 0, i, j, minQual = -1, maxQual = -1; + char *temp; + int minArg = 2, retval = 0, subsamp = -1; + + if 
((scalingFactors = tjGetScalingFactors(&nsf)) == NULL || nsf == 0) + _throw("executing tjGetScalingFactors()", tjGetErrorStr()); + + if (argc < minArg) usage(argv[0]); + + temp = strrchr(argv[1], '.'); + if (temp != NULL) { + if (!strcasecmp(temp, ".bmp")) ext = "bmp"; + if (!strcasecmp(temp, ".jpg") || !strcasecmp(temp, ".jpeg")) + decompOnly = 1; + } + + printf("\n"); + + if (!decompOnly) { + minArg = 3; + if (argc < minArg) usage(argv[0]); + if ((minQual = atoi(argv[2])) < 1 || minQual > 100) { + puts("ERROR: Quality must be between 1 and 100."); + exit(1); + } + if ((temp = strchr(argv[2], '-')) != NULL && strlen(temp) > 1 && + sscanf(&temp[1], "%d", &maxQual) == 1 && maxQual > minQual && + maxQual >= 1 && maxQual <= 100) {} + else maxQual = minQual; + } + + if (argc > minArg) { + for (i = minArg; i < argc; i++) { + if (!strcasecmp(argv[i], "-tile")) { + doTile = 1; xformOpt |= TJXOPT_CROP; + } else if (!strcasecmp(argv[i], "-fastupsample")) { + printf("Using fast upsampling code\n\n"); + flags |= TJFLAG_FASTUPSAMPLE; + } else if (!strcasecmp(argv[i], "-fastdct")) { + printf("Using fastest DCT/IDCT algorithm\n\n"); + flags |= TJFLAG_FASTDCT; + } else if (!strcasecmp(argv[i], "-accuratedct")) { + printf("Using most accurate DCT/IDCT algorithm\n\n"); + flags |= TJFLAG_ACCURATEDCT; + } else if (!strcasecmp(argv[i], "-progressive")) { + printf("Using progressive entropy coding\n\n"); + flags |= TJFLAG_PROGRESSIVE; + } else if (!strcasecmp(argv[i], "-rgb")) + pf = TJPF_RGB; + else if (!strcasecmp(argv[i], "-rgbx")) + pf = TJPF_RGBX; + else if (!strcasecmp(argv[i], "-bgr")) + pf = TJPF_BGR; + else if (!strcasecmp(argv[i], "-bgrx")) + pf = TJPF_BGRX; + else if (!strcasecmp(argv[i], "-xbgr")) + pf = TJPF_XBGR; + else if (!strcasecmp(argv[i], "-xrgb")) + pf = TJPF_XRGB; + else if (!strcasecmp(argv[i], "-cmyk")) + pf = TJPF_CMYK; + else if (!strcasecmp(argv[i], "-bottomup")) + flags |= TJFLAG_BOTTOMUP; + else if (!strcasecmp(argv[i], "-quiet")) + quiet = 1; + else if 
(!strcasecmp(argv[i], "-qq")) + quiet = 2; + else if (!strcasecmp(argv[i], "-scale") && i < argc - 1) { + int temp1 = 0, temp2 = 0, match = 0; + + if (sscanf(argv[++i], "%d/%d", &temp1, &temp2) == 2) { + for (j = 0; j < nsf; j++) { + if ((double)temp1 / (double)temp2 == + (double)scalingFactors[j].num / + (double)scalingFactors[j].denom) { + sf = scalingFactors[j]; + match = 1; break; + } + } + if (!match) usage(argv[0]); + } else usage(argv[0]); + } else if (!strcasecmp(argv[i], "-hflip")) + xformOp = TJXOP_HFLIP; + else if (!strcasecmp(argv[i], "-vflip")) + xformOp = TJXOP_VFLIP; + else if (!strcasecmp(argv[i], "-transpose")) + xformOp = TJXOP_TRANSPOSE; + else if (!strcasecmp(argv[i], "-transverse")) + xformOp = TJXOP_TRANSVERSE; + else if (!strcasecmp(argv[i], "-rot90")) + xformOp = TJXOP_ROT90; + else if (!strcasecmp(argv[i], "-rot180")) + xformOp = TJXOP_ROT180; + else if (!strcasecmp(argv[i], "-rot270")) + xformOp = TJXOP_ROT270; + else if (!strcasecmp(argv[i], "-grayscale")) + xformOpt |= TJXOPT_GRAY; + else if (!strcasecmp(argv[i], "-custom")) + customFilter = dummyDCTFilter; + else if (!strcasecmp(argv[i], "-nooutput")) + xformOpt |= TJXOPT_NOOUTPUT; + else if (!strcasecmp(argv[i], "-copynone")) + xformOpt |= TJXOPT_COPYNONE; + else if (!strcasecmp(argv[i], "-benchtime") && i < argc - 1) { + double temp = atof(argv[++i]); + + if (temp > 0.0) benchTime = temp; + else usage(argv[0]); + } else if (!strcasecmp(argv[i], "-warmup") && i < argc - 1) { + double temp = atof(argv[++i]); + + if (temp >= 0.0) warmup = temp; + else usage(argv[0]); + printf("Warmup time = %.1f seconds\n\n", warmup); + } else if (!strcasecmp(argv[i], "-alloc")) + flags &= (~TJFLAG_NOREALLOC); + else if (!strcasecmp(argv[i], "-bmp")) + ext = "bmp"; + else if (!strcasecmp(argv[i], "-yuv")) { + printf("Testing YUV planar encoding/decoding\n\n"); + doYUV = 1; + } else if (!strcasecmp(argv[i], "-yuvpad") && i < argc - 1) { + int temp = atoi(argv[++i]); + + if (temp >= 1) yuvPad = temp; + } 
else if (!strcasecmp(argv[i], "-subsamp") && i < argc - 1) { + i++; + if (toupper(argv[i][0]) == 'G') subsamp = TJSAMP_GRAY; + else { + int temp = atoi(argv[i]); + + switch (temp) { + case 444: subsamp = TJSAMP_444; break; + case 422: subsamp = TJSAMP_422; break; + case 440: subsamp = TJSAMP_440; break; + case 420: subsamp = TJSAMP_420; break; + case 411: subsamp = TJSAMP_411; break; + } + } + } else if (!strcasecmp(argv[i], "-componly")) + compOnly = 1; + else if (!strcasecmp(argv[i], "-nowrite")) + doWrite = 0; + else if (!strcasecmp(argv[i], "-stoponwarning")) + flags |= TJFLAG_STOPONWARNING; + else usage(argv[0]); + } + } + + if ((sf.num != 1 || sf.denom != 1) && doTile) { + printf("Disabling tiled compression/decompression tests, because those tests do not\n"); + printf("work when scaled decompression is enabled.\n"); + doTile = 0; + } + + if ((flags & TJFLAG_NOREALLOC) == 0 && doTile) { + printf("Disabling tiled compression/decompression tests, because those tests do not\n"); + printf("work when dynamic JPEG buffer allocation is enabled.\n\n"); + doTile = 0; + } + + if (!decompOnly) { + if ((srcBuf = tjLoadImage(argv[1], &w, 1, &h, &pf, flags)) == NULL) + _throwtjg("loading bitmap"); + temp = strrchr(argv[1], '.'); + if (temp != NULL) *temp = '\0'; + } + + if (quiet == 1 && !decompOnly) { + printf("All performance values in Mpixels/sec\n\n"); + printf("Bitmap JPEG JPEG %s %s ", + doTile ? "Tile " : "Image", doTile ? 
"Tile " : "Image"); + if (doYUV) printf("Encode "); + printf("Comp Comp Decomp "); + if (doYUV) printf("Decode"); + printf("\n"); + printf("Format Subsamp Qual Width Height "); + if (doYUV) printf("Perf "); + printf("Perf Ratio Perf "); + if (doYUV) printf("Perf"); + printf("\n\n"); + } + + if (decompOnly) { + decompTest(argv[1]); + printf("\n"); + goto bailout; + } + if (subsamp >= 0 && subsamp < TJ_NUMSAMP) { + for (i = maxQual; i >= minQual; i--) + fullTest(srcBuf, w, h, subsamp, i, argv[1]); + printf("\n"); + } else { + if (pf != TJPF_CMYK) { + for (i = maxQual; i >= minQual; i--) + fullTest(srcBuf, w, h, TJSAMP_GRAY, i, argv[1]); + printf("\n"); + } + for (i = maxQual; i >= minQual; i--) + fullTest(srcBuf, w, h, TJSAMP_420, i, argv[1]); + printf("\n"); + for (i = maxQual; i >= minQual; i--) + fullTest(srcBuf, w, h, TJSAMP_422, i, argv[1]); + printf("\n"); + for (i = maxQual; i >= minQual; i--) + fullTest(srcBuf, w, h, TJSAMP_444, i, argv[1]); + printf("\n"); + } + +bailout: + if (srcBuf) tjFree(srcBuf); + return retval; } diff --git a/tjbenchtest.in b/tjbenchtest.in index 22e15db..1c08b37 100755 --- a/tjbenchtest.in +++ b/tjbenchtest.in @@ -21,14 +21,15 @@ runme() EXT=bmp IMAGES="vgl_5674_0098.${EXT} vgl_6434_0018a.${EXT} vgl_6548_0026a.${EXT} nightshot_iso_100.${EXT}" -IMGDIR=@srcdir@/testimages +IMGDIR=@CMAKE_CURRENT_SOURCE_DIR@/testimages OUTDIR=`mktemp -d /tmp/__tjbenchtest_output.XXXXXX` -EXEDIR=. 
+EXEDIR=@CMAKE_CURRENT_BINARY_DIR@ BMPARG= NSARG= YUVARG= ALLOC=0 ALLOCARG= +PROGARG= if [ "$EXT" = "bmp" ]; then BMPARG=-bmp; fi if [ -d $OUTDIR ]; then @@ -64,25 +65,28 @@ while [ $# -gt 0 ]; do ALLOCARG=-alloc ALLOC=1 ;; + -progressive) + PROGARG=-progressive + ;; esac shift done -exec >$EXEDIR/tjbenchtest$YUVARG$ALLOCARG.log +exec >$EXEDIR/tjbenchtest$YUVARG$ALLOCARG$PROGARG.log # Standard tests for image in $IMAGES; do cp $IMGDIR/$image $OUTDIR basename=`basename $image .${EXT}` - runme $EXEDIR/cjpeg -quality 95 -dct fast -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} - runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} - runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} - runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} - runme $EXEDIR/cjpeg -quality 95 -dct int -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} - runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} - runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} - runme $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct fast $PROGARG -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct fast $PROGARG -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct fast $PROGARG -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 
95 -dct fast $PROGARG -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int $PROGARG -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int $PROGARG -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int $PROGARG -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} + runme $EXEDIR/cjpeg -quality 95 -dct int $PROGARG -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT} for samp in GRAY 420 422 444; do runme $EXEDIR/djpeg -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_default_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg runme $EXEDIR/djpeg -dct fast -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_fast_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg @@ -96,7 +100,7 @@ for image in $IMAGES; do # Compression for dct in accurate fast; do - runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG $ALLOCARG + runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG $ALLOCARG $PROGARG for samp in GRAY 420 422 444; do runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg done @@ -109,7 +113,7 @@ for image in $IMAGES; do fi # Tiled compression & decompression - runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG $ALLOCARG + runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG $ALLOCARG $PROGARG for samp in GRAY 444; do if [ $ALLOC = 1 ]; then runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT} @@ -122,7 +126,7 @@ for image in $IMAGES; do done fi done - runme $EXEDIR/tjbench 
$OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG $ALLOCARG + runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG $ALLOCARG $PROGARG for samp in 420 422; do if [ $ALLOC = 1 ]; then runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT} @@ -138,7 +142,7 @@ for image in $IMAGES; do # Tiled decompression for samp in GRAY 444; do - runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG $ALLOCARG + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG $ALLOCARG $PROGARG if [ $ALLOC = 1 ]; then runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT} rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} @@ -151,7 +155,7 @@ for image in $IMAGES; do fi done for samp in 420 422; do - runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG $ALLOCARG + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG $ALLOCARG $PROGARG if [ $ALLOC = 1 ]; then runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT} rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} @@ -167,10 +171,10 @@ for image in $IMAGES; do # Scaled decompression for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do - scalearg=`echo $scale | sed s@_@/@g` + scalearg=`echo $scale | sed 's/\_/\//g'` for samp in GRAY 420 422 444; do runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg - runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg 
$BMPARG -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG $PROGARG runme cmp $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${scale}_djpeg.${EXT} rm $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} done @@ -189,7 +193,7 @@ for image in $IMAGES; do for xform in hflip vflip transpose transverse rot90 rot180 rot270; do for samp in GRAY 444; do runme $EXEDIR/djpeg -rgb $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg - runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG $PROGARG if [ $ALLOC = 1 ]; then runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} @@ -203,7 +207,7 @@ for image in $IMAGES; do done for samp in 420 422; do runme $EXEDIR/djpeg -nosmooth -rgb $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg - runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG $ALLOCARG + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG $ALLOCARG $PROGARG if [ $ALLOC = 1 ]; then runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} @@ -220,7 +224,7 @@ for image in $IMAGES; do # Grayscale transform for xform in hflip vflip transpose transverse rot90 rot180 rot270; do for samp in GRAY 444 422 
420; do - runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG $ALLOCARG + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG $ALLOCARG $PROGARG if [ $ALLOC = 1 ]; then runme cmp $OUTDIR/${basename}_${samp}_Q95_full.${EXT} $OUTDIR/${basename}_GRAY_${xform}_jpegtran.${EXT} rm $OUTDIR/${basename}_${samp}_Q95_full.${EXT} @@ -238,9 +242,9 @@ for image in $IMAGES; do for xform in hflip vflip transpose transverse rot90 rot180 rot270; do for samp in GRAY 444 422 420; do for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do - scalearg=`echo $scale | sed s@_@/@g` + scalearg=`echo $scale | sed 's/\_/\//g'` runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg - runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG + runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG $ALLOCARG $PROGARG runme cmp $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.${EXT} rm $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} done diff --git a/tjbenchtest.java.in b/tjbenchtest.java.in index 0fd2896..689561d 100755 --- a/tjbenchtest.java.in +++ b/tjbenchtest.java.in @@ -16,25 +16,28 @@ onexit() runme() { echo \*\*\* $* - $* + "$@" } IMAGES="vgl_5674_0098.bmp vgl_6434_0018a.bmp vgl_6548_0026a.bmp nightshot_iso_100.bmp" -IMGDIR=@srcdir@/testimages +IMGDIR=@CMAKE_CURRENT_SOURCE_DIR@/testimages OUTDIR=`mktemp -d /tmp/__tjbenchtest_java_output.XXXXXX` -EXEDIR=. 
-JAVA="@JAVA@ -cp java/turbojpeg.jar -Djava.library.path=.libs" +EXEDIR=@CMAKE_CURRENT_BINARY_DIR@ +JAVA="@Java_JAVA_EXECUTABLE@" +JAVAARGS="-cp $EXEDIR/java/turbojpeg.jar -Djava.library.path=$EXEDIR" BMPARG= NSARG= YUVARG= +PROGARG= if [ -d $OUTDIR ]; then rm -rf $OUTDIR fi mkdir -p $OUTDIR -if [ $# -gt 0 ]; then - if [ "$1" = "-yuv" ]; then +while [ $# -gt 0 ]; do + case "$1" in + -yuv) NSARG=-nosmooth YUVARG=-yuv @@ -55,24 +58,29 @@ if [ $# -gt 0 ]; then # phenomenon is not yet fully understood but is also believed to be some sort # of round-off error.) IMAGES="vgl_6548_0026a.bmp" - fi -fi + ;; + -progressive) + PROGARG=-progressive + ;; + esac + shift +done -exec >$EXEDIR/tjbenchtest-java$YUVARG.log +exec >$EXEDIR/tjbenchtest-java$YUVARG$PROGARG.log # Standard tests for image in $IMAGES; do cp $IMGDIR/$image $OUTDIR basename=`basename $image .bmp` - runme $EXEDIR/cjpeg -quality 95 -dct fast -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.bmp - runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.bmp - runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.bmp - runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.bmp - runme $EXEDIR/cjpeg -quality 95 -dct int -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp - runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp - runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp - runme $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast $PROGARG -grayscale -outfile 
$OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast $PROGARG -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast $PROGARG -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast $PROGARG -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int $PROGARG -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int $PROGARG -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int $PROGARG -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int $PROGARG -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp for samp in GRAY 420 422 444; do runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_default_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg runme $EXEDIR/djpeg -dct fast -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_fast_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg @@ -86,7 +94,7 @@ for image in $IMAGES; do # Compression for dct in accurate fast; do - runme $JAVA TJBench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG + runme "$JAVA" $JAVAARGS TJBench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG $PROGARG for samp in GRAY 420 422 444; do runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg done @@ -99,7 +107,7 @@ for image in $IMAGES; do fi # Tiled compression & decompression - runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG + runme "$JAVA" $JAVAARGS TJBench $OUTDIR/$image 95 
-rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG $PROGARG for samp in GRAY 444; do for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ $OUTDIR/${basename}_${samp}_Q95_full.bmp; do @@ -107,7 +115,7 @@ for image in $IMAGES; do rm $i done done - runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG + runme "$JAVA" $JAVAARGS TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG $PROGARG for samp in 420 422; do for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ $OUTDIR/${basename}_${samp}_Q95_full.bmp; do @@ -118,7 +126,7 @@ for image in $IMAGES; do # Tiled decompression for samp in GRAY 444; do - runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG + runme "$JAVA" $JAVAARGS TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG $PROGARG for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ $OUTDIR/${basename}_${samp}_Q95_full.bmp; do runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp @@ -126,7 +134,7 @@ for image in $IMAGES; do done done for samp in 420 422; do - runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG + runme "$JAVA" $JAVAARGS TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG $PROGARG for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ $OUTDIR/${basename}_${samp}_Q95_full.bmp; do runme cmp $i -i 54:54 $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp @@ -137,10 +145,10 @@ for image in $IMAGES; do # Scaled decompression for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do - scalearg=`echo $scale | sed s@_@/@g` + scalearg=`echo $scale | sed 's/\_/\//g'` for samp in GRAY 420 422 
444; do runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG -bmp -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg - runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG + runme "$JAVA" $JAVAARGS TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG $PROGARG runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp done @@ -159,7 +167,7 @@ for image in $IMAGES; do for xform in hflip vflip transpose transverse rot90 rot180 rot270; do for samp in GRAY 444; do runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg - runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG + runme "$JAVA" $JAVAARGS TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG $PROGARG for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ $OUTDIR/${basename}_${samp}_Q95_full.bmp; do runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp @@ -168,7 +176,7 @@ for image in $IMAGES; do done for samp in 420 422; do runme $EXEDIR/djpeg -nosmooth -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg - runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG + runme "$JAVA" $JAVAARGS TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG $PROGARG for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ $OUTDIR/${basename}_${samp}_Q95_full.bmp; do runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp @@ -180,7 
+188,7 @@ for image in $IMAGES; do # Grayscale transform for xform in hflip vflip transpose transverse rot90 rot180 rot270; do for samp in GRAY 444 422 420; do - runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG + runme "$JAVA" $JAVAARGS TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG $PROGARG for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \ $OUTDIR/${basename}_${samp}_Q95_full.bmp; do runme cmp -i 54:54 $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp @@ -193,9 +201,9 @@ for image in $IMAGES; do for xform in hflip vflip transpose transverse rot90 rot180 rot270; do for samp in GRAY 444 422 420; do for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do - scalearg=`echo $scale | sed s@_@/@g` + scalearg=`echo $scale | sed 's/\_/\//g'` runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg - runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG + runme "$JAVA" $JAVAARGS TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG $PROGARG runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp done diff --git a/tjexample.c b/tjexample.c new file mode 100644 index 0000000..61200e6 --- /dev/null +++ b/tjexample.c @@ -0,0 +1,395 @@ +/* + * Copyright (C)2011-2012, 2014-2015, 2017 D. R. Commander. + * All Rights Reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the libjpeg-turbo Project nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * This program demonstrates how to compress, decompress, and transform JPEG + * images using the TurboJPEG C API + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <turbojpeg.h> + + +#ifdef _WIN32 +#define strcasecmp stricmp +#define strncasecmp strnicmp +#endif + +#define _throw(action, message) { \ + printf("ERROR in line %d while %s:\n%s\n", __LINE__, action, message); \ + retval = -1; goto bailout; \ +} + +#define _throwtj(action) _throw(action, tjGetErrorStr2(tjInstance)) + +#define _throwunix(action) _throw(action, strerror(errno)) + +#define DEFAULT_SUBSAMP TJSAMP_444 +#define DEFAULT_QUALITY 95 + + +const char *subsampName[TJ_NUMSAMP] = { + "4:4:4", "4:2:2", "4:2:0", "Grayscale", "4:4:0", "4:1:1" +}; + +const char *colorspaceName[TJ_NUMCS] = { + "RGB", "YCbCr", "GRAY", "CMYK", "YCCK" +}; + +tjscalingfactor *scalingFactors = NULL; +int numScalingFactors = 0; + + +/* DCT filter example. This produces a negative of the image. */ + +int customFilter(short *coeffs, tjregion arrayRegion, tjregion planeRegion, + int componentIndex, int transformIndex, + tjtransform *transform) +{ + int i; + + for (i = 0; i < arrayRegion.w * arrayRegion.h; i++) + coeffs[i] = -coeffs[i]; + + return 0; +} + + +void usage(char *programName) +{ + int i; + + printf("\nUSAGE: %s <Input image> <Output image> [options]\n\n", + programName); + + printf("Input and output images can be in Windows BMP or PBMPLUS (PPM/PGM) format. If\n"); + printf("either filename ends in a .jpg extension, then the TurboJPEG API will be used\n"); + printf("to compress or decompress the image.\n\n"); + + printf("Compression Options (used if the output image is a JPEG image)\n"); + printf("--------------------------------------------------------------\n\n"); + + printf("-subsamp <444|422|420|gray> = Apply this level of chrominance subsampling when\n"); + printf(" compressing the output image. 
The default is to use the same level of\n"); + printf(" subsampling as in the input image, if the input image is also a JPEG\n"); + printf(" image, or to use grayscale if the input image is a grayscale non-JPEG\n"); + printf(" image, or to use %s subsampling otherwise.\n\n", + subsampName[DEFAULT_SUBSAMP]); + + printf("-q <1-100> = Compress the output image with this JPEG quality level\n"); + printf(" (default = %d).\n\n", DEFAULT_QUALITY); + + printf("Decompression Options (used if the input image is a JPEG image)\n"); + printf("---------------------------------------------------------------\n\n"); + + printf("-scale M/N = Scale the input image by a factor of M/N when decompressing it.\n"); + printf("(M/N = "); + for (i = 0; i < numScalingFactors; i++) { + printf("%d/%d", scalingFactors[i].num, scalingFactors[i].denom); + if (numScalingFactors == 2 && i != numScalingFactors - 1) + printf(" or "); + else if (numScalingFactors > 2) { + if (i != numScalingFactors - 1) + printf(", "); + if (i == numScalingFactors - 2) + printf("or "); + } + } + printf(")\n\n"); + + printf("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =\n"); + printf(" Perform one of these lossless transform operations on the input image\n"); + printf(" prior to decompressing it (these options are mutually exclusive.)\n\n"); + + printf("-grayscale = Perform lossless grayscale conversion on the input image prior\n"); + printf(" to decompressing it (can be combined with the other transform operations\n"); + printf(" above.)\n\n"); + + printf("-crop WxH+X+Y = Perform lossless cropping on the input image prior to\n"); + printf(" decompressing it. 
X and Y specify the upper left corner of the cropping\n"); + printf(" region, and W and H specify the width and height of the cropping region.\n"); + printf(" X and Y must be evenly divible by the MCU block size (8x8 if the input\n"); + printf(" image was compressed using no subsampling or grayscale, 16x8 if it was\n"); + printf(" compressed using 4:2:2 subsampling, or 16x16 if it was compressed using\n"); + printf(" 4:2:0 subsampling.)\n\n"); + + printf("General Options\n"); + printf("---------------\n\n"); + + printf("-fastupsample = Use the fastest chrominance upsampling algorithm available in\n"); + printf(" the underlying codec.\n\n"); + + printf("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying\n"); + printf(" codec.\n\n"); + + printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n"); + printf(" underlying codec.\n\n"); + + exit(1); +} + + +int main(int argc, char **argv) +{ + tjscalingfactor scalingFactor = { 1, 1 }; + int outSubsamp = -1, outQual = -1; + tjtransform xform; + int flags = 0; + int width, height; + char *inFormat, *outFormat; + FILE *jpegFile = NULL; + unsigned char *imgBuf = NULL, *jpegBuf = NULL; + int retval = 0, i, pixelFormat = TJPF_UNKNOWN; + tjhandle tjInstance = NULL; + + if ((scalingFactors = tjGetScalingFactors(&numScalingFactors)) == NULL) + _throwtj("getting scaling factors"); + memset(&xform, 0, sizeof(tjtransform)); + + if (argc < 3) + usage(argv[0]); + + /* Parse arguments. 
*/ + for (i = 3; i < argc; i++) { + if (!strncasecmp(argv[i], "-sc", 3) && i < argc - 1) { + int match = 0, temp1 = 0, temp2 = 0, j; + + if (sscanf(argv[++i], "%d/%d", &temp1, &temp2) < 2) + usage(argv[0]); + for (j = 0; j < numScalingFactors; j++) { + if ((double)temp1 / (double)temp2 == (double)scalingFactors[j].num / + (double)scalingFactors[j].denom) { + scalingFactor = scalingFactors[j]; + match = 1; + break; + } + } + if (match != 1) + usage(argv[0]); + } else if (!strncasecmp(argv[i], "-su", 3) && i < argc - 1) { + i++; + if (!strncasecmp(argv[i], "g", 1)) + outSubsamp = TJSAMP_GRAY; + else if (!strcasecmp(argv[i], "444")) + outSubsamp = TJSAMP_444; + else if (!strcasecmp(argv[i], "422")) + outSubsamp = TJSAMP_422; + else if (!strcasecmp(argv[i], "420")) + outSubsamp = TJSAMP_420; + else + usage(argv[0]); + } else if (!strncasecmp(argv[i], "-q", 2) && i < argc - 1) { + outQual = atoi(argv[++i]); + if (outQual < 1 || outQual > 100) + usage(argv[0]); + } else if (!strncasecmp(argv[i], "-g", 2)) + xform.options |= TJXOPT_GRAY; + else if (!strcasecmp(argv[i], "-hflip")) + xform.op = TJXOP_HFLIP; + else if (!strcasecmp(argv[i], "-vflip")) + xform.op = TJXOP_VFLIP; + else if (!strcasecmp(argv[i], "-transpose")) + xform.op = TJXOP_TRANSPOSE; + else if (!strcasecmp(argv[i], "-transverse")) + xform.op = TJXOP_TRANSVERSE; + else if (!strcasecmp(argv[i], "-rot90")) + xform.op = TJXOP_ROT90; + else if (!strcasecmp(argv[i], "-rot180")) + xform.op = TJXOP_ROT180; + else if (!strcasecmp(argv[i], "-rot270")) + xform.op = TJXOP_ROT270; + else if (!strcasecmp(argv[i], "-custom")) + xform.customFilter = customFilter; + else if (!strncasecmp(argv[i], "-c", 2) && i < argc - 1) { + if (sscanf(argv[++i], "%dx%d+%d+%d", &xform.r.w, &xform.r.h, &xform.r.x, + &xform.r.y) < 4 || + xform.r.x < 0 || xform.r.y < 0 || xform.r.w < 1 || xform.r.h < 1) + usage(argv[0]); + xform.options |= TJXOPT_CROP; + } else if (!strcasecmp(argv[i], "-fastupsample")) { + printf("Using fast upsampling 
code\n"); + flags |= TJFLAG_FASTUPSAMPLE; + } else if (!strcasecmp(argv[i], "-fastdct")) { + printf("Using fastest DCT/IDCT algorithm\n"); + flags |= TJFLAG_FASTDCT; + } else if (!strcasecmp(argv[i], "-accuratedct")) { + printf("Using most accurate DCT/IDCT algorithm\n"); + flags |= TJFLAG_ACCURATEDCT; + } else usage(argv[0]); + } + + /* Determine input and output image formats based on file extensions. */ + inFormat = strrchr(argv[1], '.'); + outFormat = strrchr(argv[2], '.'); + if (inFormat == NULL || outFormat == NULL || strlen(inFormat) < 2 || + strlen(outFormat) < 2) + usage(argv[0]); + inFormat = &inFormat[1]; + outFormat = &outFormat[1]; + + if (!strcasecmp(inFormat, "jpg")) { + /* Input image is a JPEG image. Decompress and/or transform it. */ + long size; + int inSubsamp, inColorspace; + int doTransform = (xform.op != TJXOP_NONE || xform.options != 0 || + xform.customFilter != NULL); + unsigned long jpegSize; + + /* Read the JPEG file into memory. */ + if ((jpegFile = fopen(argv[1], "rb")) == NULL) + _throwunix("opening input file"); + if (fseek(jpegFile, 0, SEEK_END) < 0 || ((size = ftell(jpegFile)) < 0) || + fseek(jpegFile, 0, SEEK_SET) < 0) + _throwunix("determining input file size"); + if (size == 0) + _throw("determining input file size", "Input file contains no data"); + jpegSize = (unsigned long)size; + if ((jpegBuf = (unsigned char *)tjAlloc(jpegSize)) == NULL) + _throwunix("allocating JPEG buffer"); + if (fread(jpegBuf, jpegSize, 1, jpegFile) < 1) + _throwunix("reading input file"); + fclose(jpegFile); jpegFile = NULL; + + if (doTransform) { + /* Transform it. 
*/ + unsigned char *dstBuf = NULL; /* Dynamically allocate the JPEG buffer */ + unsigned long dstSize = 0; + + if ((tjInstance = tjInitTransform()) == NULL) + _throwtj("initializing transformer"); + xform.options |= TJXOPT_TRIM; + if (tjTransform(tjInstance, jpegBuf, jpegSize, 1, &dstBuf, &dstSize, + &xform, flags) < 0) + _throwtj("transforming input image"); + tjFree(jpegBuf); + jpegBuf = dstBuf; + jpegSize = dstSize; + } else { + if ((tjInstance = tjInitDecompress()) == NULL) + _throwtj("initializing decompressor"); + } + + if (tjDecompressHeader3(tjInstance, jpegBuf, jpegSize, &width, &height, + &inSubsamp, &inColorspace) < 0) + _throwtj("reading JPEG header"); + + printf("%s Image: %d x %d pixels, %s subsampling, %s colorspace\n", + (doTransform ? "Transformed" : "Input"), width, height, + subsampName[inSubsamp], colorspaceName[inColorspace]); + + if (!strcasecmp(outFormat, "jpg") && doTransform && + scalingFactor.num == 1 && scalingFactor.denom == 1 && outSubsamp < 0 && + outQual < 0) { + /* Input image has been transformed, and no re-compression options + have been selected. Write the transformed image to disk and exit. */ + if ((jpegFile = fopen(argv[2], "wb")) == NULL) + _throwunix("opening output file"); + if (fwrite(jpegBuf, jpegSize, 1, jpegFile) < 1) + _throwunix("writing output file"); + fclose(jpegFile); jpegFile = NULL; + goto bailout; + } + + /* Scaling and/or a non-JPEG output image format and/or compression options + have been selected, so we need to decompress the input/transformed + image. 
*/ + width = TJSCALED(width, scalingFactor); + height = TJSCALED(height, scalingFactor); + if (outSubsamp < 0) + outSubsamp = inSubsamp; + + pixelFormat = TJPF_BGRX; + if ((imgBuf = (unsigned char *)tjAlloc(width * height * + tjPixelSize[pixelFormat])) == NULL) + _throwunix("allocating uncompressed image buffer"); + + if (tjDecompress2(tjInstance, jpegBuf, jpegSize, imgBuf, width, 0, height, + pixelFormat, flags) < 0) + _throwtj("decompressing JPEG image"); + tjFree(jpegBuf); jpegBuf = NULL; + tjDestroy(tjInstance); tjInstance = NULL; + } else { + /* Input image is not a JPEG image. Load it into memory. */ + if ((imgBuf = tjLoadImage(argv[1], &width, 1, &height, &pixelFormat, + 0)) == NULL) + _throwtj("loading input image"); + if (outSubsamp < 0) { + if (pixelFormat == TJPF_GRAY) + outSubsamp = TJSAMP_GRAY; + else + outSubsamp = TJSAMP_444; + } + printf("Input Image: %d x %d pixels\n", width, height); + } + + printf("Output Image (%s): %d x %d pixels", outFormat, width, height); + + if (!strcasecmp(outFormat, "jpg")) { + /* Output image format is JPEG. Compress the uncompressed image. */ + unsigned char *jpegBuf = NULL; /* Dynamically allocate the JPEG buffer */ + unsigned long jpegSize = 0; + + if (outQual < 0) + outQual = DEFAULT_QUALITY; + printf(", %s subsampling, quality = %d\n", subsampName[outSubsamp], + outQual); + + if ((tjInstance = tjInitCompress()) == NULL) + _throwtj("initializing compressor"); + if (tjCompress2(tjInstance, imgBuf, width, 0, height, pixelFormat, + &jpegBuf, &jpegSize, outSubsamp, outQual, flags) < 0) + _throwtj("compressing image"); + tjDestroy(tjInstance); tjInstance = NULL; + + /* Write the JPEG image to disk. 
*/ + if ((jpegFile = fopen(argv[2], "wb")) == NULL) + _throwunix("opening output file"); + if (fwrite(jpegBuf, jpegSize, 1, jpegFile) < 1) + _throwunix("writing output file"); + tjDestroy(tjInstance); tjInstance = NULL; + fclose(jpegFile); jpegFile = NULL; + tjFree(jpegBuf); jpegBuf = NULL; + } else { + /* Output image format is not JPEG. Save the uncompressed image + directly to disk. */ + printf("\n"); + if (tjSaveImage(argv[2], imgBuf, width, 0, height, pixelFormat, 0) < 0) + _throwtj("saving output image"); + } + +bailout: + if (imgBuf) tjFree(imgBuf); + if (tjInstance) tjDestroy(tjInstance); + if (jpegBuf) tjFree(jpegBuf); + if (jpegFile) fclose(jpegFile); + return retval; +} diff --git a/tjexampletest.in b/tjexampletest.in index 4cb9e9d..0d3047e 100755 --- a/tjexampletest.in +++ b/tjexampletest.in @@ -20,10 +20,9 @@ runme() } IMAGES="vgl_5674_0098.bmp vgl_6434_0018a.bmp vgl_6548_0026a.bmp nightshot_iso_100.bmp" -IMGDIR=@srcdir@/testimages +IMGDIR=@CMAKE_CURRENT_SOURCE_DIR@/testimages OUTDIR=`mktemp -d /tmp/__tjexampletest_output.XXXXXX` -EXEDIR=. 
-JAVA="@JAVA@ -cp java/turbojpeg.jar -Djava.library.path=.libs" +EXEDIR=@CMAKE_CURRENT_BINARY_DIR@ if [ -d $OUTDIR ]; then rm -rf $OUTDIR @@ -58,7 +57,7 @@ for image in $IMAGES; do # Compression for dct in fast accurate; do for samp in GRAY 420 422 444; do - runme $JAVA TJExample $OUTDIR/$image $OUTDIR/${basename}_${samp}_${dct}.jpg -q 95 -samp ${samp} -${dct}dct + runme $EXEDIR/tjexample $OUTDIR/$image $OUTDIR/${basename}_${samp}_${dct}.jpg -q 95 -subsamp ${samp} -${dct}dct runme cmp $OUTDIR/${basename}_${samp}_${dct}.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg done done @@ -72,12 +71,12 @@ for image in $IMAGES; do dctarg= fi for samp in GRAY 420 422 444; do - runme $JAVA TJExample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}.bmp ${dctarg} + runme $EXEDIR/tjexample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}.bmp ${dctarg} runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${dct}.bmp $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp rm $OUTDIR/${basename}_${samp}_${dct}.bmp done for samp in 420 422; do - runme $JAVA TJExample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp -fastupsample ${dctarg} + runme $EXEDIR/tjexample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp -fastupsample ${dctarg} runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp rm $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp done @@ -85,10 +84,10 @@ for image in $IMAGES; do # Scaled decompression for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do - scalearg=`echo $scale | sed s@_@/@g` + scalearg=`echo $scale | sed 's/\_/\//g'` for samp in GRAY 420 422 444; do runme $EXEDIR/djpeg -rgb -bmp -scale ${scalearg} -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg - runme $JAVA TJExample 
$OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${scale}.bmp -scale ${scalearg} + runme $EXEDIR/tjexample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${scale}.bmp -scale ${scalearg} runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp rm $OUTDIR/${basename}_${samp}_${scale}.bmp done @@ -106,16 +105,16 @@ for image in $IMAGES; do done for xform in hflip vflip transpose transverse rot90 rot180 rot270; do for samp in GRAY 420 422 444; do - runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -crop 16,16,70x60 + runme $EXEDIR/tjexample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -crop 70x60+16+16 runme cmp $OUTDIR/${basename}_${samp}_${xform}.jpg $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg - runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 16,16,70x60 + runme $EXEDIR/tjexample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 70x60+16+16 runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp rm $OUTDIR/${basename}_${samp}_${xform}.bmp done for samp in 420 422; do runme $EXEDIR/djpeg -nosmooth -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg - runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 16,16,70x60 -fastupsample + runme $EXEDIR/tjexample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 70x60+16+16 -fastupsample runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp 
$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp rm $OUTDIR/${basename}_${samp}_${xform}.bmp done @@ -124,9 +123,9 @@ for image in $IMAGES; do # Grayscale transform for xform in hflip vflip transpose transverse rot90 rot180 rot270; do for samp in GRAY 444 422 420; do - runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -grayscale -crop 16,16,70x60 + runme $EXEDIR/tjexample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -grayscale -crop 70x60+16+16 runme cmp $OUTDIR/${basename}_${samp}_${xform}.jpg $OUTDIR/${basename}_GRAY_${xform}_jpegtran.jpg - runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -grayscale -crop 16,16,70x60 + runme $EXEDIR/tjexample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -grayscale -crop 70x60+16+16 runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp rm $OUTDIR/${basename}_${samp}_${xform}.bmp done @@ -136,9 +135,9 @@ for image in $IMAGES; do for xform in hflip vflip transpose transverse rot90 rot180 rot270; do for samp in GRAY 444 422 420; do for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do - scalearg=`echo $scale | sed s@_@/@g` + scalearg=`echo $scale | sed 's/\_/\//g'` runme $EXEDIR/djpeg -rgb -bmp -scale ${scalearg} -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg - runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp -$xform -scale ${scalearg} -crop 16,16,70x60 + runme $EXEDIR/tjexample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp -$xform -scale ${scalearg} -crop 70x60+16+16 runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp 
$OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp rm $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp done diff --git a/tjexampletest.java.in b/tjexampletest.java.in new file mode 100755 index 0000000..d4b63bc --- /dev/null +++ b/tjexampletest.java.in @@ -0,0 +1,151 @@ +#!/bin/bash + +set -u +set -e +trap onexit INT +trap onexit TERM +trap onexit EXIT + +onexit() +{ + if [ -d $OUTDIR ]; then + rm -rf $OUTDIR + fi +} + +runme() +{ + echo \*\*\* $* + "$@" +} + +IMAGES="vgl_5674_0098.bmp vgl_6434_0018a.bmp vgl_6548_0026a.bmp nightshot_iso_100.bmp" +IMGDIR=@CMAKE_CURRENT_SOURCE_DIR@/testimages +OUTDIR=`mktemp -d /tmp/__tjexampletest_java_output.XXXXXX` +EXEDIR=@CMAKE_CURRENT_BINARY_DIR@ +JAVA="@Java_JAVA_EXECUTABLE@" +JAVAARGS="-cp $EXEDIR/java/turbojpeg.jar -Djava.library.path=$EXEDIR" + +if [ -d $OUTDIR ]; then + rm -rf $OUTDIR +fi +mkdir -p $OUTDIR + +exec >$EXEDIR/tjexampletest-java.log + +for image in $IMAGES; do + + cp $IMGDIR/$image $OUTDIR + basename=`basename $image .bmp` + runme $EXEDIR/cjpeg -quality 95 -dct fast -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + runme $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 -outfile 
$OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp + for samp in GRAY 420 422 444; do + runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_default_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct fast -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_fast_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct int -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_accurate_djpeg.bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg + done + for samp in 420 422; do + runme $EXEDIR/djpeg -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct fast -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme $EXEDIR/djpeg -dct int -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg + done + + # Compression + for dct in fast accurate; do + for samp in GRAY 420 422 444; do + runme "$JAVA" $JAVAARGS TJExample $OUTDIR/$image $OUTDIR/${basename}_${samp}_${dct}.jpg -q 95 -subsamp ${samp} -${dct}dct + runme cmp $OUTDIR/${basename}_${samp}_${dct}.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg + done + done + + # Decompression + for dct in fast accurate default; do + srcdct=${dct} + dctarg=-${dct}dct + if [ "${dct}" = "default" ]; then + srcdct=fast + dctarg= + fi + for samp in GRAY 420 422 444; do + runme "$JAVA" $JAVAARGS TJExample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}.bmp ${dctarg} + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${dct}.bmp $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_${dct}.bmp + done + for samp in 420 422; do + runme "$JAVA" $JAVAARGS TJExample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp -fastupsample ${dctarg} + runme cmp -i 54:54 
$OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp + done + done + + # Scaled decompression + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed 's/\_/\//g'` + for samp in GRAY 420 422 444; do + runme $EXEDIR/djpeg -rgb -bmp -scale ${scalearg} -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg + runme "$JAVA" $JAVAARGS TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${scale}.bmp -scale ${scalearg} + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp + rm $OUTDIR/${basename}_${samp}_${scale}.bmp + done + done + + # Transforms + for samp in GRAY 420 422 444; do + runme $EXEDIR/jpegtran -crop 70x60+16+16 -flip horizontal -trim -outfile $OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_fast.jpg + runme $EXEDIR/jpegtran -crop 70x60+16+16 -flip vertical -trim -outfile $OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_fast.jpg + runme $EXEDIR/jpegtran -crop 70x60+16+16 -transpose -trim -outfile $OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg $OUTDIR/${basename}_${samp}_fast.jpg + runme $EXEDIR/jpegtran -crop 70x60+16+16 -transverse -trim -outfile $OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg $OUTDIR/${basename}_${samp}_fast.jpg + runme $EXEDIR/jpegtran -crop 70x60+16+16 -rotate 90 -trim -outfile $OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg $OUTDIR/${basename}_${samp}_fast.jpg + runme $EXEDIR/jpegtran -crop 70x60+16+16 -rotate 180 -trim -outfile $OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg $OUTDIR/${basename}_${samp}_fast.jpg + runme $EXEDIR/jpegtran -crop 70x60+16+16 -rotate 270 -trim -outfile $OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg $OUTDIR/${basename}_${samp}_fast.jpg + done + for xform in hflip vflip transpose 
transverse rot90 rot180 rot270; do + for samp in GRAY 420 422 444; do + runme "$JAVA" $JAVAARGS TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -crop 70x60+16+16 + runme cmp $OUTDIR/${basename}_${samp}_${xform}.jpg $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme "$JAVA" $JAVAARGS TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 70x60+16+16 + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}.bmp + done + for samp in 420 422; do + runme $EXEDIR/djpeg -nosmooth -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme "$JAVA" $JAVAARGS TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 70x60+16+16 -fastupsample + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}.bmp + done + done + + # Grayscale transform + for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + runme "$JAVA" $JAVAARGS TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -grayscale -crop 70x60+16+16 + runme cmp $OUTDIR/${basename}_${samp}_${xform}.jpg $OUTDIR/${basename}_GRAY_${xform}_jpegtran.jpg + runme "$JAVA" $JAVAARGS TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -grayscale -crop 70x60+16+16 + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}.bmp + done + done + + # Transforms with scaling + 
for xform in hflip vflip transpose transverse rot90 rot180 rot270; do + for samp in GRAY 444 422 420; do + for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do + scalearg=`echo $scale | sed 's/\_/\//g'` + runme $EXEDIR/djpeg -rgb -bmp -scale ${scalearg} -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg + runme "$JAVA" $JAVAARGS TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp -$xform -scale ${scalearg} -crop 70x60+16+16 + runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp + rm $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp + done + done + done + +done + +echo SUCCESS! diff --git a/tjunittest.c b/tjunittest.c index f793796..ae72e83 100644 --- a/tjunittest.c +++ b/tjunittest.c @@ -1,5 +1,5 @@ /* - * Copyright (C)2009-2014, 2017 D. R. Commander. All Rights Reserved. + * Copyright (C)2009-2014, 2017-2018 D. R. Commander. All Rights Reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,701 +34,859 @@ #include #include #include -#include "./tjutil.h" -#include "./turbojpeg.h" +#include "tjutil.h" +#include "turbojpeg.h" +#include "md5/md5.h" +#include "cmyk.h" #ifdef _WIN32 - #include - #define random() rand() +#include +#define random() rand() +#else +#include #endif void usage(char *progName) { - printf("\nUSAGE: %s [options]\n\n", progName); - printf("Options:\n"); - printf("-yuv = test YUV encoding/decoding support\n"); - printf("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest\n"); - printf(" 4-byte boundary\n"); - printf("-alloc = test automatic buffer allocation\n\n"); - exit(1); + printf("\nUSAGE: %s [options]\n\n", progName); + printf("Options:\n"); + printf("-yuv = test YUV encoding/decoding support\n"); + printf("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest\n"); + printf(" 4-byte boundary\n"); + printf("-alloc = test automatic buffer allocation\n"); + printf("-bmp = tjLoadImage()/tjSaveImage() unit test\n\n"); + exit(1); } -#define _throwtj() {printf("TurboJPEG ERROR:\n%s\n", tjGetErrorStr()); \ - bailout();} -#define _tj(f) {if((f)==-1) _throwtj();} -#define _throw(m) {printf("ERROR: %s\n", m); bailout();} +#define _throwtj() { \ + printf("TurboJPEG ERROR:\n%s\n", tjGetErrorStr()); \ + bailout() \ +} +#define _tj(f) { if ((f) == -1) _throwtj(); } +#define _throw(m) { printf("ERROR: %s\n", m); bailout() } +#define _throwmd5(filename, md5sum, ref) { \ + printf("\n%s has an MD5 sum of %s.\n Should be %s.\n", filename, md5sum, \ + ref); \ + bailout() \ +} -const char *subNameLong[TJ_NUMSAMP]= -{ - "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1" +const char *subNameLong[TJ_NUMSAMP] = { + "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1" }; -const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440", "411"}; - -const 
char *pixFormatStr[TJ_NUMPF]= -{ - "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "Grayscale", - "RGBA", "BGRA", "ABGR", "ARGB", "CMYK" +const char *subName[TJ_NUMSAMP] = { + "444", "422", "420", "GRAY", "440", "411" }; -const int alphaOffset[TJ_NUMPF] = {-1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1}; +const char *pixFormatStr[TJ_NUMPF] = { + "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "Grayscale", + "RGBA", "BGRA", "ABGR", "ARGB", "CMYK" +}; -const int _3byteFormats[]={TJPF_RGB, TJPF_BGR}; -const int _4byteFormats[]={TJPF_RGBX, TJPF_BGRX, TJPF_XBGR, TJPF_XRGB, - TJPF_CMYK}; -const int _onlyGray[]={TJPF_GRAY}; -const int _onlyRGB[]={TJPF_RGB}; +const int _3byteFormats[] = { TJPF_RGB, TJPF_BGR }; +const int _4byteFormats[] = { + TJPF_RGBX, TJPF_BGRX, TJPF_XBGR, TJPF_XRGB, TJPF_CMYK +}; +const int _onlyGray[] = { TJPF_GRAY }; +const int _onlyRGB[] = { TJPF_RGB }; -int doyuv=0, alloc=0, pad=4; +int doYUV = 0, alloc = 0, pad = 4; -int exitStatus=0; -#define bailout() {exitStatus=-1; goto bailout;} +int exitStatus = 0; +#define bailout() { exitStatus = -1; goto bailout; } void initBuf(unsigned char *buf, int w, int h, int pf, int flags) { - int roffset=tjRedOffset[pf]; - int goffset=tjGreenOffset[pf]; - int boffset=tjBlueOffset[pf]; - int ps=tjPixelSize[pf]; - int index, row, col, halfway=16; - - if(pf==TJPF_GRAY) - { - memset(buf, 0, w*h*ps); - for(row=0; row=halfway) buf[index*ps+3]=0; - } - else - { - buf[index*ps+2]=0; - if(row=halfway) buf[index*ps+goffset]=255; - } - } - } - } + int roffset = tjRedOffset[pf]; + int goffset = tjGreenOffset[pf]; + int boffset = tjBlueOffset[pf]; + int ps = tjPixelSize[pf]; + int index, row, col, halfway = 16; + + if (pf == TJPF_GRAY) { + memset(buf, 0, w * h * ps); + for (row = 0; row < h; row++) { + for (col = 0; col < w; col++) { + if (flags & TJFLAG_BOTTOMUP) index = (h - row - 1) * w + col; + else index = row * w + col; + if (((row / 8) + (col / 8)) % 2 == 0) + buf[index] = (row < halfway) ? 
255 : 0; + else buf[index] = (row < halfway) ? 76 : 226; + } + } + } else if (pf == TJPF_CMYK) { + memset(buf, 255, w * h * ps); + for (row = 0; row < h; row++) { + for (col = 0; col < w; col++) { + if (flags & TJFLAG_BOTTOMUP) index = (h - row - 1) * w + col; + else index = row * w + col; + if (((row / 8) + (col / 8)) % 2 == 0) { + if (row >= halfway) buf[index * ps + 3] = 0; + } else { + buf[index * ps + 2] = 0; + if (row < halfway) buf[index * ps + 1] = 0; + } + } + } + } else { + memset(buf, 0, w * h * ps); + for (row = 0; row < h; row++) { + for (col = 0; col < w; col++) { + if (flags & TJFLAG_BOTTOMUP) index = (h - row - 1) * w + col; + else index = row * w + col; + if (((row / 8) + (col / 8)) % 2 == 0) { + if (row < halfway) { + buf[index * ps + roffset] = 255; + buf[index * ps + goffset] = 255; + buf[index * ps + boffset] = 255; + } + } else { + buf[index * ps + roffset] = 255; + if (row >= halfway) buf[index * ps + goffset] = 255; + } + } + } + } } #define checkval(v, cv) { \ - if(vcv+1) { \ - printf("\nComp. %s at %d,%d should be %d, not %d\n", \ - #v, row, col, cv, v); \ - retval=0; exitStatus=-1; goto bailout; \ - }} + if (v < cv - 1 || v > cv + 1) { \ + printf("\nComp. %s at %d,%d should be %d, not %d\n", #v, row, col, cv, \ + v); \ + retval = 0; exitStatus = -1; goto bailout; \ + } \ +} #define checkval0(v) { \ - if(v>1) { \ - printf("\nComp. %s at %d,%d should be 0, not %d\n", #v, row, col, v); \ - retval=0; exitStatus=-1; goto bailout; \ - }} + if (v > 1) { \ + printf("\nComp. %s at %d,%d should be 0, not %d\n", #v, row, col, v); \ + retval = 0; exitStatus = -1; goto bailout; \ + } \ +} #define checkval255(v) { \ - if(v<254) { \ - printf("\nComp. %s at %d,%d should be 255, not %d\n", #v, row, col, v); \ - retval=0; exitStatus=-1; goto bailout; \ - }} + if (v < 254) { \ + printf("\nComp. 
%s at %d,%d should be 255, not %d\n", #v, row, col, v); \ + retval = 0; exitStatus = -1; goto bailout; \ + } \ +} int checkBuf(unsigned char *buf, int w, int h, int pf, int subsamp, - tjscalingfactor sf, int flags) + tjscalingfactor sf, int flags) { - int roffset=tjRedOffset[pf]; - int goffset=tjGreenOffset[pf]; - int boffset=tjBlueOffset[pf]; - int aoffset=alphaOffset[pf]; - int ps=tjPixelSize[pf]; - int index, row, col, retval=1; - int halfway=16*sf.num/sf.denom; - int blocksize=8*sf.num/sf.denom; - - if(pf==TJPF_CMYK) - { - for(row=0; row=0? buf[index*ps+aoffset]:0xFF; - if(((row/blocksize)+(col/blocksize))%2==0) - { - if(row= 0 ? buf[index * ps + aoffset] : 0xFF; + if (((row / blocksize) + (col / blocksize)) % 2 == 0) { + if (row < halfway) { + checkval255(r); checkval255(g); checkval255(b); + } else { + checkval0(r); checkval0(g); checkval0(b); + } + } else { + if (subsamp == TJSAMP_GRAY) { + if (row < halfway) { + checkval(r, 76); checkval(g, 76); checkval(b, 76); + } else { + checkval(r, 226); checkval(g, 226); checkval(b, 226); + } + } else { + if (row < halfway) { + checkval255(r); checkval0(g); checkval0(b); + } else { + checkval255(r); checkval255(g); checkval0(b); + } + } + } + checkval255(a); + } + } + +bailout: + if (retval == 0) { + for (row = 0; row < h; row++) { + for (col = 0; col < w; col++) { + if (pf == TJPF_CMYK) + printf("%.3d/%.3d/%.3d/%.3d ", buf[(row * w + col) * ps], + buf[(row * w + col) * ps + 1], buf[(row * w + col) * ps + 2], + buf[(row * w + col) * ps + 3]); + else + printf("%.3d/%.3d/%.3d ", buf[(row * w + col) * ps + roffset], + buf[(row * w + col) * ps + goffset], + buf[(row * w + col) * ps + boffset]); + } + printf("\n"); + } + } + return retval; } -#define PAD(v, p) ((v+(p)-1)&(~((p)-1))) +#define PAD(v, p) ((v + (p) - 1) & (~((p) - 1))) int checkBufYUV(unsigned char *buf, int w, int h, int subsamp, - tjscalingfactor sf) + tjscalingfactor sf) { - int row, col; - int hsf=tjMCUWidth[subsamp]/8, vsf=tjMCUHeight[subsamp]/8; - int 
pw=PAD(w, hsf), ph=PAD(h, vsf); - int cw=pw/hsf, ch=ph/vsf; - int ypitch=PAD(pw, pad), uvpitch=PAD(cw, pad); - int retval=1; - int halfway=16*sf.num/sf.denom; - int blocksize=8*sf.num/sf.denom; - - for(row=0; row0) memset(*dstBuf, 0, *dstSize); - - - if(!alloc) flags|=TJFLAG_NOREALLOC; - if(doyuv) - { - unsigned long yuvSize=tjBufSizeYUV2(w, pad, h, subsamp); - tjscalingfactor sf={1, 1}; - tjhandle handle2=tjInitCompress(); - if(!handle2) _throwtj(); - - if((yuvBuf=(unsigned char *)malloc(yuvSize))==NULL) - _throw("Memory allocation failure"); - memset(yuvBuf, 0, yuvSize); - - printf("%s %s -> YUV %s ... ", pfStr, buStrLong, subNameLong[subsamp]); - _tj(tjEncodeYUV3(handle2, srcBuf, w, 0, h, pf, yuvBuf, pad, subsamp, - flags)); - tjDestroy(handle2); - if(checkBufYUV(yuvBuf, w, h, subsamp, sf)) printf("Passed.\n"); - else printf("FAILED!\n"); - - printf("YUV %s %s -> JPEG Q%d ... ", subNameLong[subsamp], buStrLong, - jpegQual); - _tj(tjCompressFromYUV(handle, yuvBuf, w, pad, h, subsamp, dstBuf, - dstSize, jpegQual, flags)); - } - else - { - printf("%s %s -> %s Q%d ... ", pfStr, buStrLong, subNameLong[subsamp], - jpegQual); - _tj(tjCompress2(handle, srcBuf, w, 0, h, pf, dstBuf, dstSize, subsamp, - jpegQual, flags)); - } - - snprintf(tempStr, 1024, "%s_enc_%s_%s_%s_Q%d.jpg", basename, pfStr, buStr, - subName[subsamp], jpegQual); - writeJPEG(*dstBuf, *dstSize, tempStr); - printf("Done.\n Result in %s\n", tempStr); - - bailout: - if(yuvBuf) free(yuvBuf); - if(srcBuf) free(srcBuf); + char tempStr[1024]; + unsigned char *srcBuf = NULL, *yuvBuf = NULL; + const char *pfStr = pixFormatStr[pf]; + const char *buStrLong = + (flags & TJFLAG_BOTTOMUP) ? "Bottom-Up" : "Top-Down "; + const char *buStr = (flags & TJFLAG_BOTTOMUP) ? 
"BU" : "TD"; + + if ((srcBuf = (unsigned char *)malloc(w * h * tjPixelSize[pf])) == NULL) + _throw("Memory allocation failure"); + initBuf(srcBuf, w, h, pf, flags); + + if (*dstBuf && *dstSize > 0) memset(*dstBuf, 0, *dstSize); + + if (!alloc) flags |= TJFLAG_NOREALLOC; + if (doYUV) { + unsigned long yuvSize = tjBufSizeYUV2(w, pad, h, subsamp); + tjscalingfactor sf = { 1, 1 }; + tjhandle handle2 = tjInitCompress(); + + if (!handle2) _throwtj(); + + if ((yuvBuf = (unsigned char *)malloc(yuvSize)) == NULL) + _throw("Memory allocation failure"); + memset(yuvBuf, 0, yuvSize); + + printf("%s %s -> YUV %s ... ", pfStr, buStrLong, subNameLong[subsamp]); + _tj(tjEncodeYUV3(handle2, srcBuf, w, 0, h, pf, yuvBuf, pad, subsamp, + flags)); + tjDestroy(handle2); + if (checkBufYUV(yuvBuf, w, h, subsamp, sf)) printf("Passed.\n"); + else printf("FAILED!\n"); + + printf("YUV %s %s -> JPEG Q%d ... ", subNameLong[subsamp], buStrLong, + jpegQual); + _tj(tjCompressFromYUV(handle, yuvBuf, w, pad, h, subsamp, dstBuf, dstSize, + jpegQual, flags)); + } else { + printf("%s %s -> %s Q%d ... 
", pfStr, buStrLong, subNameLong[subsamp], + jpegQual); + _tj(tjCompress2(handle, srcBuf, w, 0, h, pf, dstBuf, dstSize, subsamp, + jpegQual, flags)); + } + + snprintf(tempStr, 1024, "%s_enc_%s_%s_%s_Q%d.jpg", basename, pfStr, buStr, + subName[subsamp], jpegQual); + writeJPEG(*dstBuf, *dstSize, tempStr); + printf("Done.\n Result in %s\n", tempStr); + +bailout: + if (yuvBuf) free(yuvBuf); + if (srcBuf) free(srcBuf); } void _decompTest(tjhandle handle, unsigned char *jpegBuf, - unsigned long jpegSize, int w, int h, int pf, char *basename, int subsamp, - int flags, tjscalingfactor sf) + unsigned long jpegSize, int w, int h, int pf, char *basename, + int subsamp, int flags, tjscalingfactor sf) { - unsigned char *dstBuf=NULL, *yuvBuf=NULL; - int _hdrw=0, _hdrh=0, _hdrsubsamp=-1; - int scaledWidth=TJSCALED(w, sf); - int scaledHeight=TJSCALED(h, sf); - unsigned long dstSize=0; - - _tj(tjDecompressHeader2(handle, jpegBuf, jpegSize, &_hdrw, &_hdrh, - &_hdrsubsamp)); - if(_hdrw!=w || _hdrh!=h || _hdrsubsamp!=subsamp) - _throw("Incorrect JPEG header"); - - dstSize=scaledWidth*scaledHeight*tjPixelSize[pf]; - if((dstBuf=(unsigned char *)malloc(dstSize))==NULL) - _throw("Memory allocation failure"); - memset(dstBuf, 0, dstSize); - - if(doyuv) - { - unsigned long yuvSize=tjBufSizeYUV2(scaledWidth, pad, scaledHeight, - subsamp); - tjhandle handle2=tjInitDecompress(); - if(!handle2) _throwtj(); - - if((yuvBuf=(unsigned char *)malloc(yuvSize))==NULL) - _throw("Memory allocation failure"); - memset(yuvBuf, 0, yuvSize); - - printf("JPEG -> YUV %s ", subNameLong[subsamp]); - if(sf.num!=1 || sf.denom!=1) - printf("%d/%d ... ", sf.num, sf.denom); - else printf("... "); - _tj(tjDecompressToYUV2(handle, jpegBuf, jpegSize, yuvBuf, scaledWidth, - pad, scaledHeight, flags)); - if(checkBufYUV(yuvBuf, scaledWidth, scaledHeight, subsamp, sf)) - printf("Passed.\n"); - else printf("FAILED!\n"); - - printf("YUV %s -> %s %s ... ", subNameLong[subsamp], pixFormatStr[pf], - (flags&TJFLAG_BOTTOMUP)? 
"Bottom-Up":"Top-Down "); - _tj(tjDecodeYUV(handle2, yuvBuf, pad, subsamp, dstBuf, scaledWidth, 0, - scaledHeight, pf, flags)); - tjDestroy(handle2); - } - else - { - printf("JPEG -> %s %s ", pixFormatStr[pf], - (flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down "); - if(sf.num!=1 || sf.denom!=1) - printf("%d/%d ... ", sf.num, sf.denom); - else printf("... "); - _tj(tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, scaledWidth, 0, - scaledHeight, pf, flags)); - } - - if(checkBuf(dstBuf, scaledWidth, scaledHeight, pf, subsamp, sf, flags)) - printf("Passed."); - else printf("FAILED!"); - printf("\n"); - - bailout: - if(yuvBuf) free(yuvBuf); - if(dstBuf) free(dstBuf); + unsigned char *dstBuf = NULL, *yuvBuf = NULL; + int _hdrw = 0, _hdrh = 0, _hdrsubsamp = -1; + int scaledWidth = TJSCALED(w, sf); + int scaledHeight = TJSCALED(h, sf); + unsigned long dstSize = 0; + + _tj(tjDecompressHeader2(handle, jpegBuf, jpegSize, &_hdrw, &_hdrh, + &_hdrsubsamp)); + if (_hdrw != w || _hdrh != h || _hdrsubsamp != subsamp) + _throw("Incorrect JPEG header"); + + dstSize = scaledWidth * scaledHeight * tjPixelSize[pf]; + if ((dstBuf = (unsigned char *)malloc(dstSize)) == NULL) + _throw("Memory allocation failure"); + memset(dstBuf, 0, dstSize); + + if (doYUV) { + unsigned long yuvSize = tjBufSizeYUV2(scaledWidth, pad, scaledHeight, + subsamp); + tjhandle handle2 = tjInitDecompress(); + + if (!handle2) _throwtj(); + + if ((yuvBuf = (unsigned char *)malloc(yuvSize)) == NULL) + _throw("Memory allocation failure"); + memset(yuvBuf, 0, yuvSize); + + printf("JPEG -> YUV %s ", subNameLong[subsamp]); + if (sf.num != 1 || sf.denom != 1) + printf("%d/%d ... ", sf.num, sf.denom); + else printf("... "); + _tj(tjDecompressToYUV2(handle, jpegBuf, jpegSize, yuvBuf, scaledWidth, pad, + scaledHeight, flags)); + if (checkBufYUV(yuvBuf, scaledWidth, scaledHeight, subsamp, sf)) + printf("Passed.\n"); + else printf("FAILED!\n"); + + printf("YUV %s -> %s %s ... 
", subNameLong[subsamp], pixFormatStr[pf], + (flags & TJFLAG_BOTTOMUP) ? "Bottom-Up" : "Top-Down "); + _tj(tjDecodeYUV(handle2, yuvBuf, pad, subsamp, dstBuf, scaledWidth, 0, + scaledHeight, pf, flags)); + tjDestroy(handle2); + } else { + printf("JPEG -> %s %s ", pixFormatStr[pf], + (flags & TJFLAG_BOTTOMUP) ? "Bottom-Up" : "Top-Down "); + if (sf.num != 1 || sf.denom != 1) + printf("%d/%d ... ", sf.num, sf.denom); + else printf("... "); + _tj(tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, scaledWidth, 0, + scaledHeight, pf, flags)); + } + + if (checkBuf(dstBuf, scaledWidth, scaledHeight, pf, subsamp, sf, flags)) + printf("Passed."); + else printf("FAILED!"); + printf("\n"); + +bailout: + if (yuvBuf) free(yuvBuf); + if (dstBuf) free(dstBuf); } void decompTest(tjhandle handle, unsigned char *jpegBuf, - unsigned long jpegSize, int w, int h, int pf, char *basename, int subsamp, - int flags) + unsigned long jpegSize, int w, int h, int pf, char *basename, + int subsamp, int flags) { - int i, n=0; - tjscalingfactor *sf=tjGetScalingFactors(&n); - if(!sf || !n) _throwtj(); - - for(i=0; i=TJPF_RGBX && pf<=TJPF_XRGB) - { - printf("\n"); - decompTest(dhandle, dstBuf, size, w, h, pf+(TJPF_RGBA-TJPF_RGBX), - basename, subsamp, flags); - } - printf("\n"); - } - } - printf("--------------------\n\n"); - - bailout: - if(chandle) tjDestroy(chandle); - if(dhandle) tjDestroy(dhandle); - - if(dstBuf) tjFree(dstBuf); + tjhandle chandle = NULL, dhandle = NULL; + unsigned char *dstBuf = NULL; + unsigned long size = 0; + int pfi, pf, i; + + if (!alloc) + size = tjBufSize(w, h, subsamp); + if (size != 0) + if ((dstBuf = (unsigned char *)tjAlloc(size)) == NULL) + _throw("Memory allocation failure."); + + if ((chandle = tjInitCompress()) == NULL || + (dhandle = tjInitDecompress()) == NULL) + _throwtj(); + + for (pfi = 0; pfi < nformats; pfi++) { + for (i = 0; i < 2; i++) { + int flags = 0; + + if (subsamp == TJSAMP_422 || subsamp == TJSAMP_420 || + subsamp == TJSAMP_440 || subsamp == 
TJSAMP_411) + flags |= TJFLAG_FASTUPSAMPLE; + if (i == 1) flags |= TJFLAG_BOTTOMUP; + pf = formats[pfi]; + compTest(chandle, &dstBuf, &size, w, h, pf, basename, subsamp, 100, + flags); + decompTest(dhandle, dstBuf, size, w, h, pf, basename, subsamp, flags); + if (pf >= TJPF_RGBX && pf <= TJPF_XRGB) { + printf("\n"); + decompTest(dhandle, dstBuf, size, w, h, pf + (TJPF_RGBA - TJPF_RGBX), + basename, subsamp, flags); + } + printf("\n"); + } + } + printf("--------------------\n\n"); + +bailout: + if (chandle) tjDestroy(chandle); + if (dhandle) tjDestroy(dhandle); + if (dstBuf) tjFree(dstBuf); } void bufSizeTest(void) { - int w, h, i, subsamp; - unsigned char *srcBuf=NULL, *dstBuf=NULL; - tjhandle handle=NULL; - unsigned long dstSize=0; - - if((handle=tjInitCompress())==NULL) _throwtj(); - - printf("Buffer size regression test\n"); - for(subsamp=0; subsamp= 0 && buf[row * pitch + i * ps + aoffset] != 0xFF) + return 0; + } + } + } + return 1; +} + + +int doBmpTest(const char *ext, int width, int align, int height, int pf, + int flags) +{ + char filename[80], *md5sum, md5buf[65]; + int ps = tjPixelSize[pf], pitch = PAD(width * ps, align), loadWidth = 0, + loadHeight = 0, retval = 0, pixelFormat = pf; + unsigned char *buf = NULL; + char *md5ref; + + if (pf == TJPF_GRAY) { + md5ref = !strcasecmp(ext, "ppm") ? "112c682e82ce5de1cca089e20d60000b" : + "51976530acf75f02beddf5d21149101d"; + } else { + md5ref = !strcasecmp(ext, "ppm") ? "c0c9f772b464d1896326883a5c79c545" : + "6d659071b9bfcdee2def22cb58ddadca"; + } + + if ((buf = (unsigned char *)tjAlloc(pitch * height)) == NULL) + _throw("Could not allocate memory"); + initBitmap(buf, width, pitch, height, pf, flags); + + snprintf(filename, 80, "test_bmp_%s_%d_%s.%s", pixFormatStr[pf], align, + (flags & TJFLAG_BOTTOMUP) ? 
"bu" : "td", ext); + _tj(tjSaveImage(filename, buf, width, pitch, height, pf, flags)); + md5sum = MD5File(filename, md5buf); + if (strcasecmp(md5sum, md5ref)) + _throwmd5(filename, md5sum, md5ref); + + tjFree(buf); buf = NULL; + if ((buf = tjLoadImage(filename, &loadWidth, align, &loadHeight, &pf, + flags)) == NULL) + _throwtj(); + if (width != loadWidth || height != loadHeight) { + printf("\n Image dimensions of %s are bogus\n", filename); + retval = -1; goto bailout; + } + if (!cmpBitmap(buf, width, pitch, height, pf, flags, 0)) { + printf("\n Pixel data in %s is bogus\n", filename); + retval = -1; goto bailout; + } + if (pf == TJPF_GRAY) { + tjFree(buf); buf = NULL; + pf = TJPF_XBGR; + if ((buf = tjLoadImage(filename, &loadWidth, align, &loadHeight, &pf, + flags)) == NULL) + _throwtj(); + pitch = PAD(width * tjPixelSize[pf], align); + if (!cmpBitmap(buf, width, pitch, height, pf, flags, 1)) { + printf("\n Converting %s to RGB failed\n", filename); + retval = -1; goto bailout; + } + + tjFree(buf); buf = NULL; + pf = TJPF_CMYK; + if ((buf = tjLoadImage(filename, &loadWidth, align, &loadHeight, &pf, + flags)) == NULL) + _throwtj(); + pitch = PAD(width * tjPixelSize[pf], align); + if (!cmpBitmap(buf, width, pitch, height, pf, flags, 1)) { + printf("\n Converting %s to CMYK failed\n", filename); + retval = -1; goto bailout; + } + } + /* Verify that tjLoadImage() returns the proper "preferred" pixel format for + the file type. 
*/ + tjFree(buf); buf = NULL; + pf = pixelFormat; + pixelFormat = TJPF_UNKNOWN; + if ((buf = tjLoadImage(filename, &loadWidth, align, &loadHeight, + &pixelFormat, flags)) == NULL) + _throwtj(); + if ((pf == TJPF_GRAY && pixelFormat != TJPF_GRAY) || + (pf != TJPF_GRAY && !strcasecmp(ext, "bmp") && + pixelFormat != TJPF_BGR) || + (pf != TJPF_GRAY && !strcasecmp(ext, "ppm") && + pixelFormat != TJPF_RGB)) { + printf("\n tjLoadImage() returned unexpected pixel format: %s\n", + pixFormatStr[pixelFormat]); + retval = -1; + } + unlink(filename); + +bailout: + if (buf) tjFree(buf); + if (exitStatus < 0) return exitStatus; + return retval; +} + + +int bmpTest(void) +{ + int align, width = 35, height = 39, format; + + for (align = 1; align <= 8; align *= 2) { + for (format = 0; format < TJ_NUMPF; format++) { + printf("%s Top-Down BMP (row alignment = %d bytes) ... ", + pixFormatStr[format], align); + if (doBmpTest("bmp", width, align, height, format, 0) == -1) + return -1; + printf("OK.\n"); + + printf("%s Top-Down PPM (row alignment = %d bytes) ... ", + pixFormatStr[format], align); + if (doBmpTest("ppm", width, align, height, format, + TJFLAG_BOTTOMUP) == -1) + return -1; + printf("OK.\n"); + + printf("%s Bottom-Up BMP (row alignment = %d bytes) ... ", + pixFormatStr[format], align); + if (doBmpTest("bmp", width, align, height, format, 0) == -1) + return -1; + printf("OK.\n"); + + printf("%s Bottom-Up PPM (row alignment = %d bytes) ... 
", + pixFormatStr[format], align); + if (doBmpTest("ppm", width, align, height, format, + TJFLAG_BOTTOMUP) == -1) + return -1; + printf("OK.\n"); + } + } + + return 0; } int main(int argc, char *argv[]) { - int i, num4bf=5; - #ifdef _WIN32 - srand((unsigned int)time(NULL)); - #endif - if(argc>1) - { - for(i=1; i 1) { + for (i = 1; i < argc; i++) { + if (!strcasecmp(argv[i], "-yuv")) doYUV = 1; + else if (!strcasecmp(argv[i], "-noyuvpad")) pad = 1; + else if (!strcasecmp(argv[i], "-alloc")) alloc = 1; + else if (!strcasecmp(argv[i], "-bmp")) return bmpTest(); + else usage(argv[0]); + } + } + if (alloc) printf("Testing automatic buffer allocation\n"); + if (doYUV) num4bf = 4; + doTest(35, 39, _3byteFormats, 2, TJSAMP_444, "test"); + doTest(39, 41, _4byteFormats, num4bf, TJSAMP_444, "test"); + doTest(41, 35, _3byteFormats, 2, TJSAMP_422, "test"); + doTest(35, 39, _4byteFormats, num4bf, TJSAMP_422, "test"); + doTest(39, 41, _3byteFormats, 2, TJSAMP_420, "test"); + doTest(41, 35, _4byteFormats, num4bf, TJSAMP_420, "test"); + doTest(35, 39, _3byteFormats, 2, TJSAMP_440, "test"); + doTest(39, 41, _4byteFormats, num4bf, TJSAMP_440, "test"); + doTest(41, 35, _3byteFormats, 2, TJSAMP_411, "test"); + doTest(35, 39, _4byteFormats, num4bf, TJSAMP_411, "test"); + doTest(39, 41, _onlyGray, 1, TJSAMP_GRAY, "test"); + doTest(41, 35, _3byteFormats, 2, TJSAMP_GRAY, "test"); + doTest(35, 39, _4byteFormats, 4, TJSAMP_GRAY, "test"); + bufSizeTest(); + if (doYUV) { + printf("\n--------------------\n\n"); + doTest(48, 48, _onlyRGB, 1, TJSAMP_444, "test_yuv0"); + doTest(48, 48, _onlyRGB, 1, TJSAMP_422, "test_yuv0"); + doTest(48, 48, _onlyRGB, 1, TJSAMP_420, "test_yuv0"); + doTest(48, 48, _onlyRGB, 1, TJSAMP_440, "test_yuv0"); + doTest(48, 48, _onlyRGB, 1, TJSAMP_411, "test_yuv0"); + doTest(48, 48, _onlyRGB, 1, TJSAMP_GRAY, "test_yuv0"); + doTest(48, 48, _onlyGray, 1, TJSAMP_GRAY, "test_yuv0"); + } + + return exitStatus; } diff --git a/tjutil.c b/tjutil.c index 6618d15..b44086d 100644 --- 
a/tjutil.c +++ b/tjutil.c @@ -30,25 +30,26 @@ #include -static double getfreq(void) +static double getFreq(void) { - LARGE_INTEGER freq; - if(!QueryPerformanceFrequency(&freq)) return 0.0; - return (double)freq.QuadPart; + LARGE_INTEGER freq; + + if (!QueryPerformanceFrequency(&freq)) return 0.0; + return (double)freq.QuadPart; } -static double f=-1.0; +static double f = -1.0; -double gettime(void) +double getTime(void) { - LARGE_INTEGER t; - if(f<0.0) f=getfreq(); - if(f==0.0) return (double)GetTickCount()/1000.; - else - { - QueryPerformanceCounter(&t); - return (double)t.QuadPart/f; - } + LARGE_INTEGER t; + + if (f < 0.0) f = getFreq(); + if (f == 0.0) return (double)GetTickCount() / 1000.; + else { + QueryPerformanceCounter(&t); + return (double)t.QuadPart / f; + } } #else @@ -56,11 +57,12 @@ double gettime(void) #include #include -double gettime(void) +double getTime(void) { - struct timeval tv; - if(gettimeofday(&tv, NULL)<0) return 0.0; - else return (double)tv.tv_sec+((double)tv.tv_usec/1000000.); + struct timeval tv; + + if (gettimeofday(&tv, NULL) < 0) return 0.0; + else return (double)tv.tv_sec + ((double)tv.tv_usec / 1000000.); } #endif diff --git a/tjutil.h b/tjutil.h index bdad348..f72840c 100644 --- a/tjutil.h +++ b/tjutil.h @@ -27,21 +27,21 @@ */ #ifdef _WIN32 - #ifndef __MINGW32__ - #include - #define snprintf(str, n, format, ...) \ - _snprintf_s(str, n, _TRUNCATE, format, __VA_ARGS__) - #endif - #define strcasecmp stricmp - #define strncasecmp strnicmp +#ifndef __MINGW32__ +#include +#define snprintf(str, n, format, ...) \ + _snprintf_s(str, n, _TRUNCATE, format, __VA_ARGS__) +#endif +#define strcasecmp stricmp +#define strncasecmp strnicmp #endif #ifndef min - #define min(a,b) ((a)<(b)?(a):(b)) +#define min(a, b) ((a) < (b) ? (a) : (b)) #endif #ifndef max - #define max(a,b) ((a)>(b)?(a):(b)) +#define max(a, b) ((a) > (b) ? 
(a) : (b)) #endif -extern double gettime(void); +extern double getTime(void); diff --git a/transupp.c b/transupp.c index b51ef39..f3370ac 100644 --- a/transupp.c +++ b/transupp.c @@ -28,11 +28,11 @@ #if JPEG_LIB_VERSION >= 70 -#define dstinfo_min_DCT_h_scaled_size dstinfo->min_DCT_h_scaled_size -#define dstinfo_min_DCT_v_scaled_size dstinfo->min_DCT_v_scaled_size +#define dstinfo_min_DCT_h_scaled_size dstinfo->min_DCT_h_scaled_size +#define dstinfo_min_DCT_v_scaled_size dstinfo->min_DCT_v_scaled_size #else -#define dstinfo_min_DCT_h_scaled_size DCTSIZE -#define dstinfo_min_DCT_v_scaled_size DCTSIZE +#define dstinfo_min_DCT_h_scaled_size DCTSIZE +#define dstinfo_min_DCT_v_scaled_size DCTSIZE #endif @@ -89,10 +89,10 @@ LOCAL(void) -do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, - jvirt_barray_ptr *src_coef_arrays, - jvirt_barray_ptr *dst_coef_arrays) +do_crop(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) /* Crop. This is only used when no rotate/flip is requested with the crop. 
*/ { JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks; @@ -110,12 +110,12 @@ do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; dst_blk_y += compptr->v_samp_factor) { dst_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, - (JDIMENSION) compptr->v_samp_factor, TRUE); + ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION)compptr->v_samp_factor, TRUE); src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_y + y_crop_blocks, - (JDIMENSION) compptr->v_samp_factor, FALSE); + (JDIMENSION)compptr->v_samp_factor, FALSE); for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { jcopy_block_row(src_buffer[offset_y] + x_crop_blocks, dst_buffer[offset_y], @@ -127,9 +127,8 @@ do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, LOCAL(void) -do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JDIMENSION x_crop_offset, - jvirt_barray_ptr *src_coef_arrays) +do_flip_h_no_crop(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, jvirt_barray_ptr *src_coef_arrays) /* Horizontal flip; done in-place, so no separate dest array is required. * NB: this only works when y_crop_offset is zero. */ @@ -147,7 +146,7 @@ do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, * Partial iMCUs at the right edge are left untouched. 
*/ MCU_cols = srcinfo->output_width / - (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); for (ci = 0; ci < dstinfo->num_components; ci++) { compptr = dstinfo->comp_info + ci; @@ -156,8 +155,8 @@ do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, for (blk_y = 0; blk_y < compptr->height_in_blocks; blk_y += compptr->v_samp_factor) { buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y, - (JDIMENSION) compptr->v_samp_factor, TRUE); + ((j_common_ptr)srcinfo, src_coef_arrays[ci], blk_y, + (JDIMENSION)compptr->v_samp_factor, TRUE); for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { /* Do the mirroring */ for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) { @@ -183,8 +182,7 @@ do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, */ for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) { jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks, - buffer[offset_y] + blk_x, - (JDIMENSION) 1); + buffer[offset_y] + blk_x, (JDIMENSION)1); } } } @@ -194,10 +192,10 @@ do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, LOCAL(void) -do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, - jvirt_barray_ptr *src_coef_arrays, - jvirt_barray_ptr *dst_coef_arrays) +do_flip_h(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) /* Horizontal flip in general cropping case */ { JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y; @@ -213,7 +211,7 @@ do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, * this is essentially the same as the routine above. 
*/ MCU_cols = srcinfo->output_width / - (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); for (ci = 0; ci < dstinfo->num_components; ci++) { compptr = dstinfo->comp_info + ci; @@ -223,16 +221,17 @@ do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; dst_blk_y += compptr->v_samp_factor) { dst_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, - (JDIMENSION) compptr->v_samp_factor, TRUE); + ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION)compptr->v_samp_factor, TRUE); src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_y + y_crop_blocks, - (JDIMENSION) compptr->v_samp_factor, FALSE); + (JDIMENSION)compptr->v_samp_factor, FALSE); for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { dst_row_ptr = dst_buffer[offset_y]; src_row_ptr = src_buffer[offset_y]; - for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x++) { if (x_crop_blocks + dst_blk_x < comp_width) { /* Do the mirrorable blocks */ dst_ptr = dst_row_ptr[dst_blk_x]; @@ -245,8 +244,7 @@ do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, } else { /* Copy last partial block(s) verbatim */ jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks, - dst_row_ptr + dst_blk_x, - (JDIMENSION) 1); + dst_row_ptr + dst_blk_x, (JDIMENSION)1); } } } @@ -256,10 +254,10 @@ do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, LOCAL(void) -do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, - jvirt_barray_ptr *src_coef_arrays, - jvirt_barray_ptr *dst_coef_arrays) +do_flip_v(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION 
x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) /* Vertical flip */ { JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y; @@ -278,7 +276,7 @@ do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, * Partial iMCUs at the bottom edge are copied verbatim. */ MCU_rows = srcinfo->output_height / - (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); for (ci = 0; ci < dstinfo->num_components; ci++) { compptr = dstinfo->comp_info + ci; @@ -288,21 +286,21 @@ do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; dst_blk_y += compptr->v_samp_factor) { dst_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, - (JDIMENSION) compptr->v_samp_factor, TRUE); + ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION)compptr->v_samp_factor, TRUE); if (y_crop_blocks + dst_blk_y < comp_height) { /* Row is within the mirrorable area. */ src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], comp_height - y_crop_blocks - dst_blk_y - - (JDIMENSION) compptr->v_samp_factor, - (JDIMENSION) compptr->v_samp_factor, FALSE); + (JDIMENSION)compptr->v_samp_factor, + (JDIMENSION)compptr->v_samp_factor, FALSE); } else { /* Bottom-edge blocks will be copied verbatim. 
*/ src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_y + y_crop_blocks, - (JDIMENSION) compptr->v_samp_factor, FALSE); + (JDIMENSION)compptr->v_samp_factor, FALSE); } for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { if (y_crop_blocks + dst_blk_y < comp_height) { @@ -336,10 +334,10 @@ do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, LOCAL(void) -do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, - jvirt_barray_ptr *src_coef_arrays, - jvirt_barray_ptr *dst_coef_arrays) +do_transpose(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) /* Transpose source into destination */ { JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks; @@ -360,21 +358,22 @@ do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; dst_blk_y += compptr->v_samp_factor) { dst_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, - (JDIMENSION) compptr->v_samp_factor, TRUE); + ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION)compptr->v_samp_factor, TRUE); for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x += compptr->h_samp_factor) { src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_x + x_crop_blocks, - (JDIMENSION) compptr->h_samp_factor, FALSE); + (JDIMENSION)compptr->h_samp_factor, FALSE); for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; - src_ptr = 
src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks]; + src_ptr = + src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks]; for (i = 0; i < DCTSIZE; i++) for (j = 0; j < DCTSIZE; j++) - dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j]; } } } @@ -384,10 +383,10 @@ do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, LOCAL(void) -do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, - jvirt_barray_ptr *src_coef_arrays, - jvirt_barray_ptr *dst_coef_arrays) +do_rot_90(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) /* 90 degree rotation is equivalent to * 1. Transposing the image; * 2. Horizontal mirroring. @@ -406,7 +405,7 @@ do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, * not mirrored. */ MCU_cols = srcinfo->output_height / - (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); for (ci = 0; ci < dstinfo->num_components; ci++) { compptr = dstinfo->comp_info + ci; @@ -416,24 +415,24 @@ do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; dst_blk_y += compptr->v_samp_factor) { dst_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, - (JDIMENSION) compptr->v_samp_factor, TRUE); + ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION)compptr->v_samp_factor, TRUE); for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x += compptr->h_samp_factor) { if (x_crop_blocks + dst_blk_x < comp_width) { /* Block is within the mirrorable area. 
*/ src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], comp_width - x_crop_blocks - dst_blk_x - - (JDIMENSION) compptr->h_samp_factor, - (JDIMENSION) compptr->h_samp_factor, FALSE); + (JDIMENSION)compptr->h_samp_factor, + (JDIMENSION)compptr->h_samp_factor, FALSE); } else { /* Edge blocks are transposed but not mirrored. */ src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_x + x_crop_blocks, - (JDIMENSION) compptr->h_samp_factor, FALSE); + (JDIMENSION)compptr->h_samp_factor, FALSE); } for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; @@ -443,10 +442,10 @@ do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, [dst_blk_y + offset_y + y_crop_blocks]; for (i = 0; i < DCTSIZE; i++) { for (j = 0; j < DCTSIZE; j++) - dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j]; i++; for (j = 0; j < DCTSIZE; j++) - dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j]; } } else { /* Edge blocks are transposed but not mirrored. 
*/ @@ -454,7 +453,7 @@ do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, [dst_blk_y + offset_y + y_crop_blocks]; for (i = 0; i < DCTSIZE; i++) for (j = 0; j < DCTSIZE; j++) - dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j]; } } } @@ -465,10 +464,10 @@ do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, LOCAL(void) -do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, - jvirt_barray_ptr *src_coef_arrays, - jvirt_barray_ptr *dst_coef_arrays) +do_rot_270(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) /* 270 degree rotation is equivalent to * 1. Horizontal mirroring; * 2. Transposing the image. @@ -487,7 +486,7 @@ do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, * not mirrored. */ MCU_rows = srcinfo->output_width / - (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); for (ci = 0; ci < dstinfo->num_components; ci++) { compptr = dstinfo->comp_info + ci; @@ -497,15 +496,15 @@ do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; dst_blk_y += compptr->v_samp_factor) { dst_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, - (JDIMENSION) compptr->v_samp_factor, TRUE); + ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION)compptr->v_samp_factor, TRUE); for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x += compptr->h_samp_factor) { src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_x + x_crop_blocks, - 
(JDIMENSION) compptr->h_samp_factor, FALSE); + (JDIMENSION)compptr->h_samp_factor, FALSE); for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; if (y_crop_blocks + dst_blk_y < comp_height) { @@ -514,9 +513,9 @@ do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1]; for (i = 0; i < DCTSIZE; i++) { for (j = 0; j < DCTSIZE; j++) { - dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j]; j++; - dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j]; } } } else { @@ -525,7 +524,7 @@ do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, [dst_blk_y + offset_y + y_crop_blocks]; for (i = 0; i < DCTSIZE; i++) for (j = 0; j < DCTSIZE; j++) - dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j]; } } } @@ -536,10 +535,10 @@ do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, LOCAL(void) -do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, - jvirt_barray_ptr *src_coef_arrays, - jvirt_barray_ptr *dst_coef_arrays) +do_rot_180(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) /* 180 degree rotation is equivalent to * 1. Vertical mirroring; * 2. Horizontal mirroring. 
@@ -555,9 +554,9 @@ do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, jpeg_component_info *compptr; MCU_cols = srcinfo->output_width / - (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); MCU_rows = srcinfo->output_height / - (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); for (ci = 0; ci < dstinfo->num_components; ci++) { compptr = dstinfo->comp_info + ci; @@ -568,32 +567,34 @@ do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; dst_blk_y += compptr->v_samp_factor) { dst_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, - (JDIMENSION) compptr->v_samp_factor, TRUE); + ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION)compptr->v_samp_factor, TRUE); if (y_crop_blocks + dst_blk_y < comp_height) { /* Row is within the vertically mirrorable area. */ src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], comp_height - y_crop_blocks - dst_blk_y - - (JDIMENSION) compptr->v_samp_factor, - (JDIMENSION) compptr->v_samp_factor, FALSE); + (JDIMENSION)compptr->v_samp_factor, + (JDIMENSION)compptr->v_samp_factor, FALSE); } else { /* Bottom-edge rows are only mirrored horizontally. */ src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_y + y_crop_blocks, - (JDIMENSION) compptr->v_samp_factor, FALSE); + (JDIMENSION)compptr->v_samp_factor, FALSE); } for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { dst_row_ptr = dst_buffer[offset_y]; if (y_crop_blocks + dst_blk_y < comp_height) { /* Row is within the mirrorable area. 
*/ src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1]; - for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x++) { dst_ptr = dst_row_ptr[dst_blk_x]; if (x_crop_blocks + dst_blk_x < comp_width) { /* Process the blocks that can be mirrored both ways. */ - src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; + src_ptr = + src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; for (i = 0; i < DCTSIZE; i += 2) { /* For even row, negate every odd column. */ for (j = 0; j < DCTSIZE; j += 2) { @@ -620,11 +621,13 @@ do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, } else { /* Remaining rows are just mirrored horizontally. */ src_row_ptr = src_buffer[offset_y]; - for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) { + for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; + dst_blk_x++) { if (x_crop_blocks + dst_blk_x < comp_width) { /* Process the blocks that can be mirrored. */ dst_ptr = dst_row_ptr[dst_blk_x]; - src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; + src_ptr = + src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1]; for (i = 0; i < DCTSIZE2; i += 2) { *dst_ptr++ = *src_ptr++; *dst_ptr++ = - *src_ptr++; @@ -632,8 +635,7 @@ do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, } else { /* Any remaining right-edge blocks are only copied. 
*/ jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks, - dst_row_ptr + dst_blk_x, - (JDIMENSION) 1); + dst_row_ptr + dst_blk_x, (JDIMENSION)1); } } } @@ -644,10 +646,10 @@ do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, LOCAL(void) -do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, - jvirt_barray_ptr *src_coef_arrays, - jvirt_barray_ptr *dst_coef_arrays) +do_transverse(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JDIMENSION x_crop_offset, JDIMENSION y_crop_offset, + jvirt_barray_ptr *src_coef_arrays, + jvirt_barray_ptr *dst_coef_arrays) /* Transverse transpose is equivalent to * 1. 180 degree rotation; * 2. Transposition; @@ -666,9 +668,9 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, jpeg_component_info *compptr; MCU_cols = srcinfo->output_height / - (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); + (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size); MCU_rows = srcinfo->output_width / - (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); + (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size); for (ci = 0; ci < dstinfo->num_components; ci++) { compptr = dstinfo->comp_info + ci; @@ -679,23 +681,23 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks; dst_blk_y += compptr->v_samp_factor) { dst_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y, - (JDIMENSION) compptr->v_samp_factor, TRUE); + ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y, + (JDIMENSION)compptr->v_samp_factor, TRUE); for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) { for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x += compptr->h_samp_factor) { if (x_crop_blocks + dst_blk_x < comp_width) { /* Block is within the mirrorable area. 
*/ src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], comp_width - x_crop_blocks - dst_blk_x - - (JDIMENSION) compptr->h_samp_factor, - (JDIMENSION) compptr->h_samp_factor, FALSE); + (JDIMENSION)compptr->h_samp_factor, + (JDIMENSION)compptr->h_samp_factor, FALSE); } else { src_buffer = (*srcinfo->mem->access_virt_barray) - ((j_common_ptr) srcinfo, src_coef_arrays[ci], + ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_x + x_crop_blocks, - (JDIMENSION) compptr->h_samp_factor, FALSE); + (JDIMENSION)compptr->h_samp_factor, FALSE); } for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) { dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x]; @@ -706,15 +708,15 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1]; for (i = 0; i < DCTSIZE; i++) { for (j = 0; j < DCTSIZE; j++) { - dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j]; j++; - dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j]; } i++; for (j = 0; j < DCTSIZE; j++) { - dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j]; j++; - dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j]; } } } else { @@ -723,9 +725,9 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1]; for (i = 0; i < DCTSIZE; i++) { for (j = 0; j < DCTSIZE; j++) { - dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j]; j++; - dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j]; } } } @@ -736,10 +738,10 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, [dst_blk_y + offset_y + y_crop_blocks]; for (i = 
0; i < DCTSIZE; i++) { for (j = 0; j < DCTSIZE; j++) - dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j]; i++; for (j = 0; j < DCTSIZE; j++) - dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j]; } } else { /* At lower right corner, just transpose, no mirroring */ @@ -747,7 +749,7 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, [dst_blk_y + offset_y + y_crop_blocks]; for (i = 0; i < DCTSIZE; i++) for (j = 0; j < DCTSIZE; j++) - dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j]; + dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j]; } } } @@ -764,13 +766,13 @@ do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, */ LOCAL(boolean) -jt_read_integer (const char **strptr, JDIMENSION *result) +jt_read_integer(const char **strptr, JDIMENSION *result) { const char *ptr = *strptr; JDIMENSION val = 0; for (; isdigit(*ptr); ptr++) { - val = val * 10 + (JDIMENSION) (*ptr - '0'); + val = val * 10 + (JDIMENSION)(*ptr - '0'); } *result = val; if (ptr == *strptr) @@ -794,7 +796,7 @@ jt_read_integer (const char **strptr, JDIMENSION *result) */ GLOBAL(boolean) -jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec) +jtransform_parse_crop_spec(jpeg_transform_info *info, const char *spec) { info->crop = FALSE; info->crop_width_set = JCROP_UNSET; @@ -804,7 +806,7 @@ jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec) if (isdigit(*spec)) { /* fetch width */ - if (! jt_read_integer(&spec, &info->crop_width)) + if (!jt_read_integer(&spec, &info->crop_width)) return FALSE; if (*spec == 'f' || *spec == 'F') { spec++; @@ -815,7 +817,7 @@ jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec) if (*spec == 'x' || *spec == 'X') { /* fetch height */ spec++; - if (! 
jt_read_integer(&spec, &info->crop_height)) + if (!jt_read_integer(&spec, &info->crop_height)) return FALSE; if (*spec == 'f' || *spec == 'F') { spec++; @@ -827,14 +829,14 @@ jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec) /* fetch xoffset */ info->crop_xoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS; spec++; - if (! jt_read_integer(&spec, &info->crop_xoffset)) + if (!jt_read_integer(&spec, &info->crop_xoffset)) return FALSE; } if (*spec == '+' || *spec == '-') { /* fetch yoffset */ info->crop_yoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS; spec++; - if (! jt_read_integer(&spec, &info->crop_yoffset)) + if (!jt_read_integer(&spec, &info->crop_yoffset)) return FALSE; } /* We had better have gotten to the end of the string. */ @@ -848,7 +850,7 @@ jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec) /* Trim off any partial iMCUs on the indicated destination edge */ LOCAL(void) -trim_right_edge (jpeg_transform_info *info, JDIMENSION full_width) +trim_right_edge(jpeg_transform_info *info, JDIMENSION full_width) { JDIMENSION MCU_cols; @@ -859,7 +861,7 @@ trim_right_edge (jpeg_transform_info *info, JDIMENSION full_width) } LOCAL(void) -trim_bottom_edge (jpeg_transform_info *info, JDIMENSION full_height) +trim_bottom_edge(jpeg_transform_info *info, JDIMENSION full_height) { JDIMENSION MCU_rows; @@ -888,8 +890,8 @@ trim_bottom_edge (jpeg_transform_info *info, JDIMENSION full_height) */ GLOBAL(boolean) -jtransform_request_workspace (j_decompress_ptr srcinfo, - jpeg_transform_info *info) +jtransform_request_workspace(j_decompress_ptr srcinfo, + jpeg_transform_info *info) { jvirt_barray_ptr *coef_arrays; boolean need_workspace, transpose_it; @@ -1093,14 +1095,12 @@ jtransform_request_workspace (j_decompress_ptr srcinfo, */ if (need_workspace) { coef_arrays = (jvirt_barray_ptr *) - (*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE, + (*srcinfo->mem->alloc_small) ((j_common_ptr)srcinfo, JPOOL_IMAGE, 
sizeof(jvirt_barray_ptr) * info->num_components); width_in_iMCUs = (JDIMENSION) - jdiv_round_up((long) info->output_width, - (long) info->iMCU_sample_width); + jdiv_round_up((long)info->output_width, (long)info->iMCU_sample_width); height_in_iMCUs = (JDIMENSION) - jdiv_round_up((long) info->output_height, - (long) info->iMCU_sample_height); + jdiv_round_up((long)info->output_height, (long)info->iMCU_sample_height); for (ci = 0; ci < info->num_components; ci++) { compptr = srcinfo->comp_info + ci; if (info->num_components == 1) { @@ -1116,8 +1116,8 @@ jtransform_request_workspace (j_decompress_ptr srcinfo, width_in_blocks = width_in_iMCUs * h_samp_factor; height_in_blocks = height_in_iMCUs * v_samp_factor; coef_arrays[ci] = (*srcinfo->mem->request_virt_barray) - ((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE, - width_in_blocks, height_in_blocks, (JDIMENSION) v_samp_factor); + ((j_common_ptr)srcinfo, JPOOL_IMAGE, FALSE, + width_in_blocks, height_in_blocks, (JDIMENSION)v_samp_factor); } info->workspace_coef_arrays = coef_arrays; } else @@ -1130,7 +1130,7 @@ jtransform_request_workspace (j_decompress_ptr srcinfo, /* Transpose destination image parameters */ LOCAL(void) -transpose_critical_parameters (j_compress_ptr dstinfo) +transpose_critical_parameters(j_compress_ptr dstinfo) { int tblno, i, j, ci, itemp; jpeg_component_info *compptr; @@ -1162,9 +1162,10 @@ transpose_critical_parameters (j_compress_ptr dstinfo) if (qtblptr != NULL) { for (i = 0; i < DCTSIZE; i++) { for (j = 0; j < i; j++) { - qtemp = qtblptr->quantval[i*DCTSIZE+j]; - qtblptr->quantval[i*DCTSIZE+j] = qtblptr->quantval[j*DCTSIZE+i]; - qtblptr->quantval[j*DCTSIZE+i] = qtemp; + qtemp = qtblptr->quantval[i * DCTSIZE + j]; + qtblptr->quantval[i * DCTSIZE + j] = + qtblptr->quantval[j * DCTSIZE + i]; + qtblptr->quantval[j * DCTSIZE + i] = qtemp; } } } @@ -1178,8 +1179,8 @@ transpose_critical_parameters (j_compress_ptr dstinfo) */ LOCAL(void) -adjust_exif_parameters (JOCTET *data, unsigned int length, - 
JDIMENSION new_width, JDIMENSION new_height) +adjust_exif_parameters(JOCTET *data, unsigned int length, JDIMENSION new_width, + JDIMENSION new_height) { boolean is_motorola; /* Flag for byte order */ unsigned int number_of_tags, tagnum; @@ -1225,9 +1226,9 @@ adjust_exif_parameters (JOCTET *data, unsigned int length, if (is_motorola) { number_of_tags = GETJOCTET(data[firstoffset]); number_of_tags <<= 8; - number_of_tags += GETJOCTET(data[firstoffset+1]); + number_of_tags += GETJOCTET(data[firstoffset + 1]); } else { - number_of_tags = GETJOCTET(data[firstoffset+1]); + number_of_tags = GETJOCTET(data[firstoffset + 1]); number_of_tags <<= 8; number_of_tags += GETJOCTET(data[firstoffset]); } @@ -1241,9 +1242,9 @@ adjust_exif_parameters (JOCTET *data, unsigned int length, if (is_motorola) { tagnum = GETJOCTET(data[firstoffset]); tagnum <<= 8; - tagnum += GETJOCTET(data[firstoffset+1]); + tagnum += GETJOCTET(data[firstoffset + 1]); } else { - tagnum = GETJOCTET(data[firstoffset+1]); + tagnum = GETJOCTET(data[firstoffset + 1]); tagnum <<= 8; tagnum += GETJOCTET(data[firstoffset]); } @@ -1254,17 +1255,17 @@ adjust_exif_parameters (JOCTET *data, unsigned int length, /* Get the ExifSubIFD offset */ if (is_motorola) { - if (GETJOCTET(data[firstoffset+8]) != 0) return; - if (GETJOCTET(data[firstoffset+9]) != 0) return; - offset = GETJOCTET(data[firstoffset+10]); + if (GETJOCTET(data[firstoffset + 8]) != 0) return; + if (GETJOCTET(data[firstoffset + 9]) != 0) return; + offset = GETJOCTET(data[firstoffset + 10]); offset <<= 8; - offset += GETJOCTET(data[firstoffset+11]); + offset += GETJOCTET(data[firstoffset + 11]); } else { - if (GETJOCTET(data[firstoffset+11]) != 0) return; - if (GETJOCTET(data[firstoffset+10]) != 0) return; - offset = GETJOCTET(data[firstoffset+9]); + if (GETJOCTET(data[firstoffset + 11]) != 0) return; + if (GETJOCTET(data[firstoffset + 10]) != 0) return; + offset = GETJOCTET(data[firstoffset + 9]); offset <<= 8; - offset += GETJOCTET(data[firstoffset+8]); + 
offset += GETJOCTET(data[firstoffset + 8]); } if (offset > length - 2) return; /* check end of data segment */ @@ -1272,9 +1273,9 @@ adjust_exif_parameters (JOCTET *data, unsigned int length, if (is_motorola) { number_of_tags = GETJOCTET(data[offset]); number_of_tags <<= 8; - number_of_tags += GETJOCTET(data[offset+1]); + number_of_tags += GETJOCTET(data[offset + 1]); } else { - number_of_tags = GETJOCTET(data[offset+1]); + number_of_tags = GETJOCTET(data[offset + 1]); number_of_tags <<= 8; number_of_tags += GETJOCTET(data[offset]); } @@ -1288,9 +1289,9 @@ adjust_exif_parameters (JOCTET *data, unsigned int length, if (is_motorola) { tagnum = GETJOCTET(data[offset]); tagnum <<= 8; - tagnum += GETJOCTET(data[offset+1]); + tagnum += GETJOCTET(data[offset + 1]); } else { - tagnum = GETJOCTET(data[offset+1]); + tagnum = GETJOCTET(data[offset + 1]); tagnum <<= 8; tagnum += GETJOCTET(data[offset]); } @@ -1300,27 +1301,27 @@ adjust_exif_parameters (JOCTET *data, unsigned int length, else new_value = new_height; /* ExifImageHeight Tag */ if (is_motorola) { - data[offset+2] = 0; /* Format = unsigned long (4 octets) */ - data[offset+3] = 4; - data[offset+4] = 0; /* Number Of Components = 1 */ - data[offset+5] = 0; - data[offset+6] = 0; - data[offset+7] = 1; - data[offset+8] = 0; - data[offset+9] = 0; - data[offset+10] = (JOCTET)((new_value >> 8) & 0xFF); - data[offset+11] = (JOCTET)(new_value & 0xFF); + data[offset + 2] = 0; /* Format = unsigned long (4 octets) */ + data[offset + 3] = 4; + data[offset + 4] = 0; /* Number Of Components = 1 */ + data[offset + 5] = 0; + data[offset + 6] = 0; + data[offset + 7] = 1; + data[offset + 8] = 0; + data[offset + 9] = 0; + data[offset + 10] = (JOCTET)((new_value >> 8) & 0xFF); + data[offset + 11] = (JOCTET)(new_value & 0xFF); } else { - data[offset+2] = 4; /* Format = unsigned long (4 octets) */ - data[offset+3] = 0; - data[offset+4] = 1; /* Number Of Components = 1 */ - data[offset+5] = 0; - data[offset+6] = 0; - data[offset+7] = 0; - 
data[offset+8] = (JOCTET)(new_value & 0xFF); - data[offset+9] = (JOCTET)((new_value >> 8) & 0xFF); - data[offset+10] = 0; - data[offset+11] = 0; + data[offset + 2] = 4; /* Format = unsigned long (4 octets) */ + data[offset + 3] = 0; + data[offset + 4] = 1; /* Number Of Components = 1 */ + data[offset + 5] = 0; + data[offset + 6] = 0; + data[offset + 7] = 0; + data[offset + 8] = (JOCTET)(new_value & 0xFF); + data[offset + 9] = (JOCTET)((new_value >> 8) & 0xFF); + data[offset + 10] = 0; + data[offset + 11] = 0; } } offset += 12; @@ -1340,10 +1341,9 @@ adjust_exif_parameters (JOCTET *data, unsigned int length, */ GLOBAL(jvirt_barray_ptr *) -jtransform_adjust_parameters (j_decompress_ptr srcinfo, - j_compress_ptr dstinfo, - jvirt_barray_ptr *src_coef_arrays, - jpeg_transform_info *info) +jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, + jpeg_transform_info *info) { /* If force-to-grayscale is requested, adjust destination parameters */ if (info->force_grayscale) { @@ -1409,7 +1409,7 @@ jtransform_adjust_parameters (j_decompress_ptr srcinfo, /* Adjust Exif properties */ if (srcinfo->marker_list != NULL && - srcinfo->marker_list->marker == JPEG_APP0+1 && + srcinfo->marker_list->marker == JPEG_APP0 + 1 && srcinfo->marker_list->data_length >= 6 && GETJOCTET(srcinfo->marker_list->data[0]) == 0x45 && GETJOCTET(srcinfo->marker_list->data[1]) == 0x78 && @@ -1425,15 +1425,15 @@ jtransform_adjust_parameters (j_decompress_ptr srcinfo, dstinfo->jpeg_height != srcinfo->image_height) /* Align data segment to start of TIFF structure for parsing */ adjust_exif_parameters(srcinfo->marker_list->data + 6, - srcinfo->marker_list->data_length - 6, - dstinfo->jpeg_width, dstinfo->jpeg_height); + srcinfo->marker_list->data_length - 6, + dstinfo->jpeg_width, dstinfo->jpeg_height); #else if (dstinfo->image_width != srcinfo->image_width || dstinfo->image_height != srcinfo->image_height) /* Align data segment to start of TIFF 
structure for parsing */ adjust_exif_parameters(srcinfo->marker_list->data + 6, - srcinfo->marker_list->data_length - 6, - dstinfo->image_width, dstinfo->image_height); + srcinfo->marker_list->data_length - 6, + dstinfo->image_width, dstinfo->image_height); #endif } @@ -1454,10 +1454,9 @@ jtransform_adjust_parameters (j_decompress_ptr srcinfo, */ GLOBAL(void) -jtransform_execute_transform (j_decompress_ptr srcinfo, - j_compress_ptr dstinfo, - jvirt_barray_ptr *src_coef_arrays, - jpeg_transform_info *info) +jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, + jpeg_transform_info *info) { jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays; @@ -1536,19 +1535,19 @@ jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height, switch (transform) { case JXFORM_FLIP_H: case JXFORM_ROT_270: - if (image_width % (JDIMENSION) MCU_width) + if (image_width % (JDIMENSION)MCU_width) result = FALSE; break; case JXFORM_FLIP_V: case JXFORM_ROT_90: - if (image_height % (JDIMENSION) MCU_height) + if (image_height % (JDIMENSION)MCU_height) result = FALSE; break; case JXFORM_TRANSVERSE: case JXFORM_ROT_180: - if (image_width % (JDIMENSION) MCU_width) + if (image_width % (JDIMENSION)MCU_width) result = FALSE; - if (image_height % (JDIMENSION) MCU_height) + if (image_height % (JDIMENSION)MCU_height) result = FALSE; break; default: @@ -1566,7 +1565,7 @@ jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height, */ GLOBAL(void) -jcopy_markers_setup (j_decompress_ptr srcinfo, JCOPY_OPTION option) +jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option) { #ifdef SAVE_MARKERS_SUPPORTED int m; @@ -1576,9 +1575,12 @@ jcopy_markers_setup (j_decompress_ptr srcinfo, JCOPY_OPTION option) jpeg_save_markers(srcinfo, JPEG_COM, 0xFFFF); } /* Save all types of APPn markers iff ALL option */ - if (option == JCOPYOPT_ALL) { - for (m = 0; m < 16; m++) + if (option == JCOPYOPT_ALL || 
option == JCOPYOPT_ALL_EXCEPT_ICC) { + for (m = 0; m < 16; m++) { + if (option == JCOPYOPT_ALL_EXCEPT_ICC && m == 2) + continue; jpeg_save_markers(srcinfo, JPEG_APP0 + m, 0xFFFF); + } } #endif /* SAVE_MARKERS_SUPPORTED */ } @@ -1591,8 +1593,8 @@ jcopy_markers_setup (j_decompress_ptr srcinfo, JCOPY_OPTION option) */ GLOBAL(void) -jcopy_markers_execute (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JCOPY_OPTION option) +jcopy_markers_execute(j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + JCOPY_OPTION option) { jpeg_saved_marker_ptr marker; @@ -1612,7 +1614,7 @@ jcopy_markers_execute (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, GETJOCTET(marker->data[4]) == 0) continue; /* reject duplicate JFIF */ if (dstinfo->write_Adobe_marker && - marker->marker == JPEG_APP0+14 && + marker->marker == JPEG_APP0 + 14 && marker->data_length >= 5 && GETJOCTET(marker->data[0]) == 0x41 && GETJOCTET(marker->data[1]) == 0x64 && diff --git a/transupp.h b/transupp.h index bf3118a..80264cc 100644 --- a/transupp.h +++ b/transupp.h @@ -3,8 +3,8 @@ * * This file was part of the Independent JPEG Group's software: * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding. - * It was modified by The libjpeg-turbo Project to include only code relevant - * to libjpeg-turbo. + * libjpeg-turbo Modifications: + * Copyright (C) 2017, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -22,7 +22,7 @@ /* If you happen not to want the image transform support, disable it here */ #ifndef TRANSFORMS_SUPPORTED -#define TRANSFORMS_SUPPORTED 1 /* 0 disables transform code */ +#define TRANSFORMS_SUPPORTED 1 /* 0 disables transform code */ #endif /* @@ -156,25 +156,27 @@ typedef struct { #if TRANSFORMS_SUPPORTED /* Parse a crop specification (written in X11 geometry style) */ -EXTERN(boolean) jtransform_parse_crop_spec - (jpeg_transform_info *info, const char *spec); +EXTERN(boolean) jtransform_parse_crop_spec(jpeg_transform_info *info, + const char *spec); /* Request any required workspace */ -EXTERN(boolean) jtransform_request_workspace - (j_decompress_ptr srcinfo, jpeg_transform_info *info); +EXTERN(boolean) jtransform_request_workspace(j_decompress_ptr srcinfo, + jpeg_transform_info *info); /* Adjust output image parameters */ EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters - (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info); + (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info); /* Execute the actual transformation, if any */ -EXTERN(void) jtransform_execute_transform - (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info); +EXTERN(void) jtransform_execute_transform(j_decompress_ptr srcinfo, + j_compress_ptr dstinfo, + jvirt_barray_ptr *src_coef_arrays, + jpeg_transform_info *info); /* Determine whether lossless transformation is perfectly * possible for a specified image and transformation. 
*/ -EXTERN(boolean) jtransform_perfect_transform - (JDIMENSION image_width, JDIMENSION image_height, int MCU_width, - int MCU_height, JXFORM_CODE transform); +EXTERN(boolean) jtransform_perfect_transform(JDIMENSION image_width, + JDIMENSION image_height, + int MCU_width, int MCU_height, + JXFORM_CODE transform); /* jtransform_execute_transform used to be called * jtransform_execute_transformation, but some compilers complain about @@ -193,15 +195,16 @@ EXTERN(boolean) jtransform_perfect_transform typedef enum { JCOPYOPT_NONE, /* copy no optional markers */ JCOPYOPT_COMMENTS, /* copy only comment (COM) markers */ - JCOPYOPT_ALL /* copy all optional markers */ + JCOPYOPT_ALL, /* copy all optional markers */ + JCOPYOPT_ALL_EXCEPT_ICC /* copy all optional markers except APP2 */ } JCOPY_OPTION; #define JCOPYOPT_DEFAULT JCOPYOPT_COMMENTS /* recommended default */ /* Setup decompression object to save desired markers in memory */ -EXTERN(void) jcopy_markers_setup - (j_decompress_ptr srcinfo, JCOPY_OPTION option); +EXTERN(void) jcopy_markers_setup(j_decompress_ptr srcinfo, + JCOPY_OPTION option); /* Copy markers saved in the given source object to the destination object */ -EXTERN(void) jcopy_markers_execute - (j_decompress_ptr srcinfo, j_compress_ptr dstinfo, - JCOPY_OPTION option); +EXTERN(void) jcopy_markers_execute(j_decompress_ptr srcinfo, + j_compress_ptr dstinfo, + JCOPY_OPTION option); diff --git a/turbojpeg-jni.c b/turbojpeg-jni.c index eaba670..d0a0935 100644 --- a/turbojpeg-jni.c +++ b/turbojpeg-jni.c @@ -1,5 +1,5 @@ /* - * Copyright (C)2011-2016 D. R. Commander. All Rights Reserved. + * Copyright (C)2011-2018 D. R. Commander. All Rights Reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,1130 +37,1151 @@ #include "java/org_libjpegturbo_turbojpeg_TJDecompressor.h" #include "java/org_libjpegturbo_turbojpeg_TJ.h" -#define PAD(v, p) ((v+(p)-1)&(~((p)-1))) +#define PAD(v, p) ((v + (p) - 1) & (~((p) - 1))) -#define _throw(msg, exceptionClass) { \ - jclass _exccls=(*env)->FindClass(env, exceptionClass); \ - if(!_exccls || (*env)->ExceptionCheck(env)) goto bailout; \ - (*env)->ThrowNew(env, _exccls, msg); \ - goto bailout; \ +#define bailif0(f) { \ + if (!(f) || (*env)->ExceptionCheck(env)) { \ + goto bailout; \ + } \ } -#define _throwtj() _throw(tjGetErrorStr(), "org/libjpegturbo/turbojpeg/TJException") +#define _throw(msg, exceptionClass) { \ + jclass _exccls = (*env)->FindClass(env, exceptionClass); \ + \ + bailif0(_exccls); \ + (*env)->ThrowNew(env, _exccls, msg); \ + goto bailout; \ +} -#define _throwarg(msg) _throw(msg, "java/lang/IllegalArgumentException") +#define _throwtj() { \ + jclass _exccls; \ + jmethodID _excid; \ + jobject _excobj; \ + jstring _errstr; \ + \ + bailif0(_errstr = (*env)->NewStringUTF(env, tjGetErrorStr2(handle))); \ + bailif0(_exccls = (*env)->FindClass(env, \ + "org/libjpegturbo/turbojpeg/TJException")); \ + bailif0(_excid = (*env)->GetMethodID(env, _exccls, "", \ + "(Ljava/lang/String;I)V")); \ + bailif0(_excobj = (*env)->NewObject(env, _exccls, _excid, _errstr, \ + tjGetErrorCode(handle))); \ + (*env)->Throw(env, _excobj); \ + goto bailout; \ +} -#define _throwmem() _throw("Memory allocation failure", "java/lang/OutOfMemoryError"); +#define _throwarg(msg) _throw(msg, "java/lang/IllegalArgumentException") -#define bailif0(f) {if(!(f) || (*env)->ExceptionCheck(env)) { \ - goto bailout; \ -}} +#define _throwmem() \ + _throw("Memory allocation failure", "java/lang/OutOfMemoryError"); -#define gethandle() \ - jclass _cls=(*env)->GetObjectClass(env, obj); \ - jfieldID _fid; \ 
- if(!_cls || (*env)->ExceptionCheck(env)) goto bailout; \ - bailif0(_fid=(*env)->GetFieldID(env, _cls, "handle", "J")); \ - handle=(tjhandle)(size_t)(*env)->GetLongField(env, obj, _fid); \ +#define gethandle() \ + jclass _cls = (*env)->GetObjectClass(env, obj); \ + jfieldID _fid; \ + \ + bailif0(_cls); \ + bailif0(_fid = (*env)->GetFieldID(env, _cls, "handle", "J")); \ + handle = (tjhandle)(size_t)(*env)->GetLongField(env, obj, _fid); #ifdef _WIN32 -#define setenv(envvar, value, dummy) _putenv_s(envvar, value) +#define setenv(envvar, value, dummy) _putenv_s(envvar, value) #endif -#define prop2env(property, envvar) \ -{ \ - if((jName=(*env)->NewStringUTF(env, property))!=NULL \ - && (jValue=(*env)->CallStaticObjectMethod(env, cls, mid, jName))!=NULL) \ - { \ - if((value=(*env)->GetStringUTFChars(env, jValue, 0))!=NULL) \ - { \ - setenv(envvar, value, 1); \ - (*env)->ReleaseStringUTFChars(env, jValue, value); \ - } \ - } \ +#define prop2env(property, envvar) { \ + if ((jName = (*env)->NewStringUTF(env, property)) != NULL && \ + (jValue = (*env)->CallStaticObjectMethod(env, cls, mid, \ + jName)) != NULL) { \ + if ((value = (*env)->GetStringUTFChars(env, jValue, 0)) != NULL) { \ + setenv(envvar, value, 1); \ + (*env)->ReleaseStringUTFChars(env, jValue, value); \ + } \ + } \ } int ProcessSystemProperties(JNIEnv *env) { - jclass cls; jmethodID mid; - jstring jName, jValue; - const char *value; - - bailif0(cls=(*env)->FindClass(env, "java/lang/System")); - bailif0(mid=(*env)->GetStaticMethodID(env, cls, "getProperty", - "(Ljava/lang/String;)Ljava/lang/String;")); - - prop2env("turbojpeg.optimize", "TJ_OPTIMIZE"); - prop2env("turbojpeg.arithmetic", "TJ_ARITHMETIC"); - prop2env("turbojpeg.restart", "TJ_RESTART"); - prop2env("turbojpeg.progressive", "TJ_PROGRESSIVE"); - return 0; - - bailout: - return -1; + jclass cls; + jmethodID mid; + jstring jName, jValue; + const char *value; + + bailif0(cls = (*env)->FindClass(env, "java/lang/System")); + bailif0(mid = 
(*env)->GetStaticMethodID(env, cls, "getProperty", + "(Ljava/lang/String;)Ljava/lang/String;")); + + prop2env("turbojpeg.optimize", "TJ_OPTIMIZE"); + prop2env("turbojpeg.arithmetic", "TJ_ARITHMETIC"); + prop2env("turbojpeg.restart", "TJ_RESTART"); + prop2env("turbojpeg.progressive", "TJ_PROGRESSIVE"); + return 0; + +bailout: + return -1; } /* TurboJPEG 1.2.x: TJ::bufSize() */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize - (JNIEnv *env, jclass cls, jint width, jint height, jint jpegSubsamp) + (JNIEnv *env, jclass cls, jint width, jint height, jint jpegSubsamp) { - jint retval=(jint)tjBufSize(width, height, jpegSubsamp); - if(retval==-1) _throwarg(tjGetErrorStr()); + jint retval = (jint)tjBufSize(width, height, jpegSubsamp); + + if (retval == -1) _throwarg(tjGetErrorStr()); - bailout: - return retval; +bailout: + return retval; } /* TurboJPEG 1.4.x: TJ::bufSizeYUV() */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII - (JNIEnv *env, jclass cls, jint width, jint pad, jint height, jint subsamp) + (JNIEnv *env, jclass cls, jint width, jint pad, jint height, jint subsamp) { - jint retval=(jint)tjBufSizeYUV2(width, pad, height, subsamp); - if(retval==-1) _throwarg(tjGetErrorStr()); + jint retval = (jint)tjBufSizeYUV2(width, pad, height, subsamp); - bailout: - return retval; + if (retval == -1) _throwarg(tjGetErrorStr()); + +bailout: + return retval; } /* TurboJPEG 1.2.x: TJ::bufSizeYUV() */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III - (JNIEnv *env, jclass cls, jint width, jint height, jint subsamp) + (JNIEnv *env, jclass cls, jint width, jint height, jint subsamp) { - return Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII(env, cls, width, - 4, height, subsamp); + return Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII(env, cls, width, + 4, height, + subsamp); } /* TurboJPEG 1.4.x: TJ::planeSizeYUV() */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeSizeYUV__IIIII - (JNIEnv 
*env, jclass cls, jint componentID, jint width, jint stride, - jint height, jint subsamp) + (JNIEnv *env, jclass cls, jint componentID, jint width, jint stride, + jint height, jint subsamp) { - jint retval=(jint)tjPlaneSizeYUV(componentID, width, stride, height, - subsamp); - if(retval==-1) _throwarg(tjGetErrorStr()); + jint retval = (jint)tjPlaneSizeYUV(componentID, width, stride, height, + subsamp); + + if (retval == -1) _throwarg(tjGetErrorStr()); - bailout: - return retval; +bailout: + return retval; } /* TurboJPEG 1.4.x: TJ::planeWidth() */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeWidth__III - (JNIEnv *env, jclass cls, jint componentID, jint width, jint subsamp) + (JNIEnv *env, jclass cls, jint componentID, jint width, jint subsamp) { - jint retval=(jint)tjPlaneWidth(componentID, width, subsamp); - if(retval==-1) _throwarg(tjGetErrorStr()); + jint retval = (jint)tjPlaneWidth(componentID, width, subsamp); - bailout: - return retval; + if (retval == -1) _throwarg(tjGetErrorStr()); + +bailout: + return retval; } /* TurboJPEG 1.4.x: TJ::planeHeight() */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_planeHeight__III - (JNIEnv *env, jclass cls, jint componentID, jint height, jint subsamp) + (JNIEnv *env, jclass cls, jint componentID, jint height, jint subsamp) { - jint retval=(jint)tjPlaneHeight(componentID, height, subsamp); - if(retval==-1) _throwarg(tjGetErrorStr()); + jint retval = (jint)tjPlaneHeight(componentID, height, subsamp); + + if (retval == -1) _throwarg(tjGetErrorStr()); - bailout: - return retval; +bailout: + return retval; } /* TurboJPEG 1.2.x: TJCompressor::init() */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_init - (JNIEnv *env, jobject obj) + (JNIEnv *env, jobject obj) { - jclass cls; - jfieldID fid; - tjhandle handle; + jclass cls; + jfieldID fid; + tjhandle handle; - if((handle=tjInitCompress())==NULL) - _throwtj(); + if ((handle = tjInitCompress()) == NULL) + _throw(tjGetErrorStr(), 
"org/libjpegturbo/turbojpeg/TJException"); - bailif0(cls=(*env)->GetObjectClass(env, obj)); - bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J")); - (*env)->SetLongField(env, obj, fid, (size_t)handle); + bailif0(cls = (*env)->GetObjectClass(env, obj)); + bailif0(fid = (*env)->GetFieldID(env, cls, "handle", "J")); + (*env)->SetLongField(env, obj, fid, (size_t)handle); - bailout: - return; +bailout: + return; } static jint TJCompressor_compress - (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint x, jint y, - jint width, jint pitch, jint height, jint pf, jbyteArray dst, - jint jpegSubsamp, jint jpegQual, jint flags) + (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint x, jint y, + jint width, jint pitch, jint height, jint pf, jbyteArray dst, + jint jpegSubsamp, jint jpegQual, jint flags) { - tjhandle handle=0; - unsigned long jpegSize=0; - jsize arraySize=0, actualPitch; - unsigned char *srcBuf=NULL, *jpegBuf=NULL; - - gethandle(); - - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1 - || pitch<0) - _throwarg("Invalid argument in compress()"); - if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF) - _throwarg("Mismatch between Java and C API"); - - actualPitch=(pitch==0)? 
width*tjPixelSize[pf]:pitch; - arraySize=(y+height-1)*actualPitch + (x+width)*tjPixelSize[pf]; - if((*env)->GetArrayLength(env, src)*srcElementSizeGetArrayLength(env, dst)<(jsize)jpegSize) - _throwarg("Destination buffer is not large enough"); - - bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); - bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); - - if(ProcessSystemProperties(env)<0) goto bailout; - - if(tjCompress2(handle, &srcBuf[y*actualPitch + x*tjPixelSize[pf]], width, - pitch, height, pf, &jpegBuf, &jpegSize, jpegSubsamp, jpegQual, - flags|TJFLAG_NOREALLOC)==-1) - _throwtj(); - - bailout: - if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0); - if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); - return (jint)jpegSize; + tjhandle handle = 0; + unsigned long jpegSize = 0; + jsize arraySize = 0, actualPitch; + unsigned char *srcBuf = NULL, *jpegBuf = NULL; + + gethandle(); + + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF || width < 1 || + height < 1 || pitch < 0) + _throwarg("Invalid argument in compress()"); + if (org_libjpegturbo_turbojpeg_TJ_NUMPF != TJ_NUMPF) + _throwarg("Mismatch between Java and C API"); + + actualPitch = (pitch == 0) ? 
width * tjPixelSize[pf] : pitch; + arraySize = (y + height - 1) * actualPitch + (x + width) * tjPixelSize[pf]; + if ((*env)->GetArrayLength(env, src) * srcElementSize < arraySize) + _throwarg("Source buffer is not large enough"); + jpegSize = tjBufSize(width, height, jpegSubsamp); + if ((*env)->GetArrayLength(env, dst) < (jsize)jpegSize) + _throwarg("Destination buffer is not large enough"); + + if (ProcessSystemProperties(env) < 0) goto bailout; + + bailif0(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if (tjCompress2(handle, &srcBuf[y * actualPitch + x * tjPixelSize[pf]], + width, pitch, height, pf, &jpegBuf, &jpegSize, jpegSubsamp, + jpegQual, flags | TJFLAG_NOREALLOC) == -1) + _throwtj(); + +bailout: + if (jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0); + if (srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); + return (jint)jpegSize; } /* TurboJPEG 1.3.x: TJCompressor::compress() byte source */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII - (JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width, - jint pitch, jint height, jint pf, jbyteArray dst, jint jpegSubsamp, - jint jpegQual, jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width, + jint pitch, jint height, jint pf, jbyteArray dst, jint jpegSubsamp, + jint jpegQual, jint flags) { - return TJCompressor_compress(env, obj, src, 1, x, y, width, pitch, height, - pf, dst, jpegSubsamp, jpegQual, flags); + return TJCompressor_compress(env, obj, src, 1, x, y, width, pitch, height, + pf, dst, jpegSubsamp, jpegQual, flags); } /* TurboJPEG 1.2.x: TJCompressor::compress() byte source */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII - (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch, - jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint 
jpegQual, - jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch, + jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual, + jint flags) { - return TJCompressor_compress(env, obj, src, 1, 0, 0, width, pitch, height, - pf, dst, jpegSubsamp, jpegQual, flags); + return TJCompressor_compress(env, obj, src, 1, 0, 0, width, pitch, height, + pf, dst, jpegSubsamp, jpegQual, flags); } /* TurboJPEG 1.3.x: TJCompressor::compress() int source */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII - (JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width, - jint stride, jint height, jint pf, jbyteArray dst, jint jpegSubsamp, - jint jpegQual, jint flags) + (JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width, + jint stride, jint height, jint pf, jbyteArray dst, jint jpegSubsamp, + jint jpegQual, jint flags) { - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) - _throwarg("Invalid argument in compress()"); - if(tjPixelSize[pf]!=sizeof(jint)) - _throwarg("Pixel format must be 32-bit when compressing from an integer buffer."); + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in compress()"); + if (tjPixelSize[pf] != sizeof(jint)) + _throwarg("Pixel format must be 32-bit when compressing from an integer buffer."); - return TJCompressor_compress(env, obj, src, sizeof(jint), x, y, width, - stride*sizeof(jint), height, pf, dst, jpegSubsamp, jpegQual, flags); + return TJCompressor_compress(env, obj, src, sizeof(jint), x, y, width, + stride * sizeof(jint), height, pf, dst, + jpegSubsamp, jpegQual, flags); - bailout: - return 0; +bailout: + return 0; } /* TurboJPEG 1.2.x: TJCompressor::compress() int source */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII - (JNIEnv *env, jobject obj, jintArray src, jint width, jint stride, - jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint 
jpegQual, - jint flags) + (JNIEnv *env, jobject obj, jintArray src, jint width, jint stride, + jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual, + jint flags) { - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) - _throwarg("Invalid argument in compress()"); - if(tjPixelSize[pf]!=sizeof(jint)) - _throwarg("Pixel format must be 32-bit when compressing from an integer buffer."); + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in compress()"); + if (tjPixelSize[pf] != sizeof(jint)) + _throwarg("Pixel format must be 32-bit when compressing from an integer buffer."); - return TJCompressor_compress(env, obj, src, sizeof(jint), 0, 0, width, - stride*sizeof(jint), height, pf, dst, jpegSubsamp, jpegQual, flags); + return TJCompressor_compress(env, obj, src, sizeof(jint), 0, 0, width, + stride * sizeof(jint), height, pf, dst, + jpegSubsamp, jpegQual, flags); - bailout: - return 0; +bailout: + return 0; } /* TurboJPEG 1.4.x: TJCompressor::compressFromYUV() */ JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3_3B_3II_3III_3BII - (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, - jint width, jintArray jSrcStrides, jint height, jint subsamp, - jbyteArray dst, jint jpegQual, jint flags) + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jint width, jintArray jSrcStrides, jint height, jint subsamp, + jbyteArray dst, jint jpegQual, jint flags) { - tjhandle handle=0; - unsigned long jpegSize=0; - jbyteArray jSrcPlanes[3]={NULL, NULL, NULL}; - const unsigned char *srcPlanes[3]; - unsigned char *jpegBuf=NULL; - int *srcOffsets=NULL, *srcStrides=NULL; - int nc=(subsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 
1:3), i; - - gethandle(); - - if(subsamp<0 || subsamp>=org_libjpegturbo_turbojpeg_TJ_NUMSAMP) - _throwarg("Invalid argument in compressFromYUV()"); - if(org_libjpegturbo_turbojpeg_TJ_NUMSAMP!=TJ_NUMSAMP) - _throwarg("Mismatch between Java and C API"); - - if((*env)->GetArrayLength(env, srcobjs)GetArrayLength(env, jSrcOffsets)GetArrayLength(env, jSrcStrides)GetArrayLength(env, dst)<(jsize)jpegSize) - _throwarg("Destination buffer is not large enough"); - - bailif0(srcOffsets=(*env)->GetPrimitiveArrayCritical(env, jSrcOffsets, 0)); - bailif0(srcStrides=(*env)->GetPrimitiveArrayCritical(env, jSrcStrides, 0)); - for(i=0; iGetObjectArrayElement(env, srcobjs, i)); - if((*env)->GetArrayLength(env, jSrcPlanes[i])GetPrimitiveArrayCritical(env, jSrcPlanes[i], - 0)); - srcPlanes[i]=&srcPlanes[i][srcOffsets[i]]; - } - bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); - - if(ProcessSystemProperties(env)<0) goto bailout; - - if(tjCompressFromYUVPlanes(handle, srcPlanes, width, srcStrides, height, - subsamp, &jpegBuf, &jpegSize, jpegQual, flags|TJFLAG_NOREALLOC)==-1) - _throwtj(); - - bailout: - if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0); - for(i=0; iReleasePrimitiveArrayCritical(env, jSrcPlanes[i], - (unsigned char *)srcPlanes[i], 0); - } - if(srcStrides) - (*env)->ReleasePrimitiveArrayCritical(env, jSrcStrides, srcStrides, 0); - if(srcOffsets) - (*env)->ReleasePrimitiveArrayCritical(env, jSrcOffsets, srcOffsets, 0); - return (jint)jpegSize; + tjhandle handle = 0; + unsigned long jpegSize = 0; + jbyteArray jSrcPlanes[3] = { NULL, NULL, NULL }; + const unsigned char *srcPlanes[3]; + unsigned char *jpegBuf = NULL; + int *srcOffsets = NULL, *srcStrides = NULL; + int nc = (subsamp == org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY ? 
1 : 3), i; + + gethandle(); + + if (subsamp < 0 || subsamp >= org_libjpegturbo_turbojpeg_TJ_NUMSAMP) + _throwarg("Invalid argument in compressFromYUV()"); + if (org_libjpegturbo_turbojpeg_TJ_NUMSAMP != TJ_NUMSAMP) + _throwarg("Mismatch between Java and C API"); + + if ((*env)->GetArrayLength(env, srcobjs) < nc) + _throwarg("Planes array is too small for the subsampling type"); + if ((*env)->GetArrayLength(env, jSrcOffsets) < nc) + _throwarg("Offsets array is too small for the subsampling type"); + if ((*env)->GetArrayLength(env, jSrcStrides) < nc) + _throwarg("Strides array is too small for the subsampling type"); + + jpegSize = tjBufSize(width, height, subsamp); + if ((*env)->GetArrayLength(env, dst) < (jsize)jpegSize) + _throwarg("Destination buffer is not large enough"); + + if (ProcessSystemProperties(env) < 0) goto bailout; + + bailif0(srcOffsets = (*env)->GetPrimitiveArrayCritical(env, jSrcOffsets, 0)); + bailif0(srcStrides = (*env)->GetPrimitiveArrayCritical(env, jSrcStrides, 0)); + for (i = 0; i < nc; i++) { + int planeSize = tjPlaneSizeYUV(i, width, srcStrides[i], height, subsamp); + int pw = tjPlaneWidth(i, width, subsamp); + + if (planeSize < 0 || pw < 0) + _throwarg(tjGetErrorStr()); + + if (srcOffsets[i] < 0) + _throwarg("Invalid argument in compressFromYUV()"); + if (srcStrides[i] < 0 && srcOffsets[i] - planeSize + pw < 0) + _throwarg("Negative plane stride would cause memory to be accessed below plane boundary"); + + bailif0(jSrcPlanes[i] = (*env)->GetObjectArrayElement(env, srcobjs, i)); + if ((*env)->GetArrayLength(env, jSrcPlanes[i]) < srcOffsets[i] + planeSize) + _throwarg("Source plane is not large enough"); + + bailif0(srcPlanes[i] = + (*env)->GetPrimitiveArrayCritical(env, jSrcPlanes[i], 0)); + srcPlanes[i] = &srcPlanes[i][srcOffsets[i]]; + } + bailif0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if (tjCompressFromYUVPlanes(handle, srcPlanes, width, srcStrides, height, + subsamp, &jpegBuf, &jpegSize, jpegQual, + flags | 
TJFLAG_NOREALLOC) == -1) + _throwtj(); + +bailout: + if (jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0); + for (i = 0; i < nc; i++) { + if (srcPlanes[i] && jSrcPlanes[i]) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcPlanes[i], + (unsigned char *)srcPlanes[i], 0); + } + if (srcStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcStrides, srcStrides, 0); + if (srcOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcOffsets, srcOffsets, 0); + return (jint)jpegSize; } static void TJCompressor_encodeYUV - (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint x, jint y, - jint width, jint pitch, jint height, jint pf, jobjectArray dstobjs, - jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) + (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint x, jint y, + jint width, jint pitch, jint height, jint pf, jobjectArray dstobjs, + jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) { - tjhandle handle=0; - jsize arraySize=0, actualPitch; - jbyteArray jDstPlanes[3]={NULL, NULL, NULL}; - unsigned char *srcBuf=NULL, *dstPlanes[3]; - int *dstOffsets=NULL, *dstStrides=NULL; - int nc=(subsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 
1:3), i; - - gethandle(); - - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1 - || pitch<0 || subsamp<0 || subsamp>=org_libjpegturbo_turbojpeg_TJ_NUMSAMP) - _throwarg("Invalid argument in encodeYUV()"); - if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF - || org_libjpegturbo_turbojpeg_TJ_NUMSAMP!=TJ_NUMSAMP) - _throwarg("Mismatch between Java and C API"); - - if((*env)->GetArrayLength(env, dstobjs)GetArrayLength(env, jDstOffsets)GetArrayLength(env, jDstStrides)GetArrayLength(env, src)*srcElementSizeGetPrimitiveArrayCritical(env, jDstOffsets, 0)); - bailif0(dstStrides=(*env)->GetPrimitiveArrayCritical(env, jDstStrides, 0)); - for(i=0; iGetObjectArrayElement(env, dstobjs, i)); - if((*env)->GetArrayLength(env, jDstPlanes[i])GetPrimitiveArrayCritical(env, jDstPlanes[i], - 0)); - dstPlanes[i]=&dstPlanes[i][dstOffsets[i]]; - } - bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); - - if(tjEncodeYUVPlanes(handle, &srcBuf[y*actualPitch + x*tjPixelSize[pf]], - width, pitch, height, pf, dstPlanes, dstStrides, subsamp, flags)==-1) - _throwtj(); - - bailout: - if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); - for(i=0; iReleasePrimitiveArrayCritical(env, jDstPlanes[i], dstPlanes[i], - 0); - } - if(dstStrides) - (*env)->ReleasePrimitiveArrayCritical(env, jDstStrides, dstStrides, 0); - if(dstOffsets) - (*env)->ReleasePrimitiveArrayCritical(env, jDstOffsets, dstOffsets, 0); - return; + tjhandle handle = 0; + jsize arraySize = 0, actualPitch; + jbyteArray jDstPlanes[3] = { NULL, NULL, NULL }; + unsigned char *srcBuf = NULL, *dstPlanes[3]; + int *dstOffsets = NULL, *dstStrides = NULL; + int nc = (subsamp == org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY ? 
1 : 3), i; + + gethandle(); + + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF || width < 1 || + height < 1 || pitch < 0 || subsamp < 0 || + subsamp >= org_libjpegturbo_turbojpeg_TJ_NUMSAMP) + _throwarg("Invalid argument in encodeYUV()"); + if (org_libjpegturbo_turbojpeg_TJ_NUMPF != TJ_NUMPF || + org_libjpegturbo_turbojpeg_TJ_NUMSAMP != TJ_NUMSAMP) + _throwarg("Mismatch between Java and C API"); + + if ((*env)->GetArrayLength(env, dstobjs) < nc) + _throwarg("Planes array is too small for the subsampling type"); + if ((*env)->GetArrayLength(env, jDstOffsets) < nc) + _throwarg("Offsets array is too small for the subsampling type"); + if ((*env)->GetArrayLength(env, jDstStrides) < nc) + _throwarg("Strides array is too small for the subsampling type"); + + actualPitch = (pitch == 0) ? width * tjPixelSize[pf] : pitch; + arraySize = (y + height - 1) * actualPitch + (x + width) * tjPixelSize[pf]; + if ((*env)->GetArrayLength(env, src) * srcElementSize < arraySize) + _throwarg("Source buffer is not large enough"); + + bailif0(dstOffsets = (*env)->GetPrimitiveArrayCritical(env, jDstOffsets, 0)); + bailif0(dstStrides = (*env)->GetPrimitiveArrayCritical(env, jDstStrides, 0)); + for (i = 0; i < nc; i++) { + int planeSize = tjPlaneSizeYUV(i, width, dstStrides[i], height, subsamp); + int pw = tjPlaneWidth(i, width, subsamp); + + if (planeSize < 0 || pw < 0) + _throwarg(tjGetErrorStr()); + + if (dstOffsets[i] < 0) + _throwarg("Invalid argument in encodeYUV()"); + if (dstStrides[i] < 0 && dstOffsets[i] - planeSize + pw < 0) + _throwarg("Negative plane stride would cause memory to be accessed below plane boundary"); + + bailif0(jDstPlanes[i] = (*env)->GetObjectArrayElement(env, dstobjs, i)); + if ((*env)->GetArrayLength(env, jDstPlanes[i]) < dstOffsets[i] + planeSize) + _throwarg("Destination plane is not large enough"); + + bailif0(dstPlanes[i] = + (*env)->GetPrimitiveArrayCritical(env, jDstPlanes[i], 0)); + dstPlanes[i] = &dstPlanes[i][dstOffsets[i]]; + } + 
bailif0(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0)); + + if (tjEncodeYUVPlanes(handle, &srcBuf[y * actualPitch + x * tjPixelSize[pf]], + width, pitch, height, pf, dstPlanes, dstStrides, + subsamp, flags) == -1) + _throwtj(); + +bailout: + if (srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); + for (i = 0; i < nc; i++) { + if (dstPlanes[i] && jDstPlanes[i]) + (*env)->ReleasePrimitiveArrayCritical(env, jDstPlanes[i], dstPlanes[i], + 0); + } + if (dstStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jDstStrides, dstStrides, 0); + if (dstOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jDstOffsets, dstOffsets, 0); } /* TurboJPEG 1.4.x: TJCompressor::encodeYUV() byte source */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3_3B_3I_3III - (JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width, - jint pitch, jint height, jint pf, jobjectArray dstobjs, - jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width, + jint pitch, jint height, jint pf, jobjectArray dstobjs, + jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) { - TJCompressor_encodeYUV(env, obj, src, 1, x, y, width, pitch, height, pf, - dstobjs, jDstOffsets, jDstStrides, subsamp, flags); + TJCompressor_encodeYUV(env, obj, src, 1, x, y, width, pitch, height, pf, + dstobjs, jDstOffsets, jDstStrides, subsamp, flags); } /* TurboJPEG 1.4.x: TJCompressor::encodeYUV() int source */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3_3B_3I_3III - (JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width, - jint stride, jint height, jint pf, jobjectArray dstobjs, - jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) + (JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width, + jint stride, jint height, jint pf, jobjectArray dstobjs, + 
jintArray jDstOffsets, jintArray jDstStrides, jint subsamp, jint flags) { - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) - _throwarg("Invalid argument in encodeYUV()"); - if(tjPixelSize[pf]!=sizeof(jint)) - _throwarg("Pixel format must be 32-bit when encoding from an integer buffer."); + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in encodeYUV()"); + if (tjPixelSize[pf] != sizeof(jint)) + _throwarg("Pixel format must be 32-bit when encoding from an integer buffer."); - TJCompressor_encodeYUV(env, obj, src, sizeof(jint), x, y, width, - stride*sizeof(jint), height, pf, dstobjs, jDstOffsets, jDstStrides, - subsamp, flags); + TJCompressor_encodeYUV(env, obj, src, sizeof(jint), x, y, width, + stride * sizeof(jint), height, pf, dstobjs, + jDstOffsets, jDstStrides, subsamp, flags); - bailout: - return; +bailout: + return; } JNIEXPORT void JNICALL TJCompressor_encodeYUV_12 - (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint width, - jint pitch, jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) + (JNIEnv *env, jobject obj, jarray src, jint srcElementSize, jint width, + jint pitch, jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) { - tjhandle handle=0; - jsize arraySize=0; - unsigned char *srcBuf=NULL, *dstBuf=NULL; - - gethandle(); - - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1 - || pitch<0) - _throwarg("Invalid argument in encodeYUV()"); - if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF) - _throwarg("Mismatch between Java and C API"); - - arraySize=(pitch==0)? 
width*tjPixelSize[pf]*height:pitch*height; - if((*env)->GetArrayLength(env, src)*srcElementSizeGetArrayLength(env, dst) - <(jsize)tjBufSizeYUV(width, height, subsamp)) - _throwarg("Destination buffer is not large enough"); - - bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); - bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); - - if(tjEncodeYUV2(handle, srcBuf, width, pitch, height, pf, dstBuf, subsamp, - flags)==-1) - _throwtj(); - - bailout: - if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); - if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); - return; + tjhandle handle = 0; + jsize arraySize = 0; + unsigned char *srcBuf = NULL, *dstBuf = NULL; + + gethandle(); + + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF || width < 1 || + height < 1 || pitch < 0) + _throwarg("Invalid argument in encodeYUV()"); + if (org_libjpegturbo_turbojpeg_TJ_NUMPF != TJ_NUMPF) + _throwarg("Mismatch between Java and C API"); + + arraySize = (pitch == 0) ? 
width * tjPixelSize[pf] * height : pitch * height; + if ((*env)->GetArrayLength(env, src) * srcElementSize < arraySize) + _throwarg("Source buffer is not large enough"); + if ((*env)->GetArrayLength(env, dst) < + (jsize)tjBufSizeYUV(width, height, subsamp)) + _throwarg("Destination buffer is not large enough"); + + bailif0(srcBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if (tjEncodeYUV2(handle, srcBuf, width, pitch, height, pf, dstBuf, subsamp, + flags) == -1) + _throwtj(); + +bailout: + if (dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + if (srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0); } /* TurboJPEG 1.2.x: TJCompressor::encodeYUV() byte source */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII - (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch, - jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch, + jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) { - TJCompressor_encodeYUV_12(env, obj, src, 1, width, pitch, height, pf, dst, - subsamp, flags); + TJCompressor_encodeYUV_12(env, obj, src, 1, width, pitch, height, pf, dst, + subsamp, flags); } /* TurboJPEG 1.2.x: TJCompressor::encodeYUV() int source */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII - (JNIEnv *env, jobject obj, jintArray src, jint width, jint stride, - jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) + (JNIEnv *env, jobject obj, jintArray src, jint width, jint stride, + jint height, jint pf, jbyteArray dst, jint subsamp, jint flags) { - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) - _throwarg("Invalid argument in encodeYUV()"); - if(tjPixelSize[pf]!=sizeof(jint)) - _throwarg("Pixel format must be 32-bit when encoding from an integer buffer."); + if (pf < 0 
|| pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in encodeYUV()"); + if (tjPixelSize[pf] != sizeof(jint)) + _throwarg("Pixel format must be 32-bit when encoding from an integer buffer."); - TJCompressor_encodeYUV_12(env, obj, src, sizeof(jint), width, - stride*sizeof(jint), height, pf, dst, subsamp, flags); + TJCompressor_encodeYUV_12(env, obj, src, sizeof(jint), width, + stride * sizeof(jint), height, pf, dst, subsamp, + flags); - bailout: - return; +bailout: + return; } /* TurboJPEG 1.2.x: TJCompressor::destroy() */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy - (JNIEnv *env, jobject obj) + (JNIEnv *env, jobject obj) { - tjhandle handle=0; + tjhandle handle = 0; - gethandle(); + gethandle(); - if(tjDestroy(handle)==-1) _throwtj(); - (*env)->SetLongField(env, obj, _fid, 0); + if (tjDestroy(handle) == -1) _throwtj(); + (*env)->SetLongField(env, obj, _fid, 0); - bailout: - return; +bailout: + return; } /* TurboJPEG 1.2.x: TJDecompressor::init() */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_init - (JNIEnv *env, jobject obj) + (JNIEnv *env, jobject obj) { - jclass cls; - jfieldID fid; - tjhandle handle; + jclass cls; + jfieldID fid; + tjhandle handle; - if((handle=tjInitDecompress())==NULL) _throwtj(); + if ((handle = tjInitDecompress()) == NULL) + _throw(tjGetErrorStr(), "org/libjpegturbo/turbojpeg/TJException"); - bailif0(cls=(*env)->GetObjectClass(env, obj)); - bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J")); - (*env)->SetLongField(env, obj, fid, (size_t)handle); + bailif0(cls = (*env)->GetObjectClass(env, obj)); + bailif0(fid = (*env)->GetFieldID(env, cls, "handle", "J")); + (*env)->SetLongField(env, obj, fid, (size_t)handle); - bailout: - return; +bailout: + return; } /* TurboJPEG 1.2.x: TJDecompressor::getScalingFactors() */ JNIEXPORT jobjectArray JNICALL Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors - (JNIEnv *env, jclass cls) + (JNIEnv *env, jclass cls) { - 
jclass sfcls=NULL; jfieldID fid=0; - tjscalingfactor *sf=NULL; int n=0, i; - jobject sfobj=NULL; - jobjectArray sfjava=NULL; - - if((sf=tjGetScalingFactors(&n))==NULL || n==0) - _throwarg(tjGetErrorStr()); - - bailif0(sfcls=(*env)->FindClass(env, "org/libjpegturbo/turbojpeg/TJScalingFactor")); - bailif0(sfjava=(jobjectArray)(*env)->NewObjectArray(env, n, sfcls, 0)); - - for(i=0; iAllocObject(env, sfcls)); - bailif0(fid=(*env)->GetFieldID(env, sfcls, "num", "I")); - (*env)->SetIntField(env, sfobj, fid, sf[i].num); - bailif0(fid=(*env)->GetFieldID(env, sfcls, "denom", "I")); - (*env)->SetIntField(env, sfobj, fid, sf[i].denom); - (*env)->SetObjectArrayElement(env, sfjava, i, sfobj); - } - - bailout: - return sfjava; + jclass sfcls = NULL; + jfieldID fid = 0; + tjscalingfactor *sf = NULL; + int n = 0, i; + jobject sfobj = NULL; + jobjectArray sfjava = NULL; + + if ((sf = tjGetScalingFactors(&n)) == NULL || n == 0) + _throwarg(tjGetErrorStr()); + + bailif0(sfcls = (*env)->FindClass(env, + "org/libjpegturbo/turbojpeg/TJScalingFactor")); + bailif0(sfjava = (jobjectArray)(*env)->NewObjectArray(env, n, sfcls, 0)); + + for (i = 0; i < n; i++) { + bailif0(sfobj = (*env)->AllocObject(env, sfcls)); + bailif0(fid = (*env)->GetFieldID(env, sfcls, "num", "I")); + (*env)->SetIntField(env, sfobj, fid, sf[i].num); + bailif0(fid = (*env)->GetFieldID(env, sfcls, "denom", "I")); + (*env)->SetIntField(env, sfobj, fid, sf[i].denom); + (*env)->SetObjectArrayElement(env, sfjava, i, sfobj); + } + +bailout: + return sfjava; } /* TurboJPEG 1.2.x: TJDecompressor::decompressHeader() */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader - (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize) + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize) { - tjhandle handle=0; - unsigned char *jpegBuf=NULL; - int width=0, height=0, jpegSubsamp=-1, jpegColorspace=-1; + tjhandle handle = 0; + unsigned char *jpegBuf = NULL; + int width = 0, height = 0, jpegSubsamp 
= -1, jpegColorspace = -1; - gethandle(); + gethandle(); - if((*env)->GetArrayLength(env, src)GetArrayLength(env, src) < jpegSize) + _throwarg("Source buffer is not large enough"); - bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0)); - if(tjDecompressHeader3(handle, jpegBuf, (unsigned long)jpegSize, - &width, &height, &jpegSubsamp, &jpegColorspace)==-1) - _throwtj(); + if (tjDecompressHeader3(handle, jpegBuf, (unsigned long)jpegSize, &width, + &height, &jpegSubsamp, &jpegColorspace) == -1) + _throwtj(); - (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); jpegBuf=NULL; + (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); + jpegBuf = NULL; - bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegSubsamp", "I")); - (*env)->SetIntField(env, obj, _fid, jpegSubsamp); - if((_fid=(*env)->GetFieldID(env, _cls, "jpegColorspace", "I"))==0) - (*env)->ExceptionClear(env); - else - (*env)->SetIntField(env, obj, _fid, jpegColorspace); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegWidth", "I")); - (*env)->SetIntField(env, obj, _fid, width); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); - (*env)->SetIntField(env, obj, _fid, height); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegSubsamp", "I")); + (*env)->SetIntField(env, obj, _fid, jpegSubsamp); + if ((_fid = (*env)->GetFieldID(env, _cls, "jpegColorspace", "I")) == 0) + (*env)->ExceptionClear(env); + else + (*env)->SetIntField(env, obj, _fid, jpegColorspace); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegWidth", "I")); + (*env)->SetIntField(env, obj, _fid, width); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + (*env)->SetIntField(env, obj, _fid, height); - bailout: - if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); - return; +bailout: + if (jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); } static void TJDecompressor_decompress - 
(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jarray dst, - jint dstElementSize, jint x, jint y, jint width, jint pitch, jint height, - jint pf, jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jarray dst, + jint dstElementSize, jint x, jint y, jint width, jint pitch, jint height, + jint pf, jint flags) { - tjhandle handle=0; - jsize arraySize=0, actualPitch; - unsigned char *jpegBuf=NULL, *dstBuf=NULL; - - gethandle(); - - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) - _throwarg("Invalid argument in decompress()"); - if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF) - _throwarg("Mismatch between Java and C API"); - - if((*env)->GetArrayLength(env, src)GetArrayLength(env, dst)*dstElementSizeGetPrimitiveArrayCritical(env, src, 0)); - bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); - - if(tjDecompress2(handle, jpegBuf, (unsigned long)jpegSize, - &dstBuf[y*actualPitch + x*tjPixelSize[pf]], width, pitch, height, pf, - flags)==-1) - _throwtj(); - - bailout: - if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); - if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); - return; + tjhandle handle = 0; + jsize arraySize = 0, actualPitch; + unsigned char *jpegBuf = NULL, *dstBuf = NULL; + + gethandle(); + + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decompress()"); + if (org_libjpegturbo_turbojpeg_TJ_NUMPF != TJ_NUMPF) + _throwarg("Mismatch between Java and C API"); + + if ((*env)->GetArrayLength(env, src) < jpegSize) + _throwarg("Source buffer is not large enough"); + actualPitch = (pitch == 0) ? 
width * tjPixelSize[pf] : pitch; + arraySize = (y + height - 1) * actualPitch + (x + width) * tjPixelSize[pf]; + if ((*env)->GetArrayLength(env, dst) * dstElementSize < arraySize) + _throwarg("Destination buffer is not large enough"); + + bailif0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if (tjDecompress2(handle, jpegBuf, (unsigned long)jpegSize, + &dstBuf[y * actualPitch + x * tjPixelSize[pf]], width, + pitch, height, pf, flags) == -1) + _throwtj(); + +bailout: + if (dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + if (jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); } /* TurboJPEG 1.3.x: TJDecompressor::decompress() byte destination */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII - (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, - jint x, jint y, jint width, jint pitch, jint height, jint pf, jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, + jint x, jint y, jint width, jint pitch, jint height, jint pf, jint flags) { - TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, x, y, width, - pitch, height, pf, flags); + TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, x, y, width, + pitch, height, pf, flags); } /* TurboJPEG 1.2.x: TJDecompressor::decompress() byte destination */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII - (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, - jint width, jint pitch, jint height, jint pf, jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, + jint width, jint pitch, jint height, jint pf, jint flags) { - TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, 0, 0, width, - pitch, height, pf, flags); + TJDecompressor_decompress(env, obj, src, jpegSize, dst, 1, 0, 0, 
width, + pitch, height, pf, flags); } /* TurboJPEG 1.3.x: TJDecompressor::decompress() int destination */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII - (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst, - jint x, jint y, jint width, jint stride, jint height, jint pf, jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst, + jint x, jint y, jint width, jint stride, jint height, jint pf, jint flags) { - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) - _throwarg("Invalid argument in decompress()"); - if(tjPixelSize[pf]!=sizeof(jint)) - _throwarg("Pixel format must be 32-bit when decompressing to an integer buffer."); + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decompress()"); + if (tjPixelSize[pf] != sizeof(jint)) + _throwarg("Pixel format must be 32-bit when decompressing to an integer buffer."); - TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), x, y, - width, stride*sizeof(jint), height, pf, flags); + TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), x, y, + width, stride * sizeof(jint), height, pf, flags); - bailout: - return; +bailout: + return; } /* TurboJPEG 1.2.x: TJDecompressor::decompress() int destination */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII - (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst, - jint width, jint stride, jint height, jint pf, jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst, + jint width, jint stride, jint height, jint pf, jint flags) { - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) - _throwarg("Invalid argument in decompress()"); - if(tjPixelSize[pf]!=sizeof(jint)) - _throwarg("Pixel format must be 32-bit when decompressing to an integer buffer."); + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF) + 
_throwarg("Invalid argument in decompress()"); + if (tjPixelSize[pf] != sizeof(jint)) + _throwarg("Pixel format must be 32-bit when decompressing to an integer buffer."); - TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), 0, 0, - width, stride*sizeof(jint), height, pf, flags); - - bailout: - return; + TJDecompressor_decompress(env, obj, src, jpegSize, dst, sizeof(jint), 0, 0, + width, stride * sizeof(jint), height, pf, flags); +bailout: + return; } /* TurboJPEG 1.4.x: TJDecompressor::decompressToYUV() */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3_3B_3II_3III - (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, - jobjectArray dstobjs, jintArray jDstOffsets, jint desiredWidth, - jintArray jDstStrides, jint desiredHeight, jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, + jobjectArray dstobjs, jintArray jDstOffsets, jint desiredWidth, + jintArray jDstStrides, jint desiredHeight, jint flags) { - tjhandle handle=0; - jbyteArray jDstPlanes[3]={NULL, NULL, NULL}; - unsigned char *jpegBuf=NULL, *dstPlanes[3]; - int *dstOffsets=NULL, *dstStrides=NULL; - int jpegSubsamp=-1, jpegWidth=0, jpegHeight=0; - int nc=0, i, width, height, scaledWidth, scaledHeight, nsf=0; - tjscalingfactor *sf; - - - gethandle(); - - if((*env)->GetArrayLength(env, src)GetFieldID(env, _cls, "jpegSubsamp", "I")); - jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegWidth", "I")); - jpegWidth=(int)(*env)->GetIntField(env, obj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); - jpegHeight=(int)(*env)->GetIntField(env, obj, _fid); - - nc=(jpegSubsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 
1:3); - - width=desiredWidth; height=desiredHeight; - if(width==0) width=jpegWidth; - if(height==0) height=jpegHeight; - sf=tjGetScalingFactors(&nsf); - if(!sf || nsf<1) - _throwarg(tjGetErrorStr()); - for(i=0; i=nsf) - _throwarg("Could not scale down to desired image dimensions"); - - bailif0(dstOffsets=(*env)->GetPrimitiveArrayCritical(env, jDstOffsets, 0)); - bailif0(dstStrides=(*env)->GetPrimitiveArrayCritical(env, jDstStrides, 0)); - for(i=0; iGetObjectArrayElement(env, dstobjs, i)); - if((*env)->GetArrayLength(env, jDstPlanes[i])GetPrimitiveArrayCritical(env, jDstPlanes[i], - 0)); - dstPlanes[i]=&dstPlanes[i][dstOffsets[i]]; - } - bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); - - if(tjDecompressToYUVPlanes(handle, jpegBuf, (unsigned long)jpegSize, - dstPlanes, desiredWidth, dstStrides, desiredHeight, flags)==-1) - _throwtj(); - - bailout: - if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); - for(i=0; iReleasePrimitiveArrayCritical(env, jDstPlanes[i], dstPlanes[i], - 0); - } - if(dstStrides) - (*env)->ReleasePrimitiveArrayCritical(env, jDstStrides, dstStrides, 0); - if(dstOffsets) - (*env)->ReleasePrimitiveArrayCritical(env, jDstOffsets, dstOffsets, 0); - return; + tjhandle handle = 0; + jbyteArray jDstPlanes[3] = { NULL, NULL, NULL }; + unsigned char *jpegBuf = NULL, *dstPlanes[3]; + int *dstOffsets = NULL, *dstStrides = NULL; + int jpegSubsamp = -1, jpegWidth = 0, jpegHeight = 0; + int nc = 0, i, width, height, scaledWidth, scaledHeight, nsf = 0; + tjscalingfactor *sf; + + gethandle(); + + if ((*env)->GetArrayLength(env, src) < jpegSize) + _throwarg("Source buffer is not large enough"); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegSubsamp", "I")); + jpegSubsamp = (int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegWidth", "I")); + jpegWidth = (int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + jpegHeight = 
(int)(*env)->GetIntField(env, obj, _fid); + + nc = (jpegSubsamp == org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY ? 1 : 3); + + width = desiredWidth; + height = desiredHeight; + if (width == 0) width = jpegWidth; + if (height == 0) height = jpegHeight; + sf = tjGetScalingFactors(&nsf); + if (!sf || nsf < 1) + _throwarg(tjGetErrorStr()); + for (i = 0; i < nsf; i++) { + scaledWidth = TJSCALED(jpegWidth, sf[i]); + scaledHeight = TJSCALED(jpegHeight, sf[i]); + if (scaledWidth <= width && scaledHeight <= height) + break; + } + if (i >= nsf) + _throwarg("Could not scale down to desired image dimensions"); + + bailif0(dstOffsets = (*env)->GetPrimitiveArrayCritical(env, jDstOffsets, 0)); + bailif0(dstStrides = (*env)->GetPrimitiveArrayCritical(env, jDstStrides, 0)); + for (i = 0; i < nc; i++) { + int planeSize = tjPlaneSizeYUV(i, scaledWidth, dstStrides[i], scaledHeight, + jpegSubsamp); + int pw = tjPlaneWidth(i, scaledWidth, jpegSubsamp); + + if (planeSize < 0 || pw < 0) + _throwarg(tjGetErrorStr()); + + if (dstOffsets[i] < 0) + _throwarg("Invalid argument in decompressToYUV()"); + if (dstStrides[i] < 0 && dstOffsets[i] - planeSize + pw < 0) + _throwarg("Negative plane stride would cause memory to be accessed below plane boundary"); + + bailif0(jDstPlanes[i] = (*env)->GetObjectArrayElement(env, dstobjs, i)); + if ((*env)->GetArrayLength(env, jDstPlanes[i]) < dstOffsets[i] + planeSize) + _throwarg("Destination plane is not large enough"); + + bailif0(dstPlanes[i] = + (*env)->GetPrimitiveArrayCritical(env, jDstPlanes[i], 0)); + dstPlanes[i] = &dstPlanes[i][dstOffsets[i]]; + } + bailif0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0)); + + if (tjDecompressToYUVPlanes(handle, jpegBuf, (unsigned long)jpegSize, + dstPlanes, desiredWidth, dstStrides, + desiredHeight, flags) == -1) + _throwtj(); + +bailout: + if (jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); + for (i = 0; i < nc; i++) { + if (dstPlanes[i] && jDstPlanes[i]) + 
(*env)->ReleasePrimitiveArrayCritical(env, jDstPlanes[i], dstPlanes[i], + 0); + } + if (dstStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jDstStrides, dstStrides, 0); + if (dstOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jDstOffsets, dstOffsets, 0); } /* TurboJPEG 1.2.x: TJDecompressor::decompressToYUV() */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI - (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, - jint flags) + (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst, + jint flags) { - tjhandle handle=0; - unsigned char *jpegBuf=NULL, *dstBuf=NULL; - int jpegSubsamp=-1, jpegWidth=0, jpegHeight=0; - - gethandle(); - - if((*env)->GetArrayLength(env, src)GetFieldID(env, _cls, "jpegSubsamp", "I")); - jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegWidth", "I")); - jpegWidth=(int)(*env)->GetIntField(env, obj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); - jpegHeight=(int)(*env)->GetIntField(env, obj, _fid); - if((*env)->GetArrayLength(env, dst) - <(jsize)tjBufSizeYUV(jpegWidth, jpegHeight, jpegSubsamp)) - _throwarg("Destination buffer is not large enough"); - - bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0)); - bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); - - if(tjDecompressToYUV(handle, jpegBuf, (unsigned long)jpegSize, dstBuf, - flags)==-1) - _throwtj(); - - bailout: - if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); - if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); - return; + tjhandle handle = 0; + unsigned char *jpegBuf = NULL, *dstBuf = NULL; + int jpegSubsamp = -1, jpegWidth = 0, jpegHeight = 0; + + gethandle(); + + if ((*env)->GetArrayLength(env, src) < jpegSize) + _throwarg("Source buffer is not large enough"); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegSubsamp", "I")); + 
jpegSubsamp = (int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegWidth", "I")); + jpegWidth = (int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + jpegHeight = (int)(*env)->GetIntField(env, obj, _fid); + if ((*env)->GetArrayLength(env, dst) < + (jsize)tjBufSizeYUV(jpegWidth, jpegHeight, jpegSubsamp)) + _throwarg("Destination buffer is not large enough"); + + bailif0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, src, 0)); + bailif0(dstBuf = (*env)->GetPrimitiveArrayCritical(env, dst, 0)); + + if (tjDecompressToYUV(handle, jpegBuf, (unsigned long)jpegSize, dstBuf, + flags) == -1) + _throwtj(); + +bailout: + if (dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + if (jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0); } static void TJDecompressor_decodeYUV - (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, - jintArray jSrcStrides, jint subsamp, jarray dst, jint dstElementSize, - jint x, jint y, jint width, jint pitch, jint height, jint pf, jint flags) + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jintArray jSrcStrides, jint subsamp, jarray dst, jint dstElementSize, + jint x, jint y, jint width, jint pitch, jint height, jint pf, jint flags) { - tjhandle handle=0; - jsize arraySize=0, actualPitch; - jbyteArray jSrcPlanes[3]={NULL, NULL, NULL}; - const unsigned char *srcPlanes[3]; - unsigned char *dstBuf=NULL; - int *srcOffsets=NULL, *srcStrides=NULL; - int nc=(subsamp==org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY? 
1:3), i; - - gethandle(); - - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || subsamp<0 - || subsamp>=org_libjpegturbo_turbojpeg_TJ_NUMSAMP) - _throwarg("Invalid argument in decodeYUV()"); - if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF - || org_libjpegturbo_turbojpeg_TJ_NUMSAMP!=TJ_NUMSAMP) - _throwarg("Mismatch between Java and C API"); - - if((*env)->GetArrayLength(env, srcobjs)GetArrayLength(env, jSrcOffsets)GetArrayLength(env, jSrcStrides)GetArrayLength(env, dst)*dstElementSizeGetPrimitiveArrayCritical(env, jSrcOffsets, 0)); - bailif0(srcStrides=(*env)->GetPrimitiveArrayCritical(env, jSrcStrides, 0)); - for(i=0; iGetObjectArrayElement(env, srcobjs, i)); - if((*env)->GetArrayLength(env, jSrcPlanes[i])GetPrimitiveArrayCritical(env, jSrcPlanes[i], - 0)); - srcPlanes[i]=&srcPlanes[i][srcOffsets[i]]; - } - bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0)); - - if(tjDecodeYUVPlanes(handle, srcPlanes, srcStrides, subsamp, - &dstBuf[y*actualPitch + x*tjPixelSize[pf]], width, pitch, height, pf, - flags)==-1) - _throwtj(); - - bailout: - if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); - for(i=0; iReleasePrimitiveArrayCritical(env, jSrcPlanes[i], - (unsigned char *)srcPlanes[i], 0); - } - if(srcStrides) - (*env)->ReleasePrimitiveArrayCritical(env, jSrcStrides, srcStrides, 0); - if(srcOffsets) - (*env)->ReleasePrimitiveArrayCritical(env, jSrcOffsets, srcOffsets, 0); - return; + tjhandle handle = 0; + jsize arraySize = 0, actualPitch; + jbyteArray jSrcPlanes[3] = { NULL, NULL, NULL }; + const unsigned char *srcPlanes[3]; + unsigned char *dstBuf = NULL; + int *srcOffsets = NULL, *srcStrides = NULL; + int nc = (subsamp == org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY ? 
1 : 3), i; + + gethandle(); + + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF || subsamp < 0 || + subsamp >= org_libjpegturbo_turbojpeg_TJ_NUMSAMP) + _throwarg("Invalid argument in decodeYUV()"); + if (org_libjpegturbo_turbojpeg_TJ_NUMPF != TJ_NUMPF || + org_libjpegturbo_turbojpeg_TJ_NUMSAMP != TJ_NUMSAMP) + _throwarg("Mismatch between Java and C API"); + + if ((*env)->GetArrayLength(env, srcobjs) < nc) + _throwarg("Planes array is too small for the subsampling type"); + if ((*env)->GetArrayLength(env, jSrcOffsets) < nc) + _throwarg("Offsets array is too small for the subsampling type"); + if ((*env)->GetArrayLength(env, jSrcStrides) < nc) + _throwarg("Strides array is too small for the subsampling type"); + + actualPitch = (pitch == 0) ? width * tjPixelSize[pf] : pitch; + arraySize = (y + height - 1) * actualPitch + (x + width) * tjPixelSize[pf]; + if ((*env)->GetArrayLength(env, dst) * dstElementSize < arraySize) + _throwarg("Destination buffer is not large enough"); + + bailif0(srcOffsets = (*env)->GetPrimitiveArrayCritical(env, jSrcOffsets, 0)); + bailif0(srcStrides = (*env)->GetPrimitiveArrayCritical(env, jSrcStrides, 0)); + for (i = 0; i < nc; i++) { + int planeSize = tjPlaneSizeYUV(i, width, srcStrides[i], height, subsamp); + int pw = tjPlaneWidth(i, width, subsamp); + + if (planeSize < 0 || pw < 0) + _throwarg(tjGetErrorStr()); + + if (srcOffsets[i] < 0) + _throwarg("Invalid argument in decodeYUV()"); + if (srcStrides[i] < 0 && srcOffsets[i] - planeSize + pw < 0) + _throwarg("Negative plane stride would cause memory to be accessed below plane boundary"); + + bailif0(jSrcPlanes[i] = (*env)->GetObjectArrayElement(env, srcobjs, i)); + if ((*env)->GetArrayLength(env, jSrcPlanes[i]) < srcOffsets[i] + planeSize) + _throwarg("Source plane is not large enough"); + + bailif0(srcPlanes[i] = + (*env)->GetPrimitiveArrayCritical(env, jSrcPlanes[i], 0)); + srcPlanes[i] = &srcPlanes[i][srcOffsets[i]]; + } + bailif0(dstBuf = (*env)->GetPrimitiveArrayCritical(env, 
dst, 0)); + + if (tjDecodeYUVPlanes(handle, srcPlanes, srcStrides, subsamp, + &dstBuf[y * actualPitch + x * tjPixelSize[pf]], width, + pitch, height, pf, flags) == -1) + _throwtj(); + +bailout: + if (dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0); + for (i = 0; i < nc; i++) { + if (srcPlanes[i] && jSrcPlanes[i]) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcPlanes[i], + (unsigned char *)srcPlanes[i], 0); + } + if (srcStrides) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcStrides, srcStrides, 0); + if (srcOffsets) + (*env)->ReleasePrimitiveArrayCritical(env, jSrcOffsets, srcOffsets, 0); } /* TurboJPEG 1.4.x: TJDecompressor::decodeYUV() byte destination */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3BIIIIIII - (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, - jintArray jSrcStrides, jint subsamp, jbyteArray dst, jint x, jint y, - jint width, jint pitch, jint height, jint pf, jint flags) + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jintArray jSrcStrides, jint subsamp, jbyteArray dst, jint x, jint y, + jint width, jint pitch, jint height, jint pf, jint flags) { - TJDecompressor_decodeYUV(env, obj, srcobjs, jSrcOffsets, jSrcStrides, - subsamp, dst, 1, x, y, width, pitch, height, pf, flags); + TJDecompressor_decodeYUV(env, obj, srcobjs, jSrcOffsets, jSrcStrides, + subsamp, dst, 1, x, y, width, pitch, height, pf, + flags); } /* TurboJPEG 1.4.x: TJDecompressor::decodeYUV() int destination */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3IIIIIIII - (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, - jintArray jSrcStrides, jint subsamp, jintArray dst, jint x, jint y, - jint width, jint stride, jint height, jint pf, jint flags) + (JNIEnv *env, jobject obj, jobjectArray srcobjs, jintArray jSrcOffsets, + jintArray jSrcStrides, jint subsamp, jintArray dst, jint x, jint y, + jint 
width, jint stride, jint height, jint pf, jint flags) { - if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF) - _throwarg("Invalid argument in decodeYUV()"); - if(tjPixelSize[pf]!=sizeof(jint)) - _throwarg("Pixel format must be 32-bit when decoding to an integer buffer."); + if (pf < 0 || pf >= org_libjpegturbo_turbojpeg_TJ_NUMPF) + _throwarg("Invalid argument in decodeYUV()"); + if (tjPixelSize[pf] != sizeof(jint)) + _throwarg("Pixel format must be 32-bit when decoding to an integer buffer."); - TJDecompressor_decodeYUV(env, obj, srcobjs, jSrcOffsets, jSrcStrides, - subsamp, dst, sizeof(jint), x, y, width, stride*sizeof(jint), height, pf, - flags); + TJDecompressor_decodeYUV(env, obj, srcobjs, jSrcOffsets, jSrcStrides, + subsamp, dst, sizeof(jint), x, y, width, + stride * sizeof(jint), height, pf, flags); - bailout: - return; +bailout: + return; } /* TurboJPEG 1.2.x: TJTransformer::init() */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_init - (JNIEnv *env, jobject obj) + (JNIEnv *env, jobject obj) { - jclass cls; - jfieldID fid; - tjhandle handle; + jclass cls; + jfieldID fid; + tjhandle handle; - if((handle=tjInitTransform())==NULL) _throwtj(); + if ((handle = tjInitTransform()) == NULL) + _throw(tjGetErrorStr(), "org/libjpegturbo/turbojpeg/TJException"); - bailif0(cls=(*env)->GetObjectClass(env, obj)); - bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J")); - (*env)->SetLongField(env, obj, fid, (size_t)handle); + bailif0(cls = (*env)->GetObjectClass(env, obj)); + bailif0(fid = (*env)->GetFieldID(env, cls, "handle", "J")); + (*env)->SetLongField(env, obj, fid, (size_t)handle); - bailout: - return; +bailout: + return; } -typedef struct _JNICustomFilterParams -{ - JNIEnv *env; - jobject tobj; - jobject cfobj; +typedef struct _JNICustomFilterParams { + JNIEnv *env; + jobject tobj; + jobject cfobj; } JNICustomFilterParams; static int JNICustomFilter(short *coeffs, tjregion arrayRegion, - tjregion planeRegion, int componentIndex, int 
transformIndex, - tjtransform *transform) + tjregion planeRegion, int componentIndex, + int transformIndex, tjtransform *transform) { - JNICustomFilterParams *params=(JNICustomFilterParams *)transform->data; - JNIEnv *env=params->env; - jobject tobj=params->tobj, cfobj=params->cfobj; - jobject arrayRegionObj, planeRegionObj, bufobj, borobj; - jclass cls; jmethodID mid; jfieldID fid; - - bailif0(bufobj=(*env)->NewDirectByteBuffer(env, coeffs, - sizeof(short)*arrayRegion.w*arrayRegion.h)); - bailif0(cls=(*env)->FindClass(env, "java/nio/ByteOrder")); - bailif0(mid=(*env)->GetStaticMethodID(env, cls, "nativeOrder", - "()Ljava/nio/ByteOrder;")); - bailif0(borobj=(*env)->CallStaticObjectMethod(env, cls, mid)); - bailif0(cls=(*env)->GetObjectClass(env, bufobj)); - bailif0(mid=(*env)->GetMethodID(env, cls, "order", - "(Ljava/nio/ByteOrder;)Ljava/nio/ByteBuffer;")); - (*env)->CallObjectMethod(env, bufobj, mid, borobj); - bailif0(mid=(*env)->GetMethodID(env, cls, "asShortBuffer", - "()Ljava/nio/ShortBuffer;")); - bailif0(bufobj=(*env)->CallObjectMethod(env, bufobj, mid)); - - bailif0(cls=(*env)->FindClass(env, "java/awt/Rectangle")); - bailif0(arrayRegionObj=(*env)->AllocObject(env, cls)); - bailif0(fid=(*env)->GetFieldID(env, cls, "x", "I")); - (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.x); - bailif0(fid=(*env)->GetFieldID(env, cls, "y", "I")); - (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.y); - bailif0(fid=(*env)->GetFieldID(env, cls, "width", "I")); - (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.w); - bailif0(fid=(*env)->GetFieldID(env, cls, "height", "I")); - (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.h); - - bailif0(planeRegionObj=(*env)->AllocObject(env, cls)); - bailif0(fid=(*env)->GetFieldID(env, cls, "x", "I")); - (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.x); - bailif0(fid=(*env)->GetFieldID(env, cls, "y", "I")); - (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.y); - 
bailif0(fid=(*env)->GetFieldID(env, cls, "width", "I")); - (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.w); - bailif0(fid=(*env)->GetFieldID(env, cls, "height", "I")); - (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.h); - - bailif0(cls=(*env)->GetObjectClass(env, cfobj)); - bailif0(mid=(*env)->GetMethodID(env, cls, "customFilter", - "(Ljava/nio/ShortBuffer;Ljava/awt/Rectangle;Ljava/awt/Rectangle;IILorg/libjpegturbo/turbojpeg/TJTransform;)V")); - (*env)->CallVoidMethod(env, cfobj, mid, bufobj, arrayRegionObj, - planeRegionObj, componentIndex, transformIndex, tobj); - - return 0; - - bailout: - return -1; + JNICustomFilterParams *params = (JNICustomFilterParams *)transform->data; + JNIEnv *env = params->env; + jobject tobj = params->tobj, cfobj = params->cfobj; + jobject arrayRegionObj, planeRegionObj, bufobj, borobj; + jclass cls; + jmethodID mid; + jfieldID fid; + + bailif0(bufobj = (*env)->NewDirectByteBuffer(env, coeffs, + sizeof(short) * arrayRegion.w * arrayRegion.h)); + bailif0(cls = (*env)->FindClass(env, "java/nio/ByteOrder")); + bailif0(mid = (*env)->GetStaticMethodID(env, cls, "nativeOrder", + "()Ljava/nio/ByteOrder;")); + bailif0(borobj = (*env)->CallStaticObjectMethod(env, cls, mid)); + bailif0(cls = (*env)->GetObjectClass(env, bufobj)); + bailif0(mid = (*env)->GetMethodID(env, cls, "order", + "(Ljava/nio/ByteOrder;)Ljava/nio/ByteBuffer;")); + (*env)->CallObjectMethod(env, bufobj, mid, borobj); + bailif0(mid = (*env)->GetMethodID(env, cls, "asShortBuffer", + "()Ljava/nio/ShortBuffer;")); + bailif0(bufobj = (*env)->CallObjectMethod(env, bufobj, mid)); + + bailif0(cls = (*env)->FindClass(env, "java/awt/Rectangle")); + bailif0(arrayRegionObj = (*env)->AllocObject(env, cls)); + bailif0(fid = (*env)->GetFieldID(env, cls, "x", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.x); + bailif0(fid = (*env)->GetFieldID(env, cls, "y", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.y); + bailif0(fid = 
(*env)->GetFieldID(env, cls, "width", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.w); + bailif0(fid = (*env)->GetFieldID(env, cls, "height", "I")); + (*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.h); + + bailif0(planeRegionObj = (*env)->AllocObject(env, cls)); + bailif0(fid = (*env)->GetFieldID(env, cls, "x", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.x); + bailif0(fid = (*env)->GetFieldID(env, cls, "y", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.y); + bailif0(fid = (*env)->GetFieldID(env, cls, "width", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.w); + bailif0(fid = (*env)->GetFieldID(env, cls, "height", "I")); + (*env)->SetIntField(env, planeRegionObj, fid, planeRegion.h); + + bailif0(cls = (*env)->GetObjectClass(env, cfobj)); + bailif0(mid = (*env)->GetMethodID(env, cls, "customFilter", + "(Ljava/nio/ShortBuffer;Ljava/awt/Rectangle;Ljava/awt/Rectangle;IILorg/libjpegturbo/turbojpeg/TJTransform;)V")); + (*env)->CallVoidMethod(env, cfobj, mid, bufobj, arrayRegionObj, + planeRegionObj, componentIndex, transformIndex, tobj); + + return 0; + +bailout: + return -1; } /* TurboJPEG 1.2.x: TJTransformer::transform() */ JNIEXPORT jintArray JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_transform - (JNIEnv *env, jobject obj, jbyteArray jsrcBuf, jint jpegSize, - jobjectArray dstobjs, jobjectArray tobjs, jint flags) + (JNIEnv *env, jobject obj, jbyteArray jsrcBuf, jint jpegSize, + jobjectArray dstobjs, jobjectArray tobjs, jint flags) { - tjhandle handle=0; int i; - unsigned char *jpegBuf=NULL, **dstBufs=NULL; jsize n=0; - unsigned long *dstSizes=NULL; tjtransform *t=NULL; - jbyteArray *jdstBufs=NULL; - int jpegWidth=0, jpegHeight=0, jpegSubsamp; - jintArray jdstSizes=0; jint *dstSizesi=NULL; - JNICustomFilterParams *params=NULL; - - gethandle(); - - if((*env)->GetArrayLength(env, jsrcBuf)GetFieldID(env, _cls, "jpegWidth", "I")); - 
jpegWidth=(int)(*env)->GetIntField(env, obj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I")); - jpegHeight=(int)(*env)->GetIntField(env, obj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegSubsamp", "I")); - jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid); - - n=(*env)->GetArrayLength(env, dstobjs); - if(n!=(*env)->GetArrayLength(env, tobjs)) - _throwarg("Mismatch between size of transforms array and destination buffers array"); - - if((dstBufs=(unsigned char **)malloc(sizeof(unsigned char *)*n))==NULL) - _throwmem(); - if((jdstBufs=(jbyteArray *)malloc(sizeof(jbyteArray)*n))==NULL) - _throwmem(); - if((dstSizes=(unsigned long *)malloc(sizeof(unsigned long)*n))==NULL) - _throwmem(); - if((t=(tjtransform *)malloc(sizeof(tjtransform)*n))==NULL) - _throwmem(); - if((params=(JNICustomFilterParams *)malloc(sizeof(JNICustomFilterParams)*n)) - ==NULL) - _throwmem(); - for(i=0; iGetObjectArrayElement(env, tobjs, i)); - bailif0(_cls=(*env)->GetObjectClass(env, tobj)); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "op", "I")); - t[i].op=(*env)->GetIntField(env, tobj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "options", "I")); - t[i].options=(*env)->GetIntField(env, tobj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "x", "I")); - t[i].r.x=(*env)->GetIntField(env, tobj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "y", "I")); - t[i].r.y=(*env)->GetIntField(env, tobj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "width", "I")); - t[i].r.w=(*env)->GetIntField(env, tobj, _fid); - bailif0(_fid=(*env)->GetFieldID(env, _cls, "height", "I")); - t[i].r.h=(*env)->GetIntField(env, tobj, _fid); - - bailif0(_fid=(*env)->GetFieldID(env, _cls, "cf", - "Lorg/libjpegturbo/turbojpeg/TJCustomFilter;")); - cfobj=(*env)->GetObjectField(env, tobj, _fid); - if(cfobj) - { - params[i].env=env; - params[i].tobj=tobj; - params[i].cfobj=cfobj; - t[i].customFilter=JNICustomFilter; - t[i].data=(void *)&params[i]; - } - } - - for(i=0; 
iGetObjectArrayElement(env, dstobjs, i)); - if((unsigned long)(*env)->GetArrayLength(env, jdstBufs[i]) - GetPrimitiveArrayCritical(env, jsrcBuf, 0)); - for(i=0; iGetPrimitiveArrayCritical(env, jdstBufs[i], 0)); - - if(tjTransform(handle, jpegBuf, jpegSize, n, dstBufs, dstSizes, t, - flags|TJFLAG_NOREALLOC)==-1) - _throwtj(); - - for(i=0; iReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0); - dstBufs[i]=NULL; - } - (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0); - jpegBuf=NULL; - - jdstSizes=(*env)->NewIntArray(env, n); - bailif0(dstSizesi=(*env)->GetIntArrayElements(env, jdstSizes, 0)); - for(i=0; iReleaseIntArrayElements(env, jdstSizes, dstSizesi, 0); - if(dstBufs) - { - for(i=0; iReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0); - } - free(dstBufs); - } - if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0); - if(jdstBufs) free(jdstBufs); - if(dstSizes) free(dstSizes); - if(t) free(t); - return jdstSizes; + tjhandle handle = 0; + unsigned char *jpegBuf = NULL, **dstBufs = NULL; + jsize n = 0; + unsigned long *dstSizes = NULL; + tjtransform *t = NULL; + jbyteArray *jdstBufs = NULL; + int i, jpegWidth = 0, jpegHeight = 0, jpegSubsamp; + jintArray jdstSizes = 0; + jint *dstSizesi = NULL; + JNICustomFilterParams *params = NULL; + + gethandle(); + + if ((*env)->GetArrayLength(env, jsrcBuf) < jpegSize) + _throwarg("Source buffer is not large enough"); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegWidth", "I")); + jpegWidth = (int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegHeight", "I")); + jpegHeight = (int)(*env)->GetIntField(env, obj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "jpegSubsamp", "I")); + jpegSubsamp = (int)(*env)->GetIntField(env, obj, _fid); + + n = (*env)->GetArrayLength(env, dstobjs); + if (n != (*env)->GetArrayLength(env, tobjs)) + _throwarg("Mismatch between size of transforms array and destination buffers array"); + + if 
((dstBufs = + (unsigned char **)malloc(sizeof(unsigned char *) * n)) == NULL) + _throwmem(); + if ((jdstBufs = (jbyteArray *)malloc(sizeof(jbyteArray) * n)) == NULL) + _throwmem(); + if ((dstSizes = (unsigned long *)malloc(sizeof(unsigned long) * n)) == NULL) + _throwmem(); + if ((t = (tjtransform *)malloc(sizeof(tjtransform) * n)) == NULL) + _throwmem(); + if ((params = (JNICustomFilterParams *)malloc(sizeof(JNICustomFilterParams) * + n)) == NULL) + _throwmem(); + for (i = 0; i < n; i++) { + dstBufs[i] = NULL; jdstBufs[i] = NULL; dstSizes[i] = 0; + memset(&t[i], 0, sizeof(tjtransform)); + memset(&params[i], 0, sizeof(JNICustomFilterParams)); + } + + for (i = 0; i < n; i++) { + jobject tobj, cfobj; + + bailif0(tobj = (*env)->GetObjectArrayElement(env, tobjs, i)); + bailif0(_cls = (*env)->GetObjectClass(env, tobj)); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "op", "I")); + t[i].op = (*env)->GetIntField(env, tobj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "options", "I")); + t[i].options = (*env)->GetIntField(env, tobj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "x", "I")); + t[i].r.x = (*env)->GetIntField(env, tobj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "y", "I")); + t[i].r.y = (*env)->GetIntField(env, tobj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "width", "I")); + t[i].r.w = (*env)->GetIntField(env, tobj, _fid); + bailif0(_fid = (*env)->GetFieldID(env, _cls, "height", "I")); + t[i].r.h = (*env)->GetIntField(env, tobj, _fid); + + bailif0(_fid = (*env)->GetFieldID(env, _cls, "cf", + "Lorg/libjpegturbo/turbojpeg/TJCustomFilter;")); + cfobj = (*env)->GetObjectField(env, tobj, _fid); + if (cfobj) { + params[i].env = env; + params[i].tobj = tobj; + params[i].cfobj = cfobj; + t[i].customFilter = JNICustomFilter; + t[i].data = (void *)&params[i]; + } + } + + for (i = 0; i < n; i++) { + int w = jpegWidth, h = jpegHeight; + + if (t[i].r.w != 0) w = t[i].r.w; + if (t[i].r.h != 0) h = t[i].r.h; + bailif0(jdstBufs[i] = 
(*env)->GetObjectArrayElement(env, dstobjs, i)); + if ((unsigned long)(*env)->GetArrayLength(env, jdstBufs[i]) < + tjBufSize(w, h, jpegSubsamp)) + _throwarg("Destination buffer is not large enough"); + } + bailif0(jpegBuf = (*env)->GetPrimitiveArrayCritical(env, jsrcBuf, 0)); + for (i = 0; i < n; i++) + bailif0(dstBufs[i] = + (*env)->GetPrimitiveArrayCritical(env, jdstBufs[i], 0)); + + if (tjTransform(handle, jpegBuf, jpegSize, n, dstBufs, dstSizes, t, + flags | TJFLAG_NOREALLOC) == -1) + _throwtj(); + + for (i = 0; i < n; i++) { + (*env)->ReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0); + dstBufs[i] = NULL; + } + (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0); + jpegBuf = NULL; + + jdstSizes = (*env)->NewIntArray(env, n); + bailif0(dstSizesi = (*env)->GetIntArrayElements(env, jdstSizes, 0)); + for (i = 0; i < n; i++) dstSizesi[i] = (int)dstSizes[i]; + +bailout: + if (dstSizesi) (*env)->ReleaseIntArrayElements(env, jdstSizes, dstSizesi, 0); + if (dstBufs) { + for (i = 0; i < n; i++) { + if (dstBufs[i] && jdstBufs && jdstBufs[i]) + (*env)->ReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0); + } + free(dstBufs); + } + if (jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0); + if (jdstBufs) free(jdstBufs); + if (dstSizes) free(dstSizes); + if (t) free(t); + return jdstSizes; } /* TurboJPEG 1.2.x: TJDecompressor::destroy() */ JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy - (JNIEnv *env, jobject obj) + (JNIEnv *env, jobject obj) { - Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy(env, obj); + Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy(env, obj); } diff --git a/turbojpeg-mapfile b/turbojpeg-mapfile index 35d55ae..5477fed 100755 --- a/turbojpeg-mapfile +++ b/turbojpeg-mapfile @@ -1,56 +1,65 @@ TURBOJPEG_1.0 { - global: - tjInitCompress; - tjCompress; - TJBUFSIZE; - tjInitDecompress; - tjDecompressHeader; - tjDecompress; - tjDestroy; - tjGetErrorStr; - local: - 
*; + global: + tjInitCompress; + tjCompress; + TJBUFSIZE; + tjInitDecompress; + tjDecompressHeader; + tjDecompress; + tjDestroy; + tjGetErrorStr; + local: + *; }; TURBOJPEG_1.1 { - global: - TJBUFSIZEYUV; - tjDecompressHeader2; - tjDecompressToYUV; - tjEncodeYUV; + global: + TJBUFSIZEYUV; + tjDecompressHeader2; + tjDecompressToYUV; + tjEncodeYUV; } TURBOJPEG_1.0; TURBOJPEG_1.2 { - global: - tjAlloc; - tjBufSize; - tjBufSizeYUV; - tjCompress2; - tjDecompress2; - tjEncodeYUV2; - tjFree; - tjGetScalingFactors; - tjInitTransform; - tjTransform; + global: + tjAlloc; + tjBufSize; + tjBufSizeYUV; + tjCompress2; + tjDecompress2; + tjEncodeYUV2; + tjFree; + tjGetScalingFactors; + tjInitTransform; + tjTransform; } TURBOJPEG_1.1; TURBOJPEG_1.4 { - global: - tjBufSizeYUV2; - tjCompressFromYUV; - tjCompressFromYUVPlanes; - tjDecodeYUV; - tjDecodeYUVPlanes; - tjDecompressHeader3; - tjDecompressToYUV2; - tjDecompressToYUVPlanes; - tjEncodeYUV3; - tjEncodeYUVPlanes; - tjPlaneHeight; - tjPlaneSizeYUV; - tjPlaneWidth; + global: + tjBufSizeYUV2; + tjCompressFromYUV; + tjCompressFromYUVPlanes; + tjDecodeYUV; + tjDecodeYUVPlanes; + tjDecompressHeader3; + tjDecompressToYUV2; + tjDecompressToYUVPlanes; + tjEncodeYUV3; + tjEncodeYUVPlanes; + tjPlaneHeight; + tjPlaneSizeYUV; + tjPlaneWidth; } TURBOJPEG_1.2; + +TURBOJPEG_2.0 +{ + global: + tjGetErrorCode; + tjGetErrorStr2; + tjLoadImage; + tjSaveImage; +} TURBOJPEG_1.4; diff --git a/turbojpeg-mapfile.jni b/turbojpeg-mapfile.jni index 9c1d25b..4432791 100755 --- a/turbojpeg-mapfile.jni +++ b/turbojpeg-mapfile.jni @@ -1,92 +1,101 @@ TURBOJPEG_1.0 { - global: - tjInitCompress; - tjCompress; - TJBUFSIZE; - tjInitDecompress; - tjDecompressHeader; - tjDecompress; - tjDestroy; - tjGetErrorStr; - local: - *; + global: + tjInitCompress; + tjCompress; + TJBUFSIZE; + tjInitDecompress; + tjDecompressHeader; + tjDecompress; + tjDestroy; + tjGetErrorStr; + local: + *; }; TURBOJPEG_1.1 { - global: - TJBUFSIZEYUV; - tjDecompressHeader2; - 
tjDecompressToYUV; - tjEncodeYUV; + global: + TJBUFSIZEYUV; + tjDecompressHeader2; + tjDecompressToYUV; + tjEncodeYUV; } TURBOJPEG_1.0; TURBOJPEG_1.2 { - global: - tjAlloc; - tjBufSize; - tjBufSizeYUV; - tjCompress2; - tjDecompress2; - tjEncodeYUV2; - tjFree; - tjGetScalingFactors; - tjInitTransform; - tjTransform; - Java_org_libjpegturbo_turbojpeg_TJ_bufSize; - Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III; - Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors; - Java_org_libjpegturbo_turbojpeg_TJCompressor_init; - Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII; - Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII; - Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII; - Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII; - Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy; - Java_org_libjpegturbo_turbojpeg_TJDecompressor_init; - Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader; - Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII; - Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII; - Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI; - Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy; - Java_org_libjpegturbo_turbojpeg_TJTransformer_init; - Java_org_libjpegturbo_turbojpeg_TJTransformer_transform; + global: + tjAlloc; + tjBufSize; + tjBufSizeYUV; + tjCompress2; + tjDecompress2; + tjEncodeYUV2; + tjFree; + tjGetScalingFactors; + tjInitTransform; + tjTransform; + Java_org_libjpegturbo_turbojpeg_TJ_bufSize; + Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III; + Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors; + Java_org_libjpegturbo_turbojpeg_TJCompressor_init; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII; + 
Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_init; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy; + Java_org_libjpegturbo_turbojpeg_TJTransformer_init; + Java_org_libjpegturbo_turbojpeg_TJTransformer_transform; } TURBOJPEG_1.1; TURBOJPEG_1.3 { - global: - Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII; - Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII; - Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII; - Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII; + global: + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII; } TURBOJPEG_1.2; TURBOJPEG_1.4 { - global: - tjBufSizeYUV2; - tjCompressFromYUV; - tjCompressFromYUVPlanes; - tjDecodeYUV; - tjDecodeYUVPlanes; - tjDecompressHeader3; - tjDecompressToYUV2; - tjDecompressToYUVPlanes; - tjEncodeYUV3; - tjEncodeYUVPlanes; - tjPlaneHeight; - tjPlaneSizeYUV; - tjPlaneWidth; - Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII; - Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3_3B_3II_3III_3BII; - Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3_3B_3I_3III; - Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3_3B_3I_3III; - Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3_3B_3II_3III; - 
Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3BIIIIIII; - Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3IIIIIIII; - Java_org_libjpegturbo_turbojpeg_TJ_planeHeight__III; - Java_org_libjpegturbo_turbojpeg_TJ_planeSizeYUV__IIIII; - Java_org_libjpegturbo_turbojpeg_TJ_planeWidth__III; + global: + tjBufSizeYUV2; + tjCompressFromYUV; + tjCompressFromYUVPlanes; + tjDecodeYUV; + tjDecodeYUVPlanes; + tjDecompressHeader3; + tjDecompressToYUV2; + tjDecompressToYUVPlanes; + tjEncodeYUV3; + tjEncodeYUVPlanes; + tjPlaneHeight; + tjPlaneSizeYUV; + tjPlaneWidth; + Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3_3B_3II_3III_3BII; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3_3B_3I_3III; + Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3_3B_3I_3III; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3_3B_3II_3III; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3BIIIIIII; + Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3_3B_3I_3II_3IIIIIIII; + Java_org_libjpegturbo_turbojpeg_TJ_planeHeight__III; + Java_org_libjpegturbo_turbojpeg_TJ_planeSizeYUV__IIIII; + Java_org_libjpegturbo_turbojpeg_TJ_planeWidth__III; } TURBOJPEG_1.3; + +TURBOJPEG_2.0 +{ + global: + tjGetErrorCode; + tjGetErrorStr2; + tjLoadImage; + tjSaveImage; +} TURBOJPEG_1.4; diff --git a/turbojpeg.c b/turbojpeg.c index 662c68f..90a9ce6 100644 --- a/turbojpeg.c +++ b/turbojpeg.c @@ -1,5 +1,5 @@ /* - * Copyright (C)2009-2017 D. R. Commander. All Rights Reserved. + * Copyright (C)2009-2018 D. R. Commander. All Rights Reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,525 +37,372 @@ #include #include #include +#include #include "./turbojpeg.h" #include "./tjutil.h" #include "transupp.h" #include "./jpegcomp.h" +#include "./cdjpeg.h" -extern void jpeg_mem_dest_tj(j_compress_ptr, unsigned char **, - unsigned long *, boolean); +extern void jpeg_mem_dest_tj(j_compress_ptr, unsigned char **, unsigned long *, + boolean); extern void jpeg_mem_src_tj(j_decompress_ptr, const unsigned char *, - unsigned long); + unsigned long); -#define PAD(v, p) ((v+(p)-1)&(~((p)-1))) -#define isPow2(x) (((x)&(x-1))==0) +#define PAD(v, p) ((v + (p) - 1) & (~((p) - 1))) +#define isPow2(x) (((x) & (x - 1)) == 0) -/* Error handling (based on example in example.c) */ +/* Error handling (based on example in example.txt) */ -static char errStr[JMSG_LENGTH_MAX]="No error"; +static char errStr[JMSG_LENGTH_MAX] = "No error"; -struct my_error_mgr -{ - struct jpeg_error_mgr pub; - jmp_buf setjmp_buffer; - void (*emit_message)(j_common_ptr, int); - boolean warning; +struct my_error_mgr { + struct jpeg_error_mgr pub; + jmp_buf setjmp_buffer; + void (*emit_message) (j_common_ptr, int); + boolean warning, stopOnWarning; }; typedef struct my_error_mgr *my_error_ptr; +#define JMESSAGE(code, string) string, +static const char *turbojpeg_message_table[] = { +#include "cderror.h" + NULL +}; + static void my_error_exit(j_common_ptr cinfo) { - my_error_ptr myerr=(my_error_ptr)cinfo->err; - (*cinfo->err->output_message)(cinfo); - longjmp(myerr->setjmp_buffer, 1); + my_error_ptr myerr = (my_error_ptr)cinfo->err; + + (*cinfo->err->output_message) (cinfo); + longjmp(myerr->setjmp_buffer, 1); } /* Based on output_message() in jerror.c */ static void my_output_message(j_common_ptr cinfo) { - (*cinfo->err->format_message)(cinfo, errStr); + (*cinfo->err->format_message) (cinfo, errStr); } static void my_emit_message(j_common_ptr 
cinfo, int msg_level) { - my_error_ptr myerr=(my_error_ptr)cinfo->err; - myerr->emit_message(cinfo, msg_level); - if(msg_level<0) myerr->warning=TRUE; + my_error_ptr myerr = (my_error_ptr)cinfo->err; + + myerr->emit_message(cinfo, msg_level); + if (msg_level < 0) { + myerr->warning = TRUE; + if (myerr->stopOnWarning) longjmp(myerr->setjmp_buffer, 1); + } } /* Global structures, macros, etc. */ -enum {COMPRESS=1, DECOMPRESS=2}; +enum { COMPRESS = 1, DECOMPRESS = 2 }; -typedef struct _tjinstance -{ - struct jpeg_compress_struct cinfo; - struct jpeg_decompress_struct dinfo; - struct my_error_mgr jerr; - int init, headerRead; +typedef struct _tjinstance { + struct jpeg_compress_struct cinfo; + struct jpeg_decompress_struct dinfo; + struct my_error_mgr jerr; + int init, headerRead; + char errStr[JMSG_LENGTH_MAX]; + boolean isInstanceError; } tjinstance; -static const int pixelsize[TJ_NUMSAMP]={3, 3, 3, 1, 3, 3}; +static const int pixelsize[TJ_NUMSAMP] = { 3, 3, 3, 1, 3, 3 }; -static const JXFORM_CODE xformtypes[TJ_NUMXOP]= -{ - JXFORM_NONE, JXFORM_FLIP_H, JXFORM_FLIP_V, JXFORM_TRANSPOSE, - JXFORM_TRANSVERSE, JXFORM_ROT_90, JXFORM_ROT_180, JXFORM_ROT_270 +static const JXFORM_CODE xformtypes[TJ_NUMXOP] = { + JXFORM_NONE, JXFORM_FLIP_H, JXFORM_FLIP_V, JXFORM_TRANSPOSE, + JXFORM_TRANSVERSE, JXFORM_ROT_90, JXFORM_ROT_180, JXFORM_ROT_270 }; -#define NUMSF 16 -static const tjscalingfactor sf[NUMSF]={ - {2, 1}, - {15, 8}, - {7, 4}, - {13, 8}, - {3, 2}, - {11, 8}, - {5, 4}, - {9, 8}, - {1, 1}, - {7, 8}, - {3, 4}, - {5, 8}, - {1, 2}, - {3, 8}, - {1, 4}, - {1, 8} +#define NUMSF 16 +static const tjscalingfactor sf[NUMSF] = { + { 2, 1 }, + { 15, 8 }, + { 7, 4 }, + { 13, 8 }, + { 3, 2 }, + { 11, 8 }, + { 5, 4 }, + { 9, 8 }, + { 1, 1 }, + { 7, 8 }, + { 3, 4 }, + { 5, 8 }, + { 1, 2 }, + { 3, 8 }, + { 1, 4 }, + { 1, 8 } }; -#define _throw(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s", m); \ - retval=-1; goto bailout;} -#define getinstance(handle) tjinstance *this=(tjinstance *)handle; \ - 
j_compress_ptr cinfo=NULL; j_decompress_ptr dinfo=NULL; \ - if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ - return -1;} \ - cinfo=&this->cinfo; dinfo=&this->dinfo; \ - this->jerr.warning=FALSE; -#define getcinstance(handle) tjinstance *this=(tjinstance *)handle; \ - j_compress_ptr cinfo=NULL; \ - if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ - return -1;} \ - cinfo=&this->cinfo; \ - this->jerr.warning=FALSE; -#define getdinstance(handle) tjinstance *this=(tjinstance *)handle; \ - j_decompress_ptr dinfo=NULL; \ - if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ - return -1;} \ - dinfo=&this->dinfo; \ - this->jerr.warning=FALSE; +static J_COLOR_SPACE pf2cs[TJ_NUMPF] = { + JCS_EXT_RGB, JCS_EXT_BGR, JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR, + JCS_EXT_XRGB, JCS_GRAYSCALE, JCS_EXT_RGBA, JCS_EXT_BGRA, JCS_EXT_ABGR, + JCS_EXT_ARGB, JCS_CMYK +}; + +static int cs2pf[JPEG_NUMCS] = { + TJPF_UNKNOWN, TJPF_GRAY, +#if RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 3 + TJPF_RGB, +#elif RGB_RED == 2 && RGB_GREEN == 1 && RGB_BLUE == 0 && RGB_PIXELSIZE == 3 + TJPF_BGR, +#elif RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 4 + TJPF_RGBX, +#elif RGB_RED == 2 && RGB_GREEN == 1 && RGB_BLUE == 0 && RGB_PIXELSIZE == 4 + TJPF_BGRX, +#elif RGB_RED == 3 && RGB_GREEN == 2 && RGB_BLUE == 1 && RGB_PIXELSIZE == 4 + TJPF_XBGR, +#elif RGB_RED == 1 && RGB_GREEN == 2 && RGB_BLUE == 3 && RGB_PIXELSIZE == 4 + TJPF_XRGB, +#endif + TJPF_UNKNOWN, TJPF_CMYK, TJPF_UNKNOWN, TJPF_RGB, TJPF_RGBX, TJPF_BGR, + TJPF_BGRX, TJPF_XBGR, TJPF_XRGB, TJPF_RGBA, TJPF_BGRA, TJPF_ABGR, TJPF_ARGB, + TJPF_UNKNOWN +}; + +#define _throwg(m) { \ + snprintf(errStr, JMSG_LENGTH_MAX, "%s", m); \ + retval = -1; goto bailout; \ +} +#define _throwunix(m) { \ + snprintf(errStr, JMSG_LENGTH_MAX, "%s\n%s", m, strerror(errno)); \ + retval = -1; goto bailout; \ +} +#define _throw(m) { \ + snprintf(this->errStr, JMSG_LENGTH_MAX, "%s", m); \ + 
this->isInstanceError = TRUE; _throwg(m) \ +} + +#define getinstance(handle) \ + tjinstance *this = (tjinstance *)handle; \ + j_compress_ptr cinfo = NULL; \ + j_decompress_ptr dinfo = NULL; \ + \ + if (!this) { \ + snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ + return -1; \ + } \ + cinfo = &this->cinfo; dinfo = &this->dinfo; \ + this->jerr.warning = FALSE; \ + this->isInstanceError = FALSE; + +#define getcinstance(handle) \ + tjinstance *this = (tjinstance *)handle; \ + j_compress_ptr cinfo = NULL; \ + \ + if (!this) { \ + snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ + return -1; \ + } \ + cinfo = &this->cinfo; \ + this->jerr.warning = FALSE; \ + this->isInstanceError = FALSE; + +#define getdinstance(handle) \ + tjinstance *this = (tjinstance *)handle; \ + j_decompress_ptr dinfo = NULL; \ + \ + if (!this) { \ + snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle"); \ + return -1; \ + } \ + dinfo = &this->dinfo; \ + this->jerr.warning = FALSE; \ + this->isInstanceError = FALSE; static int getPixelFormat(int pixelSize, int flags) { - if(pixelSize==1) return TJPF_GRAY; - if(pixelSize==3) - { - if(flags&TJ_BGR) return TJPF_BGR; - else return TJPF_RGB; - } - if(pixelSize==4) - { - if(flags&TJ_ALPHAFIRST) - { - if(flags&TJ_BGR) return TJPF_XBGR; - else return TJPF_XRGB; - } - else - { - if(flags&TJ_BGR) return TJPF_BGRX; - else return TJPF_RGBX; - } - } - return -1; + if (pixelSize == 1) return TJPF_GRAY; + if (pixelSize == 3) { + if (flags & TJ_BGR) return TJPF_BGR; + else return TJPF_RGB; + } + if (pixelSize == 4) { + if (flags & TJ_ALPHAFIRST) { + if (flags & TJ_BGR) return TJPF_XBGR; + else return TJPF_XRGB; + } else { + if (flags & TJ_BGR) return TJPF_BGRX; + else return TJPF_RGBX; + } + } + return -1; } -static int setCompDefaults(struct jpeg_compress_struct *cinfo, - int pixelFormat, int subsamp, int jpegQual, int flags) +static int setCompDefaults(struct jpeg_compress_struct *cinfo, int pixelFormat, + int subsamp, int jpegQual, int flags) { - 
int retval=0; - char *env=NULL; - - switch(pixelFormat) - { - case TJPF_GRAY: - cinfo->in_color_space=JCS_GRAYSCALE; break; - #if JCS_EXTENSIONS==1 - case TJPF_RGB: - cinfo->in_color_space=JCS_EXT_RGB; break; - case TJPF_BGR: - cinfo->in_color_space=JCS_EXT_BGR; break; - case TJPF_RGBX: - case TJPF_RGBA: - cinfo->in_color_space=JCS_EXT_RGBX; break; - case TJPF_BGRX: - case TJPF_BGRA: - cinfo->in_color_space=JCS_EXT_BGRX; break; - case TJPF_XRGB: - case TJPF_ARGB: - cinfo->in_color_space=JCS_EXT_XRGB; break; - case TJPF_XBGR: - case TJPF_ABGR: - cinfo->in_color_space=JCS_EXT_XBGR; break; - #else - case TJPF_RGB: - case TJPF_BGR: - case TJPF_RGBX: - case TJPF_BGRX: - case TJPF_XRGB: - case TJPF_XBGR: - case TJPF_RGBA: - case TJPF_BGRA: - case TJPF_ARGB: - case TJPF_ABGR: - cinfo->in_color_space=JCS_RGB; pixelFormat=TJPF_RGB; - break; - #endif - case TJPF_CMYK: - cinfo->in_color_space=JCS_CMYK; break; - } - - cinfo->input_components=tjPixelSize[pixelFormat]; - jpeg_set_defaults(cinfo); + int retval = 0; + char *env = NULL; + + cinfo->in_color_space = pf2cs[pixelFormat]; + cinfo->input_components = tjPixelSize[pixelFormat]; + jpeg_set_defaults(cinfo); #ifndef NO_GETENV - if((env=getenv("TJ_OPTIMIZE"))!=NULL && strlen(env)>0 && !strcmp(env, "1")) - cinfo->optimize_coding=TRUE; - if((env=getenv("TJ_ARITHMETIC"))!=NULL && strlen(env)>0 && !strcmp(env, "1")) - cinfo->arith_code=TRUE; - if((env=getenv("TJ_RESTART"))!=NULL && strlen(env)>0) - { - int temp=-1; char tempc=0; - if(sscanf(env, "%d%c", &temp, &tempc)>=1 && temp>=0 && temp<=65535) - { - if(toupper(tempc)=='B') - { - cinfo->restart_interval=temp; - cinfo->restart_in_rows=0; - } - else - cinfo->restart_in_rows=temp; - } - } + if ((env = getenv("TJ_OPTIMIZE")) != NULL && strlen(env) > 0 && + !strcmp(env, "1")) + cinfo->optimize_coding = TRUE; + if ((env = getenv("TJ_ARITHMETIC")) != NULL && strlen(env) > 0 && + !strcmp(env, "1")) + cinfo->arith_code = TRUE; + if ((env = getenv("TJ_RESTART")) != NULL && strlen(env) > 
0) { + int temp = -1; + char tempc = 0; + + if (sscanf(env, "%d%c", &temp, &tempc) >= 1 && temp >= 0 && + temp <= 65535) { + if (toupper(tempc) == 'B') { + cinfo->restart_interval = temp; + cinfo->restart_in_rows = 0; + } else + cinfo->restart_in_rows = temp; + } + } #endif - if(jpegQual>=0) - { - jpeg_set_quality(cinfo, jpegQual, TRUE); - if(jpegQual>=96 || flags&TJFLAG_ACCURATEDCT) cinfo->dct_method=JDCT_ISLOW; - else cinfo->dct_method=JDCT_FASTEST; - } - if(subsamp==TJSAMP_GRAY) - jpeg_set_colorspace(cinfo, JCS_GRAYSCALE); - else if(pixelFormat==TJPF_CMYK) - jpeg_set_colorspace(cinfo, JCS_YCCK); - else jpeg_set_colorspace(cinfo, JCS_YCbCr); - + if (jpegQual >= 0) { + jpeg_set_quality(cinfo, jpegQual, TRUE); + if (jpegQual >= 96 || flags & TJFLAG_ACCURATEDCT) + cinfo->dct_method = JDCT_ISLOW; + else + cinfo->dct_method = JDCT_FASTEST; + } + if (subsamp == TJSAMP_GRAY) + jpeg_set_colorspace(cinfo, JCS_GRAYSCALE); + else if (pixelFormat == TJPF_CMYK) + jpeg_set_colorspace(cinfo, JCS_YCCK); + else + jpeg_set_colorspace(cinfo, JCS_YCbCr); + + if (flags & TJFLAG_PROGRESSIVE) + jpeg_simple_progression(cinfo); #ifndef NO_GETENV - if((env=getenv("TJ_PROGRESSIVE"))!=NULL && strlen(env)>0 - && !strcmp(env, "1")) - jpeg_simple_progression(cinfo); + else if ((env = getenv("TJ_PROGRESSIVE")) != NULL && strlen(env) > 0 && + !strcmp(env, "1")) + jpeg_simple_progression(cinfo); #endif - cinfo->comp_info[0].h_samp_factor=tjMCUWidth[subsamp]/8; - cinfo->comp_info[1].h_samp_factor=1; - cinfo->comp_info[2].h_samp_factor=1; - if(cinfo->num_components>3) - cinfo->comp_info[3].h_samp_factor=tjMCUWidth[subsamp]/8; - cinfo->comp_info[0].v_samp_factor=tjMCUHeight[subsamp]/8; - cinfo->comp_info[1].v_samp_factor=1; - cinfo->comp_info[2].v_samp_factor=1; - if(cinfo->num_components>3) - cinfo->comp_info[3].v_samp_factor=tjMCUHeight[subsamp]/8; - - return retval; -} - -static int setDecompDefaults(struct jpeg_decompress_struct *dinfo, - int pixelFormat, int flags) -{ - int retval=0; - - 
switch(pixelFormat) - { - case TJPF_GRAY: - dinfo->out_color_space=JCS_GRAYSCALE; break; - #if JCS_EXTENSIONS==1 - case TJPF_RGB: - dinfo->out_color_space=JCS_EXT_RGB; break; - case TJPF_BGR: - dinfo->out_color_space=JCS_EXT_BGR; break; - case TJPF_RGBX: - dinfo->out_color_space=JCS_EXT_RGBX; break; - case TJPF_BGRX: - dinfo->out_color_space=JCS_EXT_BGRX; break; - case TJPF_XRGB: - dinfo->out_color_space=JCS_EXT_XRGB; break; - case TJPF_XBGR: - dinfo->out_color_space=JCS_EXT_XBGR; break; - #if JCS_ALPHA_EXTENSIONS==1 - case TJPF_RGBA: - dinfo->out_color_space=JCS_EXT_RGBA; break; - case TJPF_BGRA: - dinfo->out_color_space=JCS_EXT_BGRA; break; - case TJPF_ARGB: - dinfo->out_color_space=JCS_EXT_ARGB; break; - case TJPF_ABGR: - dinfo->out_color_space=JCS_EXT_ABGR; break; - #endif - #else - case TJPF_RGB: - case TJPF_BGR: - case TJPF_RGBX: - case TJPF_BGRX: - case TJPF_XRGB: - case TJPF_XBGR: - case TJPF_RGBA: - case TJPF_BGRA: - case TJPF_ARGB: - case TJPF_ABGR: - dinfo->out_color_space=JCS_RGB; break; - #endif - case TJPF_CMYK: - dinfo->out_color_space=JCS_CMYK; break; - default: - _throw("Unsupported pixel format"); - } - - if(flags&TJFLAG_FASTDCT) dinfo->dct_method=JDCT_FASTEST; - - bailout: - return retval; + cinfo->comp_info[0].h_samp_factor = tjMCUWidth[subsamp] / 8; + cinfo->comp_info[1].h_samp_factor = 1; + cinfo->comp_info[2].h_samp_factor = 1; + if (cinfo->num_components > 3) + cinfo->comp_info[3].h_samp_factor = tjMCUWidth[subsamp] / 8; + cinfo->comp_info[0].v_samp_factor = tjMCUHeight[subsamp] / 8; + cinfo->comp_info[1].v_samp_factor = 1; + cinfo->comp_info[2].v_samp_factor = 1; + if (cinfo->num_components > 3) + cinfo->comp_info[3].v_samp_factor = tjMCUHeight[subsamp] / 8; + + return retval; } static int getSubsamp(j_decompress_ptr dinfo) { - int retval=-1, i, k; - - /* The sampling factors actually have no meaning with grayscale JPEG files, - and in fact it's possible to generate grayscale JPEGs with sampling - factors > 1 (even though those sampling 
factors are ignored by the - decompressor.) Thus, we need to treat grayscale as a special case. */ - if(dinfo->num_components==1 && dinfo->jpeg_color_space==JCS_GRAYSCALE) - return TJSAMP_GRAY; - - for(i=0; inum_components==pixelsize[i] - || ((dinfo->jpeg_color_space==JCS_YCCK - || dinfo->jpeg_color_space==JCS_CMYK) - && pixelsize[i]==3 && dinfo->num_components==4)) - { - if(dinfo->comp_info[0].h_samp_factor==tjMCUWidth[i]/8 - && dinfo->comp_info[0].v_samp_factor==tjMCUHeight[i]/8) - { - int match=0; - for(k=1; knum_components; k++) - { - int href=1, vref=1; - if(dinfo->jpeg_color_space==JCS_YCCK && k==3) - { - href=tjMCUWidth[i]/8; vref=tjMCUHeight[i]/8; - } - if(dinfo->comp_info[k].h_samp_factor==href - && dinfo->comp_info[k].v_samp_factor==vref) - match++; - } - if(match==dinfo->num_components-1) - { - retval=i; break; - } - } - /* Handle 4:2:2 and 4:4:0 images whose sampling factors are specified - in non-standard ways. */ - if(dinfo->comp_info[0].h_samp_factor==2 && - dinfo->comp_info[0].v_samp_factor==2 && - (i==TJSAMP_422 || i==TJSAMP_440)) - { - int match=0; - for(k=1; knum_components; k++) - { - int href=tjMCUHeight[i]/8, vref=tjMCUWidth[i]/8; - if(dinfo->jpeg_color_space==JCS_YCCK && k==3) - { - href=vref=2; - } - if(dinfo->comp_info[k].h_samp_factor==href - && dinfo->comp_info[k].v_samp_factor==vref) - match++; - } - if(match==dinfo->num_components-1) - { - retval=i; break; - } - } - } - } - return retval; + int retval = -1, i, k; + + /* The sampling factors actually have no meaning with grayscale JPEG files, + and in fact it's possible to generate grayscale JPEGs with sampling + factors > 1 (even though those sampling factors are ignored by the + decompressor.) Thus, we need to treat grayscale as a special case. 
*/ + if (dinfo->num_components == 1 && dinfo->jpeg_color_space == JCS_GRAYSCALE) + return TJSAMP_GRAY; + + for (i = 0; i < NUMSUBOPT; i++) { + if (dinfo->num_components == pixelsize[i] || + ((dinfo->jpeg_color_space == JCS_YCCK || + dinfo->jpeg_color_space == JCS_CMYK) && + pixelsize[i] == 3 && dinfo->num_components == 4)) { + if (dinfo->comp_info[0].h_samp_factor == tjMCUWidth[i] / 8 && + dinfo->comp_info[0].v_samp_factor == tjMCUHeight[i] / 8) { + int match = 0; + + for (k = 1; k < dinfo->num_components; k++) { + int href = 1, vref = 1; + + if ((dinfo->jpeg_color_space == JCS_YCCK || + dinfo->jpeg_color_space == JCS_CMYK) && k == 3) { + href = tjMCUWidth[i] / 8; vref = tjMCUHeight[i] / 8; + } + if (dinfo->comp_info[k].h_samp_factor == href && + dinfo->comp_info[k].v_samp_factor == vref) + match++; + } + if (match == dinfo->num_components - 1) { + retval = i; break; + } + } + /* Handle 4:2:2 and 4:4:0 images whose sampling factors are specified + in non-standard ways. */ + if (dinfo->comp_info[0].h_samp_factor == 2 && + dinfo->comp_info[0].v_samp_factor == 2 && + (i == TJSAMP_422 || i == TJSAMP_440)) { + int match = 0; + + for (k = 1; k < dinfo->num_components; k++) { + int href = tjMCUHeight[i] / 8, vref = tjMCUWidth[i] / 8; + + if ((dinfo->jpeg_color_space == JCS_YCCK || + dinfo->jpeg_color_space == JCS_CMYK) && k == 3) { + href = vref = 2; + } + if (dinfo->comp_info[k].h_samp_factor == href && + dinfo->comp_info[k].v_samp_factor == vref) + match++; + } + if (match == dinfo->num_components - 1) { + retval = i; break; + } + } + } + } + return retval; } -#ifndef JCS_EXTENSIONS - -/* Conversion functions to emulate the colorspace extensions. 
This allows the - TurboJPEG wrapper to be used with libjpeg */ - -#define TORGB(PS, ROFFSET, GOFFSET, BOFFSET) { \ - int rowPad=pitch-width*PS; \ - while(height--) \ - { \ - unsigned char *endOfRow=src+width*PS; \ - while(srcisInstanceError) { + this->isInstanceError = FALSE; + return this->errStr; + } else + return errStr; } -static void fromRGB(unsigned char *src, unsigned char *dst, int width, - int pitch, int height, int pixelFormat) + +DLLEXPORT char *tjGetErrorStr(void) { - switch(pixelFormat) - { - case TJPF_RGB: - #if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=3 - FROMRGB(3, 0, 1, 2,); - #endif - break; - case TJPF_BGR: - #if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=3 - FROMRGB(3, 2, 1, 0,); - #endif - break; - case TJPF_RGBX: - #if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=4 - FROMRGB(4, 0, 1, 2,); - #endif - break; - case TJPF_RGBA: - #if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=4 - FROMRGB(4, 0, 1, 2, dst[3]=0xFF;); - #endif - break; - case TJPF_BGRX: - #if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=4 - FROMRGB(4, 2, 1, 0,); - #endif - break; - case TJPF_BGRA: - #if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=4 - FROMRGB(4, 2, 1, 0, dst[3]=0xFF;); return; - #endif - break; - case TJPF_XRGB: - #if RGB_RED!=1 || RGB_GREEN!=2 || RGB_BLUE!=3 || RGB_PIXELSIZE!=4 - FROMRGB(4, 1, 2, 3,); return; - #endif - break; - case TJPF_ARGB: - #if RGB_RED!=1 || RGB_GREEN!=2 || RGB_BLUE!=3 || RGB_PIXELSIZE!=4 - FROMRGB(4, 1, 2, 3, dst[0]=0xFF;); return; - #endif - break; - case TJPF_XBGR: - #if RGB_RED!=3 || RGB_GREEN!=2 || RGB_BLUE!=1 || RGB_PIXELSIZE!=4 - FROMRGB(4, 3, 2, 1,); return; - #endif - break; - case TJPF_ABGR: - #if RGB_RED!=3 || RGB_GREEN!=2 || RGB_BLUE!=1 || RGB_PIXELSIZE!=4 - FROMRGB(4, 3, 2, 1, dst[0]=0xFF;); return; - #endif - break; - } + return errStr; } -#endif - -/* General API functions */ - -DLLEXPORT char* DLLCALL tjGetErrorStr(void) +DLLEXPORT 
int tjGetErrorCode(tjhandle handle) { - return errStr; + tjinstance *this = (tjinstance *)handle; + + if (this && this->jerr.warning) return TJERR_WARNING; + else return TJERR_FATAL; } -DLLEXPORT int DLLCALL tjDestroy(tjhandle handle) +DLLEXPORT int tjDestroy(tjhandle handle) { - getinstance(handle); - if(setjmp(this->jerr.setjmp_buffer)) return -1; - if(this->init&COMPRESS) jpeg_destroy_compress(cinfo); - if(this->init&DECOMPRESS) jpeg_destroy_decompress(dinfo); - free(this); - return 0; + getinstance(handle); + + if (setjmp(this->jerr.setjmp_buffer)) return -1; + if (this->init & COMPRESS) jpeg_destroy_compress(cinfo); + if (this->init & DECOMPRESS) jpeg_destroy_decompress(dinfo); + free(this); + return 0; } @@ -564,15 +411,15 @@ DLLEXPORT int DLLCALL tjDestroy(tjhandle handle) with turbojpeg.dll for compatibility reasons. However, these functions can potentially be used for other purposes by different implementations. */ -DLLEXPORT void DLLCALL tjFree(unsigned char *buf) +DLLEXPORT void tjFree(unsigned char *buf) { - if(buf) free(buf); + if (buf) free(buf); } -DLLEXPORT unsigned char *DLLCALL tjAlloc(int bytes) +DLLEXPORT unsigned char *tjAlloc(int bytes) { - return (unsigned char *)malloc(bytes); + return (unsigned char *)malloc(bytes); } @@ -580,672 +427,660 @@ DLLEXPORT unsigned char *DLLCALL tjAlloc(int bytes) static tjhandle _tjInitCompress(tjinstance *this) { - static unsigned char buffer[1]; - unsigned char *buf=buffer; unsigned long size=1; - - /* This is also straight out of example.c */ - this->cinfo.err=jpeg_std_error(&this->jerr.pub); - this->jerr.pub.error_exit=my_error_exit; - this->jerr.pub.output_message=my_output_message; - this->jerr.emit_message=this->jerr.pub.emit_message; - this->jerr.pub.emit_message=my_emit_message; - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. 
*/ - if(this) free(this); - return NULL; - } - - jpeg_create_compress(&this->cinfo); - /* Make an initial call so it will create the destination manager */ - jpeg_mem_dest_tj(&this->cinfo, &buf, &size, 0); - - this->init|=COMPRESS; - return (tjhandle)this; + static unsigned char buffer[1]; + unsigned char *buf = buffer; + unsigned long size = 1; + + /* This is also straight out of example.txt */ + this->cinfo.err = jpeg_std_error(&this->jerr.pub); + this->jerr.pub.error_exit = my_error_exit; + this->jerr.pub.output_message = my_output_message; + this->jerr.emit_message = this->jerr.pub.emit_message; + this->jerr.pub.emit_message = my_emit_message; + this->jerr.pub.addon_message_table = turbojpeg_message_table; + this->jerr.pub.first_addon_message = JMSG_FIRSTADDONCODE; + this->jerr.pub.last_addon_message = JMSG_LASTADDONCODE; + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + if (this) free(this); + return NULL; + } + + jpeg_create_compress(&this->cinfo); + /* Make an initial call so it will create the destination manager */ + jpeg_mem_dest_tj(&this->cinfo, &buf, &size, 0); + + this->init |= COMPRESS; + return (tjhandle)this; } -DLLEXPORT tjhandle DLLCALL tjInitCompress(void) +DLLEXPORT tjhandle tjInitCompress(void) { - tjinstance *this=NULL; - if((this=(tjinstance *)malloc(sizeof(tjinstance)))==NULL) - { - snprintf(errStr, JMSG_LENGTH_MAX, - "tjInitCompress(): Memory allocation failure"); - return NULL; - } - MEMZERO(this, sizeof(tjinstance)); - return _tjInitCompress(this); + tjinstance *this = NULL; + + if ((this = (tjinstance *)malloc(sizeof(tjinstance))) == NULL) { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjInitCompress(): Memory allocation failure"); + return NULL; + } + MEMZERO(this, sizeof(tjinstance)); + snprintf(this->errStr, JMSG_LENGTH_MAX, "No error"); + return _tjInitCompress(this); } -DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height, - int jpegSubsamp) +DLLEXPORT unsigned long 
tjBufSize(int width, int height, int jpegSubsamp) { - unsigned long retval=0; int mcuw, mcuh, chromasf; - if(width<1 || height<1 || jpegSubsamp<0 || jpegSubsamp>=NUMSUBOPT) - _throw("tjBufSize(): Invalid argument"); - - /* This allows for rare corner cases in which a JPEG image can actually be - larger than the uncompressed input (we wouldn't mention it if it hadn't - happened before.) */ - mcuw=tjMCUWidth[jpegSubsamp]; - mcuh=tjMCUHeight[jpegSubsamp]; - chromasf=jpegSubsamp==TJSAMP_GRAY? 0: 4*64/(mcuw*mcuh); - retval=PAD(width, mcuw) * PAD(height, mcuh) * (2 + chromasf) + 2048; - - bailout: - return retval; + unsigned long retval = 0; + int mcuw, mcuh, chromasf; + + if (width < 1 || height < 1 || jpegSubsamp < 0 || jpegSubsamp >= NUMSUBOPT) + _throwg("tjBufSize(): Invalid argument"); + + /* This allows for rare corner cases in which a JPEG image can actually be + larger than the uncompressed input (we wouldn't mention it if it hadn't + happened before.) */ + mcuw = tjMCUWidth[jpegSubsamp]; + mcuh = tjMCUHeight[jpegSubsamp]; + chromasf = jpegSubsamp == TJSAMP_GRAY ? 0 : 4 * 64 / (mcuw * mcuh); + retval = PAD(width, mcuw) * PAD(height, mcuh) * (2 + chromasf) + 2048; + +bailout: + return retval; } -DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height) +DLLEXPORT unsigned long TJBUFSIZE(int width, int height) { - unsigned long retval=0; - if(width<1 || height<1) - _throw("TJBUFSIZE(): Invalid argument"); + unsigned long retval = 0; + + if (width < 1 || height < 1) + _throwg("TJBUFSIZE(): Invalid argument"); - /* This allows for rare corner cases in which a JPEG image can actually be - larger than the uncompressed input (we wouldn't mention it if it hadn't - happened before.) */ - retval=PAD(width, 16) * PAD(height, 16) * 6 + 2048; + /* This allows for rare corner cases in which a JPEG image can actually be + larger than the uncompressed input (we wouldn't mention it if it hadn't + happened before.) 
*/ + retval = PAD(width, 16) * PAD(height, 16) * 6 + 2048; - bailout: - return retval; +bailout: + return retval; } -DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height, - int subsamp) +DLLEXPORT unsigned long tjBufSizeYUV2(int width, int pad, int height, + int subsamp) { - int retval=0, nc, i; - - if(subsamp<0 || subsamp>=NUMSUBOPT) - _throw("tjBufSizeYUV2(): Invalid argument"); - - nc=(subsamp==TJSAMP_GRAY? 1:3); - for(i=0; i= NUMSUBOPT) + _throwg("tjBufSizeYUV2(): Invalid argument"); + + nc = (subsamp == TJSAMP_GRAY ? 1 : 3); + for (i = 0; i < nc; i++) { + int pw = tjPlaneWidth(i, width, subsamp); + int stride = PAD(pw, pad); + int ph = tjPlaneHeight(i, height, subsamp); + + if (pw < 0 || ph < 0) return -1; + else retval += stride * ph; + } + +bailout: + return retval; } -DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height, - int subsamp) +DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp) { - return tjBufSizeYUV2(width, 4, height, subsamp); + return tjBufSizeYUV2(width, 4, height, subsamp); } -DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height, - int subsamp) +DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int subsamp) { - return tjBufSizeYUV(width, height, subsamp); + return tjBufSizeYUV(width, height, subsamp); } DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp) { - int pw, nc, retval=0; - - if(width<1 || subsamp<0 || subsamp>=TJ_NUMSAMP) - _throw("tjPlaneWidth(): Invalid argument"); - nc=(subsamp==TJSAMP_GRAY? 1:3); - if(componentID<0 || componentID>=nc) - _throw("tjPlaneWidth(): Invalid argument"); - - pw=PAD(width, tjMCUWidth[subsamp]/8); - if(componentID==0) - retval=pw; - else - retval=pw*8/tjMCUWidth[subsamp]; - - bailout: - return retval; + int pw, nc, retval = 0; + + if (width < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP) + _throwg("tjPlaneWidth(): Invalid argument"); + nc = (subsamp == TJSAMP_GRAY ? 
1 : 3); + if (componentID < 0 || componentID >= nc) + _throwg("tjPlaneWidth(): Invalid argument"); + + pw = PAD(width, tjMCUWidth[subsamp] / 8); + if (componentID == 0) + retval = pw; + else + retval = pw * 8 / tjMCUWidth[subsamp]; + +bailout: + return retval; } DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp) { - int ph, nc, retval=0; - - if(height<1 || subsamp<0 || subsamp>=TJ_NUMSAMP) - _throw("tjPlaneHeight(): Invalid argument"); - nc=(subsamp==TJSAMP_GRAY? 1:3); - if(componentID<0 || componentID>=nc) - _throw("tjPlaneHeight(): Invalid argument"); - - ph=PAD(height, tjMCUHeight[subsamp]/8); - if(componentID==0) - retval=ph; - else - retval=ph*8/tjMCUHeight[subsamp]; - - bailout: - return retval; + int ph, nc, retval = 0; + + if (height < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP) + _throwg("tjPlaneHeight(): Invalid argument"); + nc = (subsamp == TJSAMP_GRAY ? 1 : 3); + if (componentID < 0 || componentID >= nc) + _throwg("tjPlaneHeight(): Invalid argument"); + + ph = PAD(height, tjMCUHeight[subsamp] / 8); + if (componentID == 0) + retval = ph; + else + retval = ph * 8 / tjMCUHeight[subsamp]; + +bailout: + return retval; } -DLLEXPORT unsigned long DLLCALL tjPlaneSizeYUV(int componentID, int width, - int stride, int height, int subsamp) +DLLEXPORT unsigned long tjPlaneSizeYUV(int componentID, int width, int stride, + int height, int subsamp) { - unsigned long retval=0; - int pw, ph; + unsigned long retval = 0; + int pw, ph; - if(width<1 || height<1 || subsamp<0 || subsamp>=NUMSUBOPT) - _throw("tjPlaneSizeYUV(): Invalid argument"); + if (width < 1 || height < 1 || subsamp < 0 || subsamp >= NUMSUBOPT) + _throwg("tjPlaneSizeYUV(): Invalid argument"); - pw=tjPlaneWidth(componentID, width, subsamp); - ph=tjPlaneHeight(componentID, height, subsamp); - if(pw<0 || ph<0) return -1; + pw = tjPlaneWidth(componentID, width, subsamp); + ph = tjPlaneHeight(componentID, height, subsamp); + if (pw < 0 || ph < 0) return -1; - if(stride==0) stride=pw; - else 
stride=abs(stride); + if (stride == 0) stride = pw; + else stride = abs(stride); - retval=stride*(ph-1)+pw; + retval = stride * (ph - 1) + pw; - bailout: - return retval; +bailout: + return retval; } -DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, const unsigned char *srcBuf, - int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, - unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags) +DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, + unsigned char **jpegBuf, unsigned long *jpegSize, + int jpegSubsamp, int jpegQual, int flags) { - int i, retval=0, alloc=1; JSAMPROW *row_pointer=NULL; - #ifndef JCS_EXTENSIONS - unsigned char *rgbBuf=NULL; - #endif - - getcinstance(handle) - if((this->init&COMPRESS)==0) - _throw("tjCompress2(): Instance has not been initialized for compression"); - - if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0 - || pixelFormat>=TJ_NUMPF || jpegBuf==NULL || jpegSize==NULL - || jpegSubsamp<0 || jpegSubsamp>=NUMSUBOPT || jpegQual<0 || jpegQual>100) - _throw("tjCompress2(): Invalid argument"); - - if(pitch==0) pitch=width*tjPixelSize[pixelFormat]; - - #ifndef JCS_EXTENSIONS - if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK) - { - rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE); - if(!rgbBuf) _throw("tjCompress2(): Memory allocation failure"); - srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf); - pitch=width*RGB_PIXELSIZE; - } - #endif - - if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*height))==NULL) - _throw("tjCompress2(): Memory allocation failure"); - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. 
*/ - retval=-1; goto bailout; - } - - cinfo->image_width=width; - cinfo->image_height=height; - - if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); - else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); - else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); - - if(flags&TJFLAG_NOREALLOC) - { - alloc=0; *jpegSize=tjBufSize(width, height, jpegSubsamp); - } - jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc); - if(setCompDefaults(cinfo, pixelFormat, jpegSubsamp, jpegQual, flags)==-1) - return -1; - - jpeg_start_compress(cinfo, TRUE); - for(i=0; inext_scanlineimage_height) - { - jpeg_write_scanlines(cinfo, &row_pointer[cinfo->next_scanline], - cinfo->image_height-cinfo->next_scanline); - } - jpeg_finish_compress(cinfo); - - bailout: - if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); - #ifndef JCS_EXTENSIONS - if(rgbBuf) free(rgbBuf); - #endif - if(row_pointer) free(row_pointer); - if(this->jerr.warning) retval=-1; - return retval; + int i, retval = 0, alloc = 1; + JSAMPROW *row_pointer = NULL; + + getcinstance(handle) + this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE; + if ((this->init & COMPRESS) == 0) + _throw("tjCompress2(): Instance has not been initialized for compression"); + + if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 || + pixelFormat < 0 || pixelFormat >= TJ_NUMPF || jpegBuf == NULL || + jpegSize == NULL || jpegSubsamp < 0 || jpegSubsamp >= NUMSUBOPT || + jpegQual < 0 || jpegQual > 100) + _throw("tjCompress2(): Invalid argument"); + + if (pitch == 0) pitch = width * tjPixelSize[pixelFormat]; + + if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * height)) == NULL) + _throw("tjCompress2(): Memory allocation failure"); + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + retval = -1; goto bailout; + } + + cinfo->image_width = width; + cinfo->image_height = height; + +#ifndef NO_PUTENV + if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); +#endif + + if (flags & TJFLAG_NOREALLOC) { + alloc = 0; *jpegSize = tjBufSize(width, height, jpegSubsamp); + } + jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc); + if (setCompDefaults(cinfo, pixelFormat, jpegSubsamp, jpegQual, flags) == -1) + return -1; + + jpeg_start_compress(cinfo, TRUE); + for (i = 0; i < height; i++) { + if (flags & TJFLAG_BOTTOMUP) + row_pointer[i] = (JSAMPROW)&srcBuf[(height - i - 1) * pitch]; + else + row_pointer[i] = (JSAMPROW)&srcBuf[i * pitch]; + } + while (cinfo->next_scanline < cinfo->image_height) + jpeg_write_scanlines(cinfo, &row_pointer[cinfo->next_scanline], + cinfo->image_height - cinfo->next_scanline); + jpeg_finish_compress(cinfo); + +bailout: + if (cinfo->global_state > CSTATE_START) jpeg_abort_compress(cinfo); + if (row_pointer) free(row_pointer); + if (this->jerr.warning) retval = -1; + this->jerr.stopOnWarning = FALSE; + return retval; } -DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf, - int width, int pitch, int height, int pixelSize, unsigned char *jpegBuf, - unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags) +DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width, + int pitch, int height, int pixelSize, + unsigned char *jpegBuf, unsigned long *jpegSize, + int jpegSubsamp, int jpegQual, int flags) { - int retval=0; unsigned long size; - if(flags&TJ_YUV) - { - size=tjBufSizeYUV(width, height, jpegSubsamp); - retval=tjEncodeYUV2(handle, srcBuf, width, pitch, height, - getPixelFormat(pixelSize, flags), jpegBuf, jpegSubsamp, flags); - } - else - { - retval=tjCompress2(handle, srcBuf, width, pitch, height, - getPixelFormat(pixelSize, flags), &jpegBuf, &size, 
jpegSubsamp, jpegQual, - flags|TJFLAG_NOREALLOC); - } - *jpegSize=size; - return retval; + int retval = 0; + unsigned long size; + + if (flags & TJ_YUV) { + size = tjBufSizeYUV(width, height, jpegSubsamp); + retval = tjEncodeYUV2(handle, srcBuf, width, pitch, height, + getPixelFormat(pixelSize, flags), jpegBuf, + jpegSubsamp, flags); + } else { + retval = tjCompress2(handle, srcBuf, width, pitch, height, + getPixelFormat(pixelSize, flags), &jpegBuf, &size, + jpegSubsamp, jpegQual, flags | TJFLAG_NOREALLOC); + } + *jpegSize = size; + return retval; } -DLLEXPORT int DLLCALL tjEncodeYUVPlanes(tjhandle handle, - const unsigned char *srcBuf, int width, int pitch, int height, - int pixelFormat, unsigned char **dstPlanes, int *strides, int subsamp, - int flags) +DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, + int pixelFormat, unsigned char **dstPlanes, + int *strides, int subsamp, int flags) { - int i, retval=0; JSAMPROW *row_pointer=NULL; - JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS]; - JSAMPROW *tmpbuf[MAX_COMPONENTS], *tmpbuf2[MAX_COMPONENTS]; - JSAMPROW *outbuf[MAX_COMPONENTS]; - int row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS]; - JSAMPLE *ptr; - jpeg_component_info *compptr; - #ifndef JCS_EXTENSIONS - unsigned char *rgbBuf=NULL; - #endif - - getcinstance(handle); - - for(i=0; iinit&COMPRESS)==0) - _throw("tjEncodeYUVPlanes(): Instance has not been initialized for compression"); - - if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0 - || pixelFormat>=TJ_NUMPF || !dstPlanes || !dstPlanes[0] || subsamp<0 - || subsamp>=NUMSUBOPT) - _throw("tjEncodeYUVPlanes(): Invalid argument"); - if(subsamp!=TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2])) - _throw("tjEncodeYUVPlanes(): Invalid argument"); - - if(pixelFormat==TJPF_CMYK) - _throw("tjEncodeYUVPlanes(): Cannot generate YUV images from CMYK pixels"); - - if(pitch==0) pitch=width*tjPixelSize[pixelFormat]; - - 
#ifndef JCS_EXTENSIONS - if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK) - { - rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE); - if(!rgbBuf) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); - srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf); - pitch=width*RGB_PIXELSIZE; - } - #endif - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. */ - retval=-1; goto bailout; - } - - cinfo->image_width=width; - cinfo->image_height=height; - - if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); - else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); - else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); - - if(setCompDefaults(cinfo, pixelFormat, subsamp, -1, flags)==-1) return -1; - - /* Execute only the parts of jpeg_start_compress() that we need. If we - were to call the whole jpeg_start_compress() function, then it would try - to write the file headers, which could overflow the output buffer if the - YUV image were very small. 
*/ - if(cinfo->global_state!=CSTATE_START) - _throw("tjEncodeYUVPlanes(): libjpeg API is in the wrong state"); - (*cinfo->err->reset_error_mgr)((j_common_ptr)cinfo); - jinit_c_master_control(cinfo, FALSE); - jinit_color_converter(cinfo); - jinit_downsampler(cinfo); - (*cinfo->cconvert->start_pass)(cinfo); - - pw0=PAD(width, cinfo->max_h_samp_factor); - ph0=PAD(height, cinfo->max_v_samp_factor); - - if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph0))==NULL) - _throw("tjEncodeYUVPlanes(): Memory allocation failure"); - for(i=0; inum_components; i++) - { - compptr=&cinfo->comp_info[i]; - _tmpbuf[i]=(JSAMPLE *)malloc( - PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE) - /compptr->h_samp_factor, 16) * cinfo->max_v_samp_factor + 16); - if(!_tmpbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); - tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*cinfo->max_v_samp_factor); - if(!tmpbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); - for(row=0; rowmax_v_samp_factor; row++) - { - unsigned char *_tmpbuf_aligned= - (unsigned char *)PAD((size_t)_tmpbuf[i], 16); - tmpbuf[i][row]=&_tmpbuf_aligned[ - PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE) - /compptr->h_samp_factor, 16) * row]; - } - _tmpbuf2[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16) - * compptr->v_samp_factor + 16); - if(!_tmpbuf2[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); - tmpbuf2[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor); - if(!tmpbuf2[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); - for(row=0; rowv_samp_factor; row++) - { - unsigned char *_tmpbuf2_aligned= - (unsigned char *)PAD((size_t)_tmpbuf2[i], 16); - tmpbuf2[i][row]=&_tmpbuf2_aligned[ - PAD(compptr->width_in_blocks*DCTSIZE, 16) * row]; - } - pw[i]=pw0*compptr->h_samp_factor/cinfo->max_h_samp_factor; - ph[i]=ph0*compptr->v_samp_factor/cinfo->max_v_samp_factor; - outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]); - 
if(!outbuf[i]) _throw("tjEncodeYUVPlanes(): Memory allocation failure"); - ptr=dstPlanes[i]; - for(row=0; rowjerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. */ - retval=-1; goto bailout; - } - - for(row=0; rowmax_v_samp_factor) - { - (*cinfo->cconvert->color_convert)(cinfo, &row_pointer[row], tmpbuf, 0, - cinfo->max_v_samp_factor); - (cinfo->downsample->downsample)(cinfo, tmpbuf, 0, tmpbuf2, 0); - for(i=0, compptr=cinfo->comp_info; inum_components; i++, compptr++) - jcopy_sample_rows(tmpbuf2[i], 0, outbuf[i], - row*compptr->v_samp_factor/cinfo->max_v_samp_factor, - compptr->v_samp_factor, pw[i]); - } - cinfo->next_scanline+=height; - jpeg_abort_compress(cinfo); - - bailout: - if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); - #ifndef JCS_EXTENSIONS - if(rgbBuf) free(rgbBuf); - #endif - if(row_pointer) free(row_pointer); - for(i=0; ijerr.warning) retval=-1; - return retval; + JSAMPROW *row_pointer = NULL; + JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS]; + JSAMPROW *tmpbuf[MAX_COMPONENTS], *tmpbuf2[MAX_COMPONENTS]; + JSAMPROW *outbuf[MAX_COMPONENTS]; + int i, retval = 0, row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS]; + JSAMPLE *ptr; + jpeg_component_info *compptr; + + getcinstance(handle); + this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? 
TRUE : FALSE; + + for (i = 0; i < MAX_COMPONENTS; i++) { + tmpbuf[i] = NULL; _tmpbuf[i] = NULL; + tmpbuf2[i] = NULL; _tmpbuf2[i] = NULL; outbuf[i] = NULL; + } + + if ((this->init & COMPRESS) == 0) + _throw("tjEncodeYUVPlanes(): Instance has not been initialized for compression"); + + if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 || + pixelFormat < 0 || pixelFormat >= TJ_NUMPF || !dstPlanes || + !dstPlanes[0] || subsamp < 0 || subsamp >= NUMSUBOPT) + _throw("tjEncodeYUVPlanes(): Invalid argument"); + if (subsamp != TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2])) + _throw("tjEncodeYUVPlanes(): Invalid argument"); + + if (pixelFormat == TJPF_CMYK) + _throw("tjEncodeYUVPlanes(): Cannot generate YUV images from CMYK pixels"); + + if (pitch == 0) pitch = width * tjPixelSize[pixelFormat]; + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + + cinfo->image_width = width; + cinfo->image_height = height; + +#ifndef NO_PUTENV + if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); +#endif + + if (setCompDefaults(cinfo, pixelFormat, subsamp, -1, flags) == -1) return -1; + + /* Execute only the parts of jpeg_start_compress() that we need. If we + were to call the whole jpeg_start_compress() function, then it would try + to write the file headers, which could overflow the output buffer if the + YUV image were very small. 
*/ + if (cinfo->global_state != CSTATE_START) + _throw("tjEncodeYUVPlanes(): libjpeg API is in the wrong state"); + (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo); + jinit_c_master_control(cinfo, FALSE); + jinit_color_converter(cinfo); + jinit_downsampler(cinfo); + (*cinfo->cconvert->start_pass) (cinfo); + + pw0 = PAD(width, cinfo->max_h_samp_factor); + ph0 = PAD(height, cinfo->max_v_samp_factor); + + if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph0)) == NULL) + _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + for (i = 0; i < height; i++) { + if (flags & TJFLAG_BOTTOMUP) + row_pointer[i] = (JSAMPROW)&srcBuf[(height - i - 1) * pitch]; + else + row_pointer[i] = (JSAMPROW)&srcBuf[i * pitch]; + } + if (height < ph0) + for (i = height; i < ph0; i++) row_pointer[i] = row_pointer[height - 1]; + + for (i = 0; i < cinfo->num_components; i++) { + compptr = &cinfo->comp_info[i]; + _tmpbuf[i] = (JSAMPLE *)malloc( + PAD((compptr->width_in_blocks * cinfo->max_h_samp_factor * DCTSIZE) / + compptr->h_samp_factor, 32) * + cinfo->max_v_samp_factor + 32); + if (!_tmpbuf[i]) + _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + tmpbuf[i] = + (JSAMPROW *)malloc(sizeof(JSAMPROW) * cinfo->max_v_samp_factor); + if (!tmpbuf[i]) + _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + for (row = 0; row < cinfo->max_v_samp_factor; row++) { + unsigned char *_tmpbuf_aligned = + (unsigned char *)PAD((size_t)_tmpbuf[i], 32); + + tmpbuf[i][row] = &_tmpbuf_aligned[ + PAD((compptr->width_in_blocks * cinfo->max_h_samp_factor * DCTSIZE) / + compptr->h_samp_factor, 32) * row]; + } + _tmpbuf2[i] = + (JSAMPLE *)malloc(PAD(compptr->width_in_blocks * DCTSIZE, 32) * + compptr->v_samp_factor + 32); + if (!_tmpbuf2[i]) + _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + tmpbuf2[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * compptr->v_samp_factor); + if (!tmpbuf2[i]) + _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + for (row = 0; row < 
compptr->v_samp_factor; row++) { + unsigned char *_tmpbuf2_aligned = + (unsigned char *)PAD((size_t)_tmpbuf2[i], 32); + + tmpbuf2[i][row] = + &_tmpbuf2_aligned[PAD(compptr->width_in_blocks * DCTSIZE, 32) * row]; + } + pw[i] = pw0 * compptr->h_samp_factor / cinfo->max_h_samp_factor; + ph[i] = ph0 * compptr->v_samp_factor / cinfo->max_v_samp_factor; + outbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i]); + if (!outbuf[i]) + _throw("tjEncodeYUVPlanes(): Memory allocation failure"); + ptr = dstPlanes[i]; + for (row = 0; row < ph[i]; row++) { + outbuf[i][row] = ptr; + ptr += (strides && strides[i] != 0) ? strides[i] : pw[i]; + } + } + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + + for (row = 0; row < ph0; row += cinfo->max_v_samp_factor) { + (*cinfo->cconvert->color_convert) (cinfo, &row_pointer[row], tmpbuf, 0, + cinfo->max_v_samp_factor); + (cinfo->downsample->downsample) (cinfo, tmpbuf, 0, tmpbuf2, 0); + for (i = 0, compptr = cinfo->comp_info; i < cinfo->num_components; + i++, compptr++) + jcopy_sample_rows(tmpbuf2[i], 0, outbuf[i], + row * compptr->v_samp_factor / cinfo->max_v_samp_factor, + compptr->v_samp_factor, pw[i]); + } + cinfo->next_scanline += height; + jpeg_abort_compress(cinfo); + +bailout: + if (cinfo->global_state > CSTATE_START) jpeg_abort_compress(cinfo); + if (row_pointer) free(row_pointer); + for (i = 0; i < MAX_COMPONENTS; i++) { + if (tmpbuf[i] != NULL) free(tmpbuf[i]); + if (_tmpbuf[i] != NULL) free(_tmpbuf[i]); + if (tmpbuf2[i] != NULL) free(tmpbuf2[i]); + if (_tmpbuf2[i] != NULL) free(_tmpbuf2[i]); + if (outbuf[i] != NULL) free(outbuf[i]); + } + if (this->jerr.warning) retval = -1; + this->jerr.stopOnWarning = FALSE; + return retval; } -DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle, - const unsigned char *srcBuf, int width, int pitch, int height, - int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags) +DLLEXPORT int 
tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, + unsigned char *dstBuf, int pad, int subsamp, + int flags) { - unsigned char *dstPlanes[3]; - int pw0, ph0, strides[3], retval=-1; - - if(width<=0 || height<=0 || dstBuf==NULL || pad<0 || !isPow2(pad) - || subsamp<0 || subsamp>=NUMSUBOPT) - _throw("tjEncodeYUV3(): Invalid argument"); - - pw0=tjPlaneWidth(0, width, subsamp); - ph0=tjPlaneHeight(0, height, subsamp); - dstPlanes[0]=dstBuf; - strides[0]=PAD(pw0, pad); - if(subsamp==TJSAMP_GRAY) - { - strides[1]=strides[2]=0; - dstPlanes[1]=dstPlanes[2]=NULL; - } - else - { - int pw1=tjPlaneWidth(1, width, subsamp); - int ph1=tjPlaneHeight(1, height, subsamp); - strides[1]=strides[2]=PAD(pw1, pad); - dstPlanes[1]=dstPlanes[0]+strides[0]*ph0; - dstPlanes[2]=dstPlanes[1]+strides[1]*ph1; - } - - return tjEncodeYUVPlanes(handle, srcBuf, width, pitch, height, pixelFormat, - dstPlanes, strides, subsamp, flags); - - bailout: - return retval; + unsigned char *dstPlanes[3]; + int pw0, ph0, strides[3], retval = -1; + tjinstance *this = (tjinstance *)handle; + + if (!this) _throwg("tjEncodeYUV3(): Invalid handle"); + this->isInstanceError = FALSE; + + if (width <= 0 || height <= 0 || dstBuf == NULL || pad < 0 || !isPow2(pad) || + subsamp < 0 || subsamp >= NUMSUBOPT) + _throw("tjEncodeYUV3(): Invalid argument"); + + pw0 = tjPlaneWidth(0, width, subsamp); + ph0 = tjPlaneHeight(0, height, subsamp); + dstPlanes[0] = dstBuf; + strides[0] = PAD(pw0, pad); + if (subsamp == TJSAMP_GRAY) { + strides[1] = strides[2] = 0; + dstPlanes[1] = dstPlanes[2] = NULL; + } else { + int pw1 = tjPlaneWidth(1, width, subsamp); + int ph1 = tjPlaneHeight(1, height, subsamp); + + strides[1] = strides[2] = PAD(pw1, pad); + dstPlanes[1] = dstPlanes[0] + strides[0] * ph0; + dstPlanes[2] = dstPlanes[1] + strides[1] * ph1; + } + + return tjEncodeYUVPlanes(handle, srcBuf, width, pitch, height, pixelFormat, + dstPlanes, strides, subsamp, flags); + 
+bailout: + return retval; } -DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, - int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, - int subsamp, int flags) +DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width, + int pitch, int height, int pixelFormat, + unsigned char *dstBuf, int subsamp, int flags) { - return tjEncodeYUV3(handle, srcBuf, width, pitch, height, pixelFormat, - dstBuf, 4, subsamp, flags); + return tjEncodeYUV3(handle, srcBuf, width, pitch, height, pixelFormat, + dstBuf, 4, subsamp, flags); } -DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, - int width, int pitch, int height, int pixelSize, unsigned char *dstBuf, - int subsamp, int flags) +DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width, + int pitch, int height, int pixelSize, + unsigned char *dstBuf, int subsamp, int flags) { - return tjEncodeYUV2(handle, srcBuf, width, pitch, height, - getPixelFormat(pixelSize, flags), dstBuf, subsamp, flags); + return tjEncodeYUV2(handle, srcBuf, width, pitch, height, + getPixelFormat(pixelSize, flags), dstBuf, subsamp, + flags); } -DLLEXPORT int DLLCALL tjCompressFromYUVPlanes(tjhandle handle, - const unsigned char **srcPlanes, int width, const int *strides, int height, - int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, - int flags) +DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, + int width, const int *strides, + int height, int subsamp, + unsigned char **jpegBuf, + unsigned long *jpegSize, int jpegQual, + int flags) { - int i, row, retval=0, alloc=1; JSAMPROW *inbuf[MAX_COMPONENTS]; - int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS], - tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS]; - JSAMPLE *_tmpbuf=NULL, *ptr; JSAMPROW *tmpbuf[MAX_COMPONENTS]; - - getcinstance(handle) - - for(i=0; iinit&COMPRESS)==0) - _throw("tjCompressFromYUVPlanes(): Instance has 
not been initialized for compression"); - - if(!srcPlanes || !srcPlanes[0] || width<=0 || height<=0 || subsamp<0 - || subsamp>=NUMSUBOPT || jpegBuf==NULL || jpegSize==NULL || jpegQual<0 - || jpegQual>100) - _throw("tjCompressFromYUVPlanes(): Invalid argument"); - if(subsamp!=TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2])) - _throw("tjCompressFromYUVPlanes(): Invalid argument"); - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. */ - retval=-1; goto bailout; - } - - cinfo->image_width=width; - cinfo->image_height=height; - - if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); - else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); - else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); - - if(flags&TJFLAG_NOREALLOC) - { - alloc=0; *jpegSize=tjBufSize(width, height, subsamp); - } - jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc); - if(setCompDefaults(cinfo, TJPF_RGB, subsamp, jpegQual, flags)==-1) - return -1; - cinfo->raw_data_in=TRUE; - - jpeg_start_compress(cinfo, TRUE); - for(i=0; inum_components; i++) - { - jpeg_component_info *compptr=&cinfo->comp_info[i]; - int ih; - iw[i]=compptr->width_in_blocks*DCTSIZE; - ih=compptr->height_in_blocks*DCTSIZE; - pw[i]=PAD(cinfo->image_width, cinfo->max_h_samp_factor) - *compptr->h_samp_factor/cinfo->max_h_samp_factor; - ph[i]=PAD(cinfo->image_height, cinfo->max_v_samp_factor) - *compptr->v_samp_factor/cinfo->max_v_samp_factor; - if(iw[i]!=pw[i] || ih!=ph[i]) usetmpbuf=1; - th[i]=compptr->v_samp_factor*DCTSIZE; - tmpbufsize+=iw[i]*th[i]; - if((inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]))==NULL) - _throw("tjCompressFromYUVPlanes(): Memory allocation failure"); - ptr=(JSAMPLE *)srcPlanes[i]; - for(row=0; rownum_components; i++) - { - if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL) - _throw("tjCompressFromYUVPlanes(): Memory allocation failure"); - for(row=0; rowjerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an 
error. */ - retval=-1; goto bailout; - } - - for(row=0; row<(int)cinfo->image_height; - row+=cinfo->max_v_samp_factor*DCTSIZE) - { - JSAMPARRAY yuvptr[MAX_COMPONENTS]; - int crow[MAX_COMPONENTS]; - for(i=0; inum_components; i++) - { - jpeg_component_info *compptr=&cinfo->comp_info[i]; - crow[i]=row*compptr->v_samp_factor/cinfo->max_v_samp_factor; - if(usetmpbuf) - { - int j, k; - for(j=0; jmax_v_samp_factor*DCTSIZE); - } - jpeg_finish_compress(cinfo); - - bailout: - if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); - for(i=0; ijerr.warning) retval=-1; - return retval; + int i, row, retval = 0, alloc = 1; + int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS], + tmpbufsize = 0, usetmpbuf = 0, th[MAX_COMPONENTS]; + JSAMPLE *_tmpbuf = NULL, *ptr; + JSAMPROW *inbuf[MAX_COMPONENTS], *tmpbuf[MAX_COMPONENTS]; + + getcinstance(handle) + this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE; + + for (i = 0; i < MAX_COMPONENTS; i++) { + tmpbuf[i] = NULL; inbuf[i] = NULL; + } + + if ((this->init & COMPRESS) == 0) + _throw("tjCompressFromYUVPlanes(): Instance has not been initialized for compression"); + + if (!srcPlanes || !srcPlanes[0] || width <= 0 || height <= 0 || + subsamp < 0 || subsamp >= NUMSUBOPT || jpegBuf == NULL || + jpegSize == NULL || jpegQual < 0 || jpegQual > 100) + _throw("tjCompressFromYUVPlanes(): Invalid argument"); + if (subsamp != TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2])) + _throw("tjCompressFromYUVPlanes(): Invalid argument"); + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + retval = -1; goto bailout; + } + + cinfo->image_width = width; + cinfo->image_height = height; + +#ifndef NO_PUTENV + if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); +#endif + + if (flags & TJFLAG_NOREALLOC) { + alloc = 0; *jpegSize = tjBufSize(width, height, subsamp); + } + jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc); + if (setCompDefaults(cinfo, TJPF_RGB, subsamp, jpegQual, flags) == -1) + return -1; + cinfo->raw_data_in = TRUE; + + jpeg_start_compress(cinfo, TRUE); + for (i = 0; i < cinfo->num_components; i++) { + jpeg_component_info *compptr = &cinfo->comp_info[i]; + int ih; + + iw[i] = compptr->width_in_blocks * DCTSIZE; + ih = compptr->height_in_blocks * DCTSIZE; + pw[i] = PAD(cinfo->image_width, cinfo->max_h_samp_factor) * + compptr->h_samp_factor / cinfo->max_h_samp_factor; + ph[i] = PAD(cinfo->image_height, cinfo->max_v_samp_factor) * + compptr->v_samp_factor / cinfo->max_v_samp_factor; + if (iw[i] != pw[i] || ih != ph[i]) usetmpbuf = 1; + th[i] = compptr->v_samp_factor * DCTSIZE; + tmpbufsize += iw[i] * th[i]; + if ((inbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i])) == NULL) + _throw("tjCompressFromYUVPlanes(): Memory allocation failure"); + ptr = (JSAMPLE *)srcPlanes[i]; + for (row = 0; row < ph[i]; row++) { + inbuf[i][row] = ptr; + ptr += (strides && strides[i] != 0) ? 
strides[i] : pw[i]; + } + } + if (usetmpbuf) { + if ((_tmpbuf = (JSAMPLE *)malloc(sizeof(JSAMPLE) * tmpbufsize)) == NULL) + _throw("tjCompressFromYUVPlanes(): Memory allocation failure"); + ptr = _tmpbuf; + for (i = 0; i < cinfo->num_components; i++) { + if ((tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * th[i])) == NULL) + _throw("tjCompressFromYUVPlanes(): Memory allocation failure"); + for (row = 0; row < th[i]; row++) { + tmpbuf[i][row] = ptr; + ptr += iw[i]; + } + } + } + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + + for (row = 0; row < (int)cinfo->image_height; + row += cinfo->max_v_samp_factor * DCTSIZE) { + JSAMPARRAY yuvptr[MAX_COMPONENTS]; + int crow[MAX_COMPONENTS]; + + for (i = 0; i < cinfo->num_components; i++) { + jpeg_component_info *compptr = &cinfo->comp_info[i]; + + crow[i] = row * compptr->v_samp_factor / cinfo->max_v_samp_factor; + if (usetmpbuf) { + int j, k; + + for (j = 0; j < MIN(th[i], ph[i] - crow[i]); j++) { + memcpy(tmpbuf[i][j], inbuf[i][crow[i] + j], pw[i]); + /* Duplicate last sample in row to fill out MCU */ + for (k = pw[i]; k < iw[i]; k++) + tmpbuf[i][j][k] = tmpbuf[i][j][pw[i] - 1]; + } + /* Duplicate last row to fill out MCU */ + for (j = ph[i] - crow[i]; j < th[i]; j++) + memcpy(tmpbuf[i][j], tmpbuf[i][ph[i] - crow[i] - 1], iw[i]); + yuvptr[i] = tmpbuf[i]; + } else + yuvptr[i] = &inbuf[i][crow[i]]; + } + jpeg_write_raw_data(cinfo, yuvptr, cinfo->max_v_samp_factor * DCTSIZE); + } + jpeg_finish_compress(cinfo); + +bailout: + if (cinfo->global_state > CSTATE_START) jpeg_abort_compress(cinfo); + for (i = 0; i < MAX_COMPONENTS; i++) { + if (tmpbuf[i]) free(tmpbuf[i]); + if (inbuf[i]) free(inbuf[i]); + } + if (_tmpbuf) free(_tmpbuf); + if (this->jerr.warning) retval = -1; + this->jerr.stopOnWarning = FALSE; + return retval; } -DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, - const unsigned char *srcBuf, int width, int pad, int 
height, int subsamp, - unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags) +DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf, + int width, int pad, int height, int subsamp, + unsigned char **jpegBuf, + unsigned long *jpegSize, int jpegQual, + int flags) { - const unsigned char *srcPlanes[3]; - int pw0, ph0, strides[3], retval=-1; - - if(srcBuf==NULL || width<=0 || pad<1 || height<=0 || subsamp<0 - || subsamp>=NUMSUBOPT) - _throw("tjCompressFromYUV(): Invalid argument"); - - pw0=tjPlaneWidth(0, width, subsamp); - ph0=tjPlaneHeight(0, height, subsamp); - srcPlanes[0]=srcBuf; - strides[0]=PAD(pw0, pad); - if(subsamp==TJSAMP_GRAY) - { - strides[1]=strides[2]=0; - srcPlanes[1]=srcPlanes[2]=NULL; - } - else - { - int pw1=tjPlaneWidth(1, width, subsamp); - int ph1=tjPlaneHeight(1, height, subsamp); - strides[1]=strides[2]=PAD(pw1, pad); - srcPlanes[1]=srcPlanes[0]+strides[0]*ph0; - srcPlanes[2]=srcPlanes[1]+strides[1]*ph1; - } - - return tjCompressFromYUVPlanes(handle, srcPlanes, width, strides, height, - subsamp, jpegBuf, jpegSize, jpegQual, flags); - - bailout: - return retval; + const unsigned char *srcPlanes[3]; + int pw0, ph0, strides[3], retval = -1; + tjinstance *this = (tjinstance *)handle; + + if (!this) _throwg("tjCompressFromYUV(): Invalid handle"); + this->isInstanceError = FALSE; + + if (srcBuf == NULL || width <= 0 || pad < 1 || height <= 0 || subsamp < 0 || + subsamp >= NUMSUBOPT) + _throw("tjCompressFromYUV(): Invalid argument"); + + pw0 = tjPlaneWidth(0, width, subsamp); + ph0 = tjPlaneHeight(0, height, subsamp); + srcPlanes[0] = srcBuf; + strides[0] = PAD(pw0, pad); + if (subsamp == TJSAMP_GRAY) { + strides[1] = strides[2] = 0; + srcPlanes[1] = srcPlanes[2] = NULL; + } else { + int pw1 = tjPlaneWidth(1, width, subsamp); + int ph1 = tjPlaneHeight(1, height, subsamp); + + strides[1] = strides[2] = PAD(pw1, pad); + srcPlanes[1] = srcPlanes[0] + strides[0] * ph0; + srcPlanes[2] = srcPlanes[1] + strides[1] * 
ph1; + } + + return tjCompressFromYUVPlanes(handle, srcPlanes, width, strides, height, + subsamp, jpegBuf, jpegSize, jpegQual, flags); + +bailout: + return retval; } @@ -1253,923 +1088,1032 @@ DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, static tjhandle _tjInitDecompress(tjinstance *this) { - static unsigned char buffer[1]; - - /* This is also straight out of example.c */ - this->dinfo.err=jpeg_std_error(&this->jerr.pub); - this->jerr.pub.error_exit=my_error_exit; - this->jerr.pub.output_message=my_output_message; - this->jerr.emit_message=this->jerr.pub.emit_message; - this->jerr.pub.emit_message=my_emit_message; - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. */ - if(this) free(this); - return NULL; - } - - jpeg_create_decompress(&this->dinfo); - /* Make an initial call so it will create the source manager */ - jpeg_mem_src_tj(&this->dinfo, buffer, 1); - - this->init|=DECOMPRESS; - return (tjhandle)this; + static unsigned char buffer[1]; + + /* This is also straight out of example.txt */ + this->dinfo.err = jpeg_std_error(&this->jerr.pub); + this->jerr.pub.error_exit = my_error_exit; + this->jerr.pub.output_message = my_output_message; + this->jerr.emit_message = this->jerr.pub.emit_message; + this->jerr.pub.emit_message = my_emit_message; + this->jerr.pub.addon_message_table = turbojpeg_message_table; + this->jerr.pub.first_addon_message = JMSG_FIRSTADDONCODE; + this->jerr.pub.last_addon_message = JMSG_LASTADDONCODE; + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + if (this) free(this); + return NULL; + } + + jpeg_create_decompress(&this->dinfo); + /* Make an initial call so it will create the source manager */ + jpeg_mem_src_tj(&this->dinfo, buffer, 1); + + this->init |= DECOMPRESS; + return (tjhandle)this; } -DLLEXPORT tjhandle DLLCALL tjInitDecompress(void) +DLLEXPORT tjhandle tjInitDecompress(void) { - tjinstance *this; - if((this=(tjinstance *)malloc(sizeof(tjinstance)))==NULL) - { - snprintf(errStr, JMSG_LENGTH_MAX, - "tjInitDecompress(): Memory allocation failure"); - return NULL; - } - MEMZERO(this, sizeof(tjinstance)); - return _tjInitDecompress(this); + tjinstance *this; + + if ((this = (tjinstance *)malloc(sizeof(tjinstance))) == NULL) { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjInitDecompress(): Memory allocation failure"); + return NULL; + } + MEMZERO(this, sizeof(tjinstance)); + snprintf(this->errStr, JMSG_LENGTH_MAX, "No error"); + return _tjInitDecompress(this); } -DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle, - const unsigned char *jpegBuf, unsigned long jpegSize, int *width, - int *height, int *jpegSubsamp, int *jpegColorspace) +DLLEXPORT int tjDecompressHeader3(tjhandle handle, + const unsigned char *jpegBuf, + unsigned long jpegSize, int *width, + int *height, int *jpegSubsamp, + int *jpegColorspace) { - int retval=0; - - getdinstance(handle); - if((this->init&DECOMPRESS)==0) - _throw("tjDecompressHeader3(): Instance has not been initialized for decompression"); - - if(jpegBuf==NULL || jpegSize<=0 || width==NULL || height==NULL - || jpegSubsamp==NULL || jpegColorspace==NULL) - _throw("tjDecompressHeader3(): Invalid argument"); - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. 
*/ - return -1; - } - - jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); - jpeg_read_header(dinfo, TRUE); - - *width=dinfo->image_width; - *height=dinfo->image_height; - *jpegSubsamp=getSubsamp(dinfo); - switch(dinfo->jpeg_color_space) - { - case JCS_GRAYSCALE: *jpegColorspace=TJCS_GRAY; break; - case JCS_RGB: *jpegColorspace=TJCS_RGB; break; - case JCS_YCbCr: *jpegColorspace=TJCS_YCbCr; break; - case JCS_CMYK: *jpegColorspace=TJCS_CMYK; break; - case JCS_YCCK: *jpegColorspace=TJCS_YCCK; break; - default: *jpegColorspace=-1; break; - } - - jpeg_abort_decompress(dinfo); - - if(*jpegSubsamp<0) - _throw("tjDecompressHeader3(): Could not determine subsampling type for JPEG image"); - if(*jpegColorspace<0) - _throw("tjDecompressHeader3(): Could not determine colorspace of JPEG image"); - if(*width<1 || *height<1) - _throw("tjDecompressHeader3(): Invalid data returned in header"); - - bailout: - if(this->jerr.warning) retval=-1; - return retval; + int retval = 0; + + getdinstance(handle); + if ((this->init & DECOMPRESS) == 0) + _throw("tjDecompressHeader3(): Instance has not been initialized for decompression"); + + if (jpegBuf == NULL || jpegSize <= 0 || width == NULL || height == NULL || + jpegSubsamp == NULL || jpegColorspace == NULL) + _throw("tjDecompressHeader3(): Invalid argument"); + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + return -1; + } + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + + *width = dinfo->image_width; + *height = dinfo->image_height; + *jpegSubsamp = getSubsamp(dinfo); + switch (dinfo->jpeg_color_space) { + case JCS_GRAYSCALE: *jpegColorspace = TJCS_GRAY; break; + case JCS_RGB: *jpegColorspace = TJCS_RGB; break; + case JCS_YCbCr: *jpegColorspace = TJCS_YCbCr; break; + case JCS_CMYK: *jpegColorspace = TJCS_CMYK; break; + case JCS_YCCK: *jpegColorspace = TJCS_YCCK; break; + default: *jpegColorspace = -1; break; + } + + jpeg_abort_decompress(dinfo); + + if (*jpegSubsamp < 0) + _throw("tjDecompressHeader3(): Could not determine subsampling type for JPEG image"); + if (*jpegColorspace < 0) + _throw("tjDecompressHeader3(): Could not determine colorspace of JPEG image"); + if (*width < 1 || *height < 1) + _throw("tjDecompressHeader3(): Invalid data returned in header"); + +bailout: + if (this->jerr.warning) retval = -1; + return retval; } -DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle, - unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, - int *jpegSubsamp) +DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int *width, + int *height, int *jpegSubsamp) { - int jpegColorspace; - return tjDecompressHeader3(handle, jpegBuf, jpegSize, width, height, - jpegSubsamp, &jpegColorspace); + int jpegColorspace; + + return tjDecompressHeader3(handle, jpegBuf, jpegSize, width, height, + jpegSubsamp, &jpegColorspace); } -DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle, - unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height) +DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int *width, + int *height) { - int jpegSubsamp; - return tjDecompressHeader2(handle, jpegBuf, jpegSize, width, height, - &jpegSubsamp); + int jpegSubsamp; + + return tjDecompressHeader2(handle, jpegBuf, jpegSize, width, 
height, + &jpegSubsamp); } -DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors) +DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numscalingfactors) { - if(numscalingfactors==NULL) - { - snprintf(errStr, JMSG_LENGTH_MAX, - "tjGetScalingFactors(): Invalid argument"); - return NULL; - } - - *numscalingfactors=NUMSF; - return (tjscalingfactor *)sf; + if (numscalingfactors == NULL) { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjGetScalingFactors(): Invalid argument"); + return NULL; + } + + *numscalingfactors = NUMSF; + return (tjscalingfactor *)sf; } -DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle, - const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, - int width, int pitch, int height, int pixelFormat, int flags) +DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags) { - int i, retval=0; JSAMPROW *row_pointer=NULL; - int jpegwidth, jpegheight, scaledw, scaledh; - #ifndef JCS_EXTENSIONS - unsigned char *rgbBuf=NULL; - unsigned char *_dstBuf=NULL; int _pitch=0; - #endif - - getdinstance(handle); - if((this->init&DECOMPRESS)==0) - _throw("tjDecompress2(): Instance has not been initialized for decompression"); - - if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pitch<0 - || height<0 || pixelFormat<0 || pixelFormat>=TJ_NUMPF) - _throw("tjDecompress2(): Invalid argument"); - - if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); - else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); - else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. 
*/ - retval=-1; goto bailout; - } - - jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); - jpeg_read_header(dinfo, TRUE); - if(setDecompDefaults(dinfo, pixelFormat, flags)==-1) - { - retval=-1; goto bailout; - } - - if(flags&TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling=FALSE; - - jpegwidth=dinfo->image_width; jpegheight=dinfo->image_height; - if(width==0) width=jpegwidth; - if(height==0) height=jpegheight; - for(i=0; i=NUMSF) - _throw("tjDecompress2(): Could not scale down to desired image dimensions"); - width=scaledw; height=scaledh; - dinfo->scale_num=sf[i].num; - dinfo->scale_denom=sf[i].denom; - - jpeg_start_decompress(dinfo); - if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat]; - - #ifndef JCS_EXTENSIONS - if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK && - (RGB_RED!=tjRedOffset[pixelFormat] || - RGB_GREEN!=tjGreenOffset[pixelFormat] || - RGB_BLUE!=tjBlueOffset[pixelFormat] || - RGB_PIXELSIZE!=tjPixelSize[pixelFormat])) - { - rgbBuf=(unsigned char *)malloc(width*height*3); - if(!rgbBuf) _throw("tjDecompress2(): Memory allocation failure"); - _pitch=pitch; pitch=width*3; - _dstBuf=dstBuf; dstBuf=rgbBuf; - } - #endif - - if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW) - *dinfo->output_height))==NULL) - _throw("tjDecompress2(): Memory allocation failure"); - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. 
*/ - retval=-1; goto bailout; - } - for(i=0; i<(int)dinfo->output_height; i++) - { - if(flags&TJFLAG_BOTTOMUP) - row_pointer[i]=&dstBuf[(dinfo->output_height-i-1)*pitch]; - else row_pointer[i]=&dstBuf[i*pitch]; - } - while(dinfo->output_scanlineoutput_height) - { - jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline], - dinfo->output_height-dinfo->output_scanline); - } - jpeg_finish_decompress(dinfo); - - #ifndef JCS_EXTENSIONS - fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat); - #endif - - bailout: - if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo); - #ifndef JCS_EXTENSIONS - if(rgbBuf) free(rgbBuf); - #endif - if(row_pointer) free(row_pointer); - if(this->jerr.warning) retval=-1; - return retval; + JSAMPROW *row_pointer = NULL; + int i, retval = 0, jpegwidth, jpegheight, scaledw, scaledh; + + getdinstance(handle); + this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE; + if ((this->init & DECOMPRESS) == 0) + _throw("tjDecompress2(): Instance has not been initialized for decompression"); + + if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || width < 0 || + pitch < 0 || height < 0 || pixelFormat < 0 || pixelFormat >= TJ_NUMPF) + _throw("tjDecompress2(): Invalid argument"); + +#ifndef NO_PUTENV + if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); +#endif + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + retval = -1; goto bailout; + } + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + this->dinfo.out_color_space = pf2cs[pixelFormat]; + if (flags & TJFLAG_FASTDCT) this->dinfo.dct_method = JDCT_FASTEST; + if (flags & TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling = FALSE; + + jpegwidth = dinfo->image_width; jpegheight = dinfo->image_height; + if (width == 0) width = jpegwidth; + if (height == 0) height = jpegheight; + for (i = 0; i < NUMSF; i++) { + scaledw = TJSCALED(jpegwidth, sf[i]); + scaledh = TJSCALED(jpegheight, sf[i]); + if (scaledw <= width && scaledh <= height) + break; + } + if (i >= NUMSF) + _throw("tjDecompress2(): Could not scale down to desired image dimensions"); + width = scaledw; height = scaledh; + dinfo->scale_num = sf[i].num; + dinfo->scale_denom = sf[i].denom; + + jpeg_start_decompress(dinfo); + if (pitch == 0) pitch = dinfo->output_width * tjPixelSize[pixelFormat]; + + if ((row_pointer = + (JSAMPROW *)malloc(sizeof(JSAMPROW) * dinfo->output_height)) == NULL) + _throw("tjDecompress2(): Memory allocation failure"); + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + retval = -1; goto bailout; + } + for (i = 0; i < (int)dinfo->output_height; i++) { + if (flags & TJFLAG_BOTTOMUP) + row_pointer[i] = &dstBuf[(dinfo->output_height - i - 1) * pitch]; + else + row_pointer[i] = &dstBuf[i * pitch]; + } + while (dinfo->output_scanline < dinfo->output_height) + jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline], + dinfo->output_height - dinfo->output_scanline); + jpeg_finish_decompress(dinfo); + +bailout: + if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo); + if (row_pointer) free(row_pointer); + if (this->jerr.warning) retval = -1; + this->jerr.stopOnWarning = FALSE; + return retval; } -DLLEXPORT int DLLCALL tjDecompress(tjhandle handle, unsigned char *jpegBuf, - unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, - int height, int pixelSize, int flags) +DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelSize, + int flags) { - if(flags&TJ_YUV) - return tjDecompressToYUV(handle, jpegBuf, jpegSize, dstBuf, flags); - else - return tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, width, pitch, - height, getPixelFormat(pixelSize, flags), flags); + if (flags & TJ_YUV) + return tjDecompressToYUV(handle, jpegBuf, jpegSize, dstBuf, flags); + else + return tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, width, pitch, + height, getPixelFormat(pixelSize, flags), flags); } static int setDecodeDefaults(struct jpeg_decompress_struct *dinfo, - int pixelFormat, int subsamp, int flags) + int pixelFormat, int subsamp, int flags) { - int i; - - dinfo->scale_num=dinfo->scale_denom=1; - - if(subsamp==TJSAMP_GRAY) - { - dinfo->num_components=dinfo->comps_in_scan=1; - dinfo->jpeg_color_space=JCS_GRAYSCALE; - } - else - { - dinfo->num_components=dinfo->comps_in_scan=3; - dinfo->jpeg_color_space=JCS_YCbCr; - } - - dinfo->comp_info=(jpeg_component_info *) - 
(*dinfo->mem->alloc_small)((j_common_ptr)dinfo, JPOOL_IMAGE, - dinfo->num_components*sizeof(jpeg_component_info)); - - for(i=0; inum_components; i++) - { - jpeg_component_info *compptr=&dinfo->comp_info[i]; - compptr->h_samp_factor=(i==0)? tjMCUWidth[subsamp]/8:1; - compptr->v_samp_factor=(i==0)? tjMCUHeight[subsamp]/8:1; - compptr->component_index=i; - compptr->component_id=i+1; - compptr->quant_tbl_no=compptr->dc_tbl_no=compptr->ac_tbl_no= - (i==0)? 0:1; - dinfo->cur_comp_info[i]=compptr; - } - dinfo->data_precision=8; - for(i=0; i<2; i++) - { - if(dinfo->quant_tbl_ptrs[i]==NULL) - dinfo->quant_tbl_ptrs[i]=jpeg_alloc_quant_table((j_common_ptr)dinfo); - } - - return 0; + int i; + + dinfo->scale_num = dinfo->scale_denom = 1; + + if (subsamp == TJSAMP_GRAY) { + dinfo->num_components = dinfo->comps_in_scan = 1; + dinfo->jpeg_color_space = JCS_GRAYSCALE; + } else { + dinfo->num_components = dinfo->comps_in_scan = 3; + dinfo->jpeg_color_space = JCS_YCbCr; + } + + dinfo->comp_info = (jpeg_component_info *) + (*dinfo->mem->alloc_small) ((j_common_ptr)dinfo, JPOOL_IMAGE, + dinfo->num_components * + sizeof(jpeg_component_info)); + + for (i = 0; i < dinfo->num_components; i++) { + jpeg_component_info *compptr = &dinfo->comp_info[i]; + + compptr->h_samp_factor = (i == 0) ? tjMCUWidth[subsamp] / 8 : 1; + compptr->v_samp_factor = (i == 0) ? tjMCUHeight[subsamp] / 8 : 1; + compptr->component_index = i; + compptr->component_id = i + 1; + compptr->quant_tbl_no = compptr->dc_tbl_no = + compptr->ac_tbl_no = (i == 0) ? 
0 : 1; + dinfo->cur_comp_info[i] = compptr; + } + dinfo->data_precision = 8; + for (i = 0; i < 2; i++) { + if (dinfo->quant_tbl_ptrs[i] == NULL) + dinfo->quant_tbl_ptrs[i] = jpeg_alloc_quant_table((j_common_ptr)dinfo); + } + + return 0; } int my_read_markers(j_decompress_ptr dinfo) { - return JPEG_REACHED_SOS; + return JPEG_REACHED_SOS; } void my_reset_marker_reader(j_decompress_ptr dinfo) { } -DLLEXPORT int DLLCALL tjDecodeYUVPlanes(tjhandle handle, - const unsigned char **srcPlanes, const int *strides, int subsamp, - unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, - int flags) +DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, + const int *strides, int subsamp, + unsigned char *dstBuf, int width, int pitch, + int height, int pixelFormat, int flags) { - int i, retval=0; JSAMPROW *row_pointer=NULL; - JSAMPLE *_tmpbuf[MAX_COMPONENTS]; - JSAMPROW *tmpbuf[MAX_COMPONENTS], *inbuf[MAX_COMPONENTS]; - int row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS]; - JSAMPLE *ptr; - jpeg_component_info *compptr; - #ifndef JCS_EXTENSIONS - unsigned char *rgbBuf=NULL; - unsigned char *_dstBuf=NULL; int _pitch=0; - #endif - int (*old_read_markers)(j_decompress_ptr); - void (*old_reset_marker_reader)(j_decompress_ptr); - - getdinstance(handle); - - for(i=0; iinit&DECOMPRESS)==0) - _throw("tjDecodeYUVPlanes(): Instance has not been initialized for decompression"); - - if(!srcPlanes || !srcPlanes[0] || subsamp<0 || subsamp>=NUMSUBOPT - || dstBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0 - || pixelFormat>=TJ_NUMPF) - _throw("tjDecodeYUVPlanes(): Invalid argument"); - if(subsamp!=TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2])) - _throw("tjDecodeYUVPlanes(): Invalid argument"); - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. 
*/ - retval=-1; goto bailout; - } - - if(pixelFormat==TJPF_CMYK) - _throw("tjDecodeYUVPlanes(): Cannot decode YUV images into CMYK pixels."); - - if(pitch==0) pitch=width*tjPixelSize[pixelFormat]; - dinfo->image_width=width; - dinfo->image_height=height; - - if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); - else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); - else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); - - if(setDecodeDefaults(dinfo, pixelFormat, subsamp, flags)==-1) - { - retval=-1; goto bailout; - } - old_read_markers=dinfo->marker->read_markers; - dinfo->marker->read_markers=my_read_markers; - old_reset_marker_reader=dinfo->marker->reset_marker_reader; - dinfo->marker->reset_marker_reader=my_reset_marker_reader; - jpeg_read_header(dinfo, TRUE); - dinfo->marker->read_markers=old_read_markers; - dinfo->marker->reset_marker_reader=old_reset_marker_reader; - - if(setDecompDefaults(dinfo, pixelFormat, flags)==-1) - { - retval=-1; goto bailout; - } - dinfo->do_fancy_upsampling=FALSE; - dinfo->Se=DCTSIZE2-1; - jinit_master_decompress(dinfo); - (*dinfo->upsample->start_pass)(dinfo); - - pw0=PAD(width, dinfo->max_h_samp_factor); - ph0=PAD(height, dinfo->max_v_samp_factor); - - if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat]; - - #ifndef JCS_EXTENSIONS - if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK && - (RGB_RED!=tjRedOffset[pixelFormat] || - RGB_GREEN!=tjGreenOffset[pixelFormat] || - RGB_BLUE!=tjBlueOffset[pixelFormat] || - RGB_PIXELSIZE!=tjPixelSize[pixelFormat])) - { - rgbBuf=(unsigned char *)malloc(width*height*3); - if(!rgbBuf) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); - _pitch=pitch; pitch=width*3; - _dstBuf=dstBuf; dstBuf=rgbBuf; - } - #endif - - if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph0))==NULL) - _throw("tjDecodeYUVPlanes(): Memory allocation failure"); - for(i=0; inum_components; i++) - { - compptr=&dinfo->comp_info[i]; - _tmpbuf[i]=(JSAMPLE 
*)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16) - * compptr->v_samp_factor + 16); - if(!_tmpbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); - tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor); - if(!tmpbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); - for(row=0; rowv_samp_factor; row++) - { - unsigned char *_tmpbuf_aligned= - (unsigned char *)PAD((size_t)_tmpbuf[i], 16); - tmpbuf[i][row]=&_tmpbuf_aligned[ - PAD(compptr->width_in_blocks*DCTSIZE, 16) * row]; - } - pw[i]=pw0*compptr->h_samp_factor/dinfo->max_h_samp_factor; - ph[i]=ph0*compptr->v_samp_factor/dinfo->max_v_samp_factor; - inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph[i]); - if(!inbuf[i]) _throw("tjDecodeYUVPlanes(): Memory allocation failure"); - ptr=(JSAMPLE *)srcPlanes[i]; - for(row=0; rowjerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. */ - retval=-1; goto bailout; - } - - for(row=0; rowmax_v_samp_factor) - { - JDIMENSION inrow=0, outrow=0; - for(i=0, compptr=dinfo->comp_info; inum_components; i++, compptr++) - jcopy_sample_rows(inbuf[i], - row*compptr->v_samp_factor/dinfo->max_v_samp_factor, tmpbuf[i], 0, - compptr->v_samp_factor, pw[i]); - (dinfo->upsample->upsample)(dinfo, tmpbuf, &inrow, - dinfo->max_v_samp_factor, &row_pointer[row], &outrow, - dinfo->max_v_samp_factor); - } - jpeg_abort_decompress(dinfo); - - #ifndef JCS_EXTENSIONS - fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat); - #endif - - bailout: - if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo); - #ifndef JCS_EXTENSIONS - if(rgbBuf) free(rgbBuf); - #endif - if(row_pointer) free(row_pointer); - for(i=0; ijerr.warning) retval=-1; - return retval; + JSAMPROW *row_pointer = NULL; + JSAMPLE *_tmpbuf[MAX_COMPONENTS]; + JSAMPROW *tmpbuf[MAX_COMPONENTS], *inbuf[MAX_COMPONENTS]; + int i, retval = 0, row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS]; + JSAMPLE *ptr; + jpeg_component_info *compptr; + int 
(*old_read_markers) (j_decompress_ptr); + void (*old_reset_marker_reader) (j_decompress_ptr); + + getdinstance(handle); + this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE; + + for (i = 0; i < MAX_COMPONENTS; i++) { + tmpbuf[i] = NULL; _tmpbuf[i] = NULL; inbuf[i] = NULL; + } + + if ((this->init & DECOMPRESS) == 0) + _throw("tjDecodeYUVPlanes(): Instance has not been initialized for decompression"); + + if (!srcPlanes || !srcPlanes[0] || subsamp < 0 || subsamp >= NUMSUBOPT || + dstBuf == NULL || width <= 0 || pitch < 0 || height <= 0 || + pixelFormat < 0 || pixelFormat >= TJ_NUMPF) + _throw("tjDecodeYUVPlanes(): Invalid argument"); + if (subsamp != TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2])) + _throw("tjDecodeYUVPlanes(): Invalid argument"); + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + + if (pixelFormat == TJPF_CMYK) + _throw("tjDecodeYUVPlanes(): Cannot decode YUV images into CMYK pixels."); + + if (pitch == 0) pitch = width * tjPixelSize[pixelFormat]; + dinfo->image_width = width; + dinfo->image_height = height; + +#ifndef NO_PUTENV + if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); +#endif + + if (setDecodeDefaults(dinfo, pixelFormat, subsamp, flags) == -1) { + retval = -1; goto bailout; + } + old_read_markers = dinfo->marker->read_markers; + dinfo->marker->read_markers = my_read_markers; + old_reset_marker_reader = dinfo->marker->reset_marker_reader; + dinfo->marker->reset_marker_reader = my_reset_marker_reader; + jpeg_read_header(dinfo, TRUE); + dinfo->marker->read_markers = old_read_markers; + dinfo->marker->reset_marker_reader = old_reset_marker_reader; + + this->dinfo.out_color_space = pf2cs[pixelFormat]; + if (flags & TJFLAG_FASTDCT) this->dinfo.dct_method = JDCT_FASTEST; + dinfo->do_fancy_upsampling 
= FALSE; + dinfo->Se = DCTSIZE2 - 1; + jinit_master_decompress(dinfo); + (*dinfo->upsample->start_pass) (dinfo); + + pw0 = PAD(width, dinfo->max_h_samp_factor); + ph0 = PAD(height, dinfo->max_v_samp_factor); + + if (pitch == 0) pitch = dinfo->output_width * tjPixelSize[pixelFormat]; + + if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph0)) == NULL) + _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + for (i = 0; i < height; i++) { + if (flags & TJFLAG_BOTTOMUP) + row_pointer[i] = &dstBuf[(height - i - 1) * pitch]; + else + row_pointer[i] = &dstBuf[i * pitch]; + } + if (height < ph0) + for (i = height; i < ph0; i++) row_pointer[i] = row_pointer[height - 1]; + + for (i = 0; i < dinfo->num_components; i++) { + compptr = &dinfo->comp_info[i]; + _tmpbuf[i] = + (JSAMPLE *)malloc(PAD(compptr->width_in_blocks * DCTSIZE, 32) * + compptr->v_samp_factor + 32); + if (!_tmpbuf[i]) + _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * compptr->v_samp_factor); + if (!tmpbuf[i]) + _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + for (row = 0; row < compptr->v_samp_factor; row++) { + unsigned char *_tmpbuf_aligned = + (unsigned char *)PAD((size_t)_tmpbuf[i], 32); + + tmpbuf[i][row] = + &_tmpbuf_aligned[PAD(compptr->width_in_blocks * DCTSIZE, 32) * row]; + } + pw[i] = pw0 * compptr->h_samp_factor / dinfo->max_h_samp_factor; + ph[i] = ph0 * compptr->v_samp_factor / dinfo->max_v_samp_factor; + inbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i]); + if (!inbuf[i]) + _throw("tjDecodeYUVPlanes(): Memory allocation failure"); + ptr = (JSAMPLE *)srcPlanes[i]; + for (row = 0; row < ph[i]; row++) { + inbuf[i][row] = ptr; + ptr += (strides && strides[i] != 0) ? strides[i] : pw[i]; + } + } + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + retval = -1; goto bailout; + } + + for (row = 0; row < ph0; row += dinfo->max_v_samp_factor) { + JDIMENSION inrow = 0, outrow = 0; + + for (i = 0, compptr = dinfo->comp_info; i < dinfo->num_components; + i++, compptr++) + jcopy_sample_rows(inbuf[i], + row * compptr->v_samp_factor / dinfo->max_v_samp_factor, tmpbuf[i], 0, + compptr->v_samp_factor, pw[i]); + (dinfo->upsample->upsample) (dinfo, tmpbuf, &inrow, + dinfo->max_v_samp_factor, &row_pointer[row], + &outrow, dinfo->max_v_samp_factor); + } + jpeg_abort_decompress(dinfo); + +bailout: + if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo); + if (row_pointer) free(row_pointer); + for (i = 0; i < MAX_COMPONENTS; i++) { + if (tmpbuf[i] != NULL) free(tmpbuf[i]); + if (_tmpbuf[i] != NULL) free(_tmpbuf[i]); + if (inbuf[i] != NULL) free(inbuf[i]); + } + if (this->jerr.warning) retval = -1; + this->jerr.stopOnWarning = FALSE; + return retval; } -DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, - int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, - int height, int pixelFormat, int flags) +DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, + int pad, int subsamp, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags) { - const unsigned char *srcPlanes[3]; - int pw0, ph0, strides[3], retval=-1; - - if(srcBuf==NULL || pad<0 || !isPow2(pad) || subsamp<0 || subsamp>=NUMSUBOPT - || width<=0 || height<=0) - _throw("tjDecodeYUV(): Invalid argument"); - - pw0=tjPlaneWidth(0, width, subsamp); - ph0=tjPlaneHeight(0, height, subsamp); - srcPlanes[0]=srcBuf; - strides[0]=PAD(pw0, pad); - if(subsamp==TJSAMP_GRAY) - { - strides[1]=strides[2]=0; - srcPlanes[1]=srcPlanes[2]=NULL; - } - else - { - int pw1=tjPlaneWidth(1, width, subsamp); - int ph1=tjPlaneHeight(1, height, subsamp); - strides[1]=strides[2]=PAD(pw1, pad); - srcPlanes[1]=srcPlanes[0]+strides[0]*ph0; - srcPlanes[2]=srcPlanes[1]+strides[1]*ph1; - } - - 
return tjDecodeYUVPlanes(handle, srcPlanes, strides, subsamp, dstBuf, width, - pitch, height, pixelFormat, flags); - - bailout: - return retval; + const unsigned char *srcPlanes[3]; + int pw0, ph0, strides[3], retval = -1; + tjinstance *this = (tjinstance *)handle; + + if (!this) _throwg("tjDecodeYUV(): Invalid handle"); + this->isInstanceError = FALSE; + + if (srcBuf == NULL || pad < 0 || !isPow2(pad) || subsamp < 0 || + subsamp >= NUMSUBOPT || width <= 0 || height <= 0) + _throw("tjDecodeYUV(): Invalid argument"); + + pw0 = tjPlaneWidth(0, width, subsamp); + ph0 = tjPlaneHeight(0, height, subsamp); + srcPlanes[0] = srcBuf; + strides[0] = PAD(pw0, pad); + if (subsamp == TJSAMP_GRAY) { + strides[1] = strides[2] = 0; + srcPlanes[1] = srcPlanes[2] = NULL; + } else { + int pw1 = tjPlaneWidth(1, width, subsamp); + int ph1 = tjPlaneHeight(1, height, subsamp); + + strides[1] = strides[2] = PAD(pw1, pad); + srcPlanes[1] = srcPlanes[0] + strides[0] * ph0; + srcPlanes[2] = srcPlanes[1] + strides[1] * ph1; + } + + return tjDecodeYUVPlanes(handle, srcPlanes, strides, subsamp, dstBuf, width, + pitch, height, pixelFormat, flags); + +bailout: + return retval; } -DLLEXPORT int DLLCALL tjDecompressToYUVPlanes(tjhandle handle, - const unsigned char *jpegBuf, unsigned long jpegSize, - unsigned char **dstPlanes, int width, int *strides, int height, int flags) +DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle, + const unsigned char *jpegBuf, + unsigned long jpegSize, + unsigned char **dstPlanes, int width, + int *strides, int height, int flags) { - int i, sfi, row, retval=0; JSAMPROW *outbuf[MAX_COMPONENTS]; - int jpegwidth, jpegheight, jpegSubsamp, scaledw, scaledh; - int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS], - tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS]; - JSAMPLE *_tmpbuf=NULL, *ptr; JSAMPROW *tmpbuf[MAX_COMPONENTS]; - int dctsize; - - getdinstance(handle); - - for(i=0; iinit&DECOMPRESS)==0) - _throw("tjDecompressToYUVPlanes(): Instance has not been 
initialized for decompression"); - - if(jpegBuf==NULL || jpegSize<=0 || !dstPlanes || !dstPlanes[0] || width<0 - || height<0) - _throw("tjDecompressToYUVPlanes(): Invalid argument"); - - if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); - else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); - else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. */ - retval=-1; goto bailout; - } - - if(!this->headerRead) - { - jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); - jpeg_read_header(dinfo, TRUE); - } - this->headerRead=0; - jpegSubsamp=getSubsamp(dinfo); - if(jpegSubsamp<0) - _throw("tjDecompressToYUVPlanes(): Could not determine subsampling type for JPEG image"); - - if(jpegSubsamp!=TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2])) - _throw("tjDecompressToYUVPlanes(): Invalid argument"); - - jpegwidth=dinfo->image_width; jpegheight=dinfo->image_height; - if(width==0) width=jpegwidth; - if(height==0) height=jpegheight; - for(i=0; i=NUMSF) - _throw("tjDecompressToYUVPlanes(): Could not scale down to desired image dimensions"); - if(dinfo->num_components>3) - _throw("tjDecompressToYUVPlanes(): JPEG image must have 3 or fewer components"); - - width=scaledw; height=scaledh; - dinfo->scale_num=sf[i].num; - dinfo->scale_denom=sf[i].denom; - sfi=i; - jpeg_calc_output_dimensions(dinfo); - - dctsize=DCTSIZE*sf[sfi].num/sf[sfi].denom; - - for(i=0; inum_components; i++) - { - jpeg_component_info *compptr=&dinfo->comp_info[i]; - int ih; - iw[i]=compptr->width_in_blocks*dctsize; - ih=compptr->height_in_blocks*dctsize; - pw[i]=PAD(dinfo->output_width, dinfo->max_h_samp_factor) - *compptr->h_samp_factor/dinfo->max_h_samp_factor; - ph[i]=PAD(dinfo->output_height, dinfo->max_v_samp_factor) - *compptr->v_samp_factor/dinfo->max_v_samp_factor; - if(iw[i]!=pw[i] || ih!=ph[i]) usetmpbuf=1; - th[i]=compptr->v_samp_factor*dctsize; - tmpbufsize+=iw[i]*th[i]; - if((outbuf[i]=(JSAMPROW 
*)malloc(sizeof(JSAMPROW)*ph[i]))==NULL) - _throw("tjDecompressToYUVPlanes(): Memory allocation failure"); - ptr=dstPlanes[i]; - for(row=0; rownum_components; i++) - { - if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL) - _throw("tjDecompressToYUVPlanes(): Memory allocation failure"); - for(row=0; rowjerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. */ - retval=-1; goto bailout; - } - - if(flags&TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling=FALSE; - if(flags&TJFLAG_FASTDCT) dinfo->dct_method=JDCT_FASTEST; - dinfo->raw_data_out=TRUE; - - jpeg_start_decompress(dinfo); - for(row=0; row<(int)dinfo->output_height; - row+=dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size) - { - JSAMPARRAY yuvptr[MAX_COMPONENTS]; - int crow[MAX_COMPONENTS]; - for(i=0; inum_components; i++) - { - jpeg_component_info *compptr=&dinfo->comp_info[i]; - if(jpegSubsamp==TJ_420) - { - /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try - to be clever and use the IDCT to perform upsampling on the U and V - planes. For instance, if the output image is to be scaled by 1/2 - relative to the JPEG image, then the scaling factor and upsampling - effectively cancel each other, so a normal 8x8 IDCT can be used. - However, this is not desirable when using the decompress-to-YUV - functionality in TurboJPEG, since we want to output the U and V - planes in their subsampled form. Thus, we have to override some - internal libjpeg parameters to force it to use the "scaled" IDCT - functions on the U and V planes. 
*/ - compptr->_DCT_scaled_size=dctsize; - compptr->MCU_sample_width=tjMCUWidth[jpegSubsamp]* - sf[sfi].num/sf[sfi].denom* - compptr->v_samp_factor/dinfo->max_v_samp_factor; - dinfo->idct->inverse_DCT[i] = dinfo->idct->inverse_DCT[0]; - } - crow[i]=row*compptr->v_samp_factor/dinfo->max_v_samp_factor; - if(usetmpbuf) yuvptr[i]=tmpbuf[i]; - else yuvptr[i]=&outbuf[i][crow[i]]; - } - jpeg_read_raw_data(dinfo, yuvptr, - dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size); - if(usetmpbuf) - { - int j; - for(i=0; inum_components; i++) - { - for(j=0; jglobal_state>DSTATE_START) jpeg_abort_decompress(dinfo); - for(i=0; ijerr.warning) retval=-1; - return retval; + int i, sfi, row, retval = 0; + int jpegwidth, jpegheight, jpegSubsamp, scaledw, scaledh; + int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS], + tmpbufsize = 0, usetmpbuf = 0, th[MAX_COMPONENTS]; + JSAMPLE *_tmpbuf = NULL, *ptr; + JSAMPROW *outbuf[MAX_COMPONENTS], *tmpbuf[MAX_COMPONENTS]; + int dctsize; + + getdinstance(handle); + this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE; + + for (i = 0; i < MAX_COMPONENTS; i++) { + tmpbuf[i] = NULL; outbuf[i] = NULL; + } + + if ((this->init & DECOMPRESS) == 0) + _throw("tjDecompressToYUVPlanes(): Instance has not been initialized for decompression"); + + if (jpegBuf == NULL || jpegSize <= 0 || !dstPlanes || !dstPlanes[0] || + width < 0 || height < 0) + _throw("tjDecompressToYUVPlanes(): Invalid argument"); + +#ifndef NO_PUTENV + if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); +#endif + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + retval = -1; goto bailout; + } + + if (!this->headerRead) { + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + } + this->headerRead = 0; + jpegSubsamp = getSubsamp(dinfo); + if (jpegSubsamp < 0) + _throw("tjDecompressToYUVPlanes(): Could not determine subsampling type for JPEG image"); + + if (jpegSubsamp != TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2])) + _throw("tjDecompressToYUVPlanes(): Invalid argument"); + + jpegwidth = dinfo->image_width; jpegheight = dinfo->image_height; + if (width == 0) width = jpegwidth; + if (height == 0) height = jpegheight; + for (i = 0; i < NUMSF; i++) { + scaledw = TJSCALED(jpegwidth, sf[i]); + scaledh = TJSCALED(jpegheight, sf[i]); + if (scaledw <= width && scaledh <= height) + break; + } + if (i >= NUMSF) + _throw("tjDecompressToYUVPlanes(): Could not scale down to desired image dimensions"); + if (dinfo->num_components > 3) + _throw("tjDecompressToYUVPlanes(): JPEG image must have 3 or fewer components"); + + width = scaledw; height = scaledh; + dinfo->scale_num = sf[i].num; + dinfo->scale_denom = sf[i].denom; + sfi = i; + jpeg_calc_output_dimensions(dinfo); + + dctsize = DCTSIZE * sf[sfi].num / sf[sfi].denom; + + for (i = 0; i < dinfo->num_components; i++) { + jpeg_component_info *compptr = &dinfo->comp_info[i]; + int ih; + + iw[i] = compptr->width_in_blocks * dctsize; + ih = compptr->height_in_blocks * dctsize; + pw[i] = PAD(dinfo->output_width, dinfo->max_h_samp_factor) * + compptr->h_samp_factor / dinfo->max_h_samp_factor; + ph[i] = PAD(dinfo->output_height, dinfo->max_v_samp_factor) * + compptr->v_samp_factor / dinfo->max_v_samp_factor; + if (iw[i] != pw[i] || ih != ph[i]) usetmpbuf = 1; + th[i] = compptr->v_samp_factor * dctsize; + tmpbufsize += iw[i] * th[i]; + if ((outbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i])) == NULL) + _throw("tjDecompressToYUVPlanes(): Memory allocation failure"); + ptr = dstPlanes[i]; + for (row = 0; row < ph[i]; row++) { + outbuf[i][row] = ptr; + ptr += 
(strides && strides[i] != 0) ? strides[i] : pw[i]; + } + } + if (usetmpbuf) { + if ((_tmpbuf = (JSAMPLE *)malloc(sizeof(JSAMPLE) * tmpbufsize)) == NULL) + _throw("tjDecompressToYUVPlanes(): Memory allocation failure"); + ptr = _tmpbuf; + for (i = 0; i < dinfo->num_components; i++) { + if ((tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * th[i])) == NULL) + _throw("tjDecompressToYUVPlanes(): Memory allocation failure"); + for (row = 0; row < th[i]; row++) { + tmpbuf[i][row] = ptr; + ptr += iw[i]; + } + } + } + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + + if (flags & TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling = FALSE; + if (flags & TJFLAG_FASTDCT) dinfo->dct_method = JDCT_FASTEST; + dinfo->raw_data_out = TRUE; + + jpeg_start_decompress(dinfo); + for (row = 0; row < (int)dinfo->output_height; + row += dinfo->max_v_samp_factor * dinfo->_min_DCT_scaled_size) { + JSAMPARRAY yuvptr[MAX_COMPONENTS]; + int crow[MAX_COMPONENTS]; + + for (i = 0; i < dinfo->num_components; i++) { + jpeg_component_info *compptr = &dinfo->comp_info[i]; + + if (jpegSubsamp == TJ_420) { + /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try + to be clever and use the IDCT to perform upsampling on the U and V + planes. For instance, if the output image is to be scaled by 1/2 + relative to the JPEG image, then the scaling factor and upsampling + effectively cancel each other, so a normal 8x8 IDCT can be used. + However, this is not desirable when using the decompress-to-YUV + functionality in TurboJPEG, since we want to output the U and V + planes in their subsampled form. Thus, we have to override some + internal libjpeg parameters to force it to use the "scaled" IDCT + functions on the U and V planes. 
*/ + compptr->_DCT_scaled_size = dctsize; + compptr->MCU_sample_width = tjMCUWidth[jpegSubsamp] * + sf[sfi].num / sf[sfi].denom * + compptr->v_samp_factor / dinfo->max_v_samp_factor; + dinfo->idct->inverse_DCT[i] = dinfo->idct->inverse_DCT[0]; + } + crow[i] = row * compptr->v_samp_factor / dinfo->max_v_samp_factor; + if (usetmpbuf) yuvptr[i] = tmpbuf[i]; + else yuvptr[i] = &outbuf[i][crow[i]]; + } + jpeg_read_raw_data(dinfo, yuvptr, + dinfo->max_v_samp_factor * dinfo->_min_DCT_scaled_size); + if (usetmpbuf) { + int j; + + for (i = 0; i < dinfo->num_components; i++) { + for (j = 0; j < MIN(th[i], ph[i] - crow[i]); j++) { + memcpy(outbuf[i][crow[i] + j], tmpbuf[i][j], pw[i]); + } + } + } + } + jpeg_finish_decompress(dinfo); + +bailout: + if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo); + for (i = 0; i < MAX_COMPONENTS; i++) { + if (tmpbuf[i]) free(tmpbuf[i]); + if (outbuf[i]) free(outbuf[i]); + } + if (_tmpbuf) free(_tmpbuf); + if (this->jerr.warning) retval = -1; + this->jerr.stopOnWarning = FALSE; + return retval; } -DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle, - const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, - int width, int pad, int height, int flags) +DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pad, int height, int flags) { - unsigned char *dstPlanes[3]; - int pw0, ph0, strides[3], retval=-1, jpegSubsamp=-1; - int i, jpegwidth, jpegheight, scaledw, scaledh; - - getdinstance(handle); - - if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pad<1 - || !isPow2(pad) || height<0) - _throw("tjDecompressToYUV2(): Invalid argument"); - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. 
*/ - return -1; - } - - jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); - jpeg_read_header(dinfo, TRUE); - jpegSubsamp=getSubsamp(dinfo); - if(jpegSubsamp<0) - _throw("tjDecompressToYUV2(): Could not determine subsampling type for JPEG image"); - - jpegwidth=dinfo->image_width; jpegheight=dinfo->image_height; - if(width==0) width=jpegwidth; - if(height==0) height=jpegheight; - - for(i=0; i=NUMSF) - _throw("tjDecompressToYUV2(): Could not scale down to desired image dimensions"); - - pw0=tjPlaneWidth(0, width, jpegSubsamp); - ph0=tjPlaneHeight(0, height, jpegSubsamp); - dstPlanes[0]=dstBuf; - strides[0]=PAD(pw0, pad); - if(jpegSubsamp==TJSAMP_GRAY) - { - strides[1]=strides[2]=0; - dstPlanes[1]=dstPlanes[2]=NULL; - } - else - { - int pw1=tjPlaneWidth(1, width, jpegSubsamp); - int ph1=tjPlaneHeight(1, height, jpegSubsamp); - strides[1]=strides[2]=PAD(pw1, pad); - dstPlanes[1]=dstPlanes[0]+strides[0]*ph0; - dstPlanes[2]=dstPlanes[1]+strides[1]*ph1; - } - - this->headerRead=1; - return tjDecompressToYUVPlanes(handle, jpegBuf, jpegSize, dstPlanes, width, - strides, height, flags); - - bailout: - return retval; - + unsigned char *dstPlanes[3]; + int pw0, ph0, strides[3], retval = -1, jpegSubsamp = -1; + int i, jpegwidth, jpegheight, scaledw, scaledh; + + getdinstance(handle); + this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE; + + if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || width < 0 || + pad < 1 || !isPow2(pad) || height < 0) + _throw("tjDecompressToYUV2(): Invalid argument"); + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + return -1; + } + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + jpeg_read_header(dinfo, TRUE); + jpegSubsamp = getSubsamp(dinfo); + if (jpegSubsamp < 0) + _throw("tjDecompressToYUV2(): Could not determine subsampling type for JPEG image"); + + jpegwidth = dinfo->image_width; jpegheight = dinfo->image_height; + if (width == 0) width = jpegwidth; + if (height == 0) height = jpegheight; + + for (i = 0; i < NUMSF; i++) { + scaledw = TJSCALED(jpegwidth, sf[i]); + scaledh = TJSCALED(jpegheight, sf[i]); + if (scaledw <= width && scaledh <= height) + break; + } + if (i >= NUMSF) + _throw("tjDecompressToYUV2(): Could not scale down to desired image dimensions"); + + pw0 = tjPlaneWidth(0, width, jpegSubsamp); + ph0 = tjPlaneHeight(0, height, jpegSubsamp); + dstPlanes[0] = dstBuf; + strides[0] = PAD(pw0, pad); + if (jpegSubsamp == TJSAMP_GRAY) { + strides[1] = strides[2] = 0; + dstPlanes[1] = dstPlanes[2] = NULL; + } else { + int pw1 = tjPlaneWidth(1, width, jpegSubsamp); + int ph1 = tjPlaneHeight(1, height, jpegSubsamp); + + strides[1] = strides[2] = PAD(pw1, pad); + dstPlanes[1] = dstPlanes[0] + strides[0] * ph0; + dstPlanes[2] = dstPlanes[1] + strides[1] * ph1; + } + + this->headerRead = 1; + return tjDecompressToYUVPlanes(handle, jpegBuf, jpegSize, dstPlanes, width, + strides, height, flags); + +bailout: + this->jerr.stopOnWarning = FALSE; + return retval; } -DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle, - unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, - int flags) +DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int flags) { - return tjDecompressToYUV2(handle, jpegBuf, jpegSize, dstBuf, 0, 4, 0, flags); + return tjDecompressToYUV2(handle, jpegBuf, jpegSize, dstBuf, 0, 4, 0, flags); } /* Transformer */ -DLLEXPORT tjhandle DLLCALL tjInitTransform(void) +DLLEXPORT tjhandle tjInitTransform(void) +{ + tjinstance *this = NULL; + tjhandle handle = NULL; + + if 
((this = (tjinstance *)malloc(sizeof(tjinstance))) == NULL) { + snprintf(errStr, JMSG_LENGTH_MAX, + "tjInitTransform(): Memory allocation failure"); + return NULL; + } + MEMZERO(this, sizeof(tjinstance)); + snprintf(this->errStr, JMSG_LENGTH_MAX, "No error"); + handle = _tjInitCompress(this); + if (!handle) return NULL; + handle = _tjInitDecompress(this); + return handle; +} + + +DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, int n, + unsigned char **dstBufs, unsigned long *dstSizes, + tjtransform *t, int flags) +{ + jpeg_transform_info *xinfo = NULL; + jvirt_barray_ptr *srccoefs, *dstcoefs; + int retval = 0, i, jpegSubsamp, saveMarkers = 0; + + getinstance(handle); + this->jerr.stopOnWarning = (flags & TJFLAG_STOPONWARNING) ? TRUE : FALSE; + if ((this->init & COMPRESS) == 0 || (this->init & DECOMPRESS) == 0) + _throw("tjTransform(): Instance has not been initialized for transformation"); + + if (jpegBuf == NULL || jpegSize <= 0 || n < 1 || dstBufs == NULL || + dstSizes == NULL || t == NULL || flags < 0) + _throw("tjTransform(): Invalid argument"); + +#ifndef NO_PUTENV + if (flags & TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); + else if (flags & TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); + else if (flags & TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); +#endif + + if ((xinfo = + (jpeg_transform_info *)malloc(sizeof(jpeg_transform_info) * n)) == NULL) + _throw("tjTransform(): Memory allocation failure"); + MEMZERO(xinfo, sizeof(jpeg_transform_info) * n); + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + + jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); + + for (i = 0; i < n; i++) { + xinfo[i].transform = xformtypes[t[i].op]; + xinfo[i].perfect = (t[i].options & TJXOPT_PERFECT) ? 1 : 0; + xinfo[i].trim = (t[i].options & TJXOPT_TRIM) ? 1 : 0; + xinfo[i].force_grayscale = (t[i].options & TJXOPT_GRAY) ? 
1 : 0; + xinfo[i].crop = (t[i].options & TJXOPT_CROP) ? 1 : 0; + if (n != 1 && t[i].op == TJXOP_HFLIP) xinfo[i].slow_hflip = 1; + else xinfo[i].slow_hflip = 0; + + if (xinfo[i].crop) { + xinfo[i].crop_xoffset = t[i].r.x; xinfo[i].crop_xoffset_set = JCROP_POS; + xinfo[i].crop_yoffset = t[i].r.y; xinfo[i].crop_yoffset_set = JCROP_POS; + if (t[i].r.w != 0) { + xinfo[i].crop_width = t[i].r.w; xinfo[i].crop_width_set = JCROP_POS; + } else + xinfo[i].crop_width = JCROP_UNSET; + if (t[i].r.h != 0) { + xinfo[i].crop_height = t[i].r.h; xinfo[i].crop_height_set = JCROP_POS; + } else + xinfo[i].crop_height = JCROP_UNSET; + } + if (!(t[i].options & TJXOPT_COPYNONE)) saveMarkers = 1; + } + + jcopy_markers_setup(dinfo, saveMarkers ? JCOPYOPT_ALL : JCOPYOPT_NONE); + jpeg_read_header(dinfo, TRUE); + jpegSubsamp = getSubsamp(dinfo); + if (jpegSubsamp < 0) + _throw("tjTransform(): Could not determine subsampling type for JPEG image"); + + for (i = 0; i < n; i++) { + if (!jtransform_request_workspace(dinfo, &xinfo[i])) + _throw("tjTransform(): Transform is not perfect"); + + if (xinfo[i].crop) { + if ((t[i].r.x % xinfo[i].iMCU_sample_width) != 0 || + (t[i].r.y % xinfo[i].iMCU_sample_height) != 0) { + snprintf(errStr, JMSG_LENGTH_MAX, + "To crop this JPEG image, x must be a multiple of %d\n" + "and y must be a multiple of %d.\n", + xinfo[i].iMCU_sample_width, xinfo[i].iMCU_sample_height); + retval = -1; goto bailout; + } + } + } + + srccoefs = jpeg_read_coefficients(dinfo); + + for (i = 0; i < n; i++) { + int w, h, alloc = 1; + + if (!xinfo[i].crop) { + w = dinfo->image_width; h = dinfo->image_height; + } else { + w = xinfo[i].crop_width; h = xinfo[i].crop_height; + } + if (flags & TJFLAG_NOREALLOC) { + alloc = 0; dstSizes[i] = tjBufSize(w, h, jpegSubsamp); + } + if (!(t[i].options & TJXOPT_NOOUTPUT)) + jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc); + jpeg_copy_critical_parameters(dinfo, cinfo); + dstcoefs = jtransform_adjust_parameters(dinfo, cinfo, srccoefs, &xinfo[i]); 
+ if (flags & TJFLAG_PROGRESSIVE || t[i].options & TJXOPT_PROGRESSIVE) + jpeg_simple_progression(cinfo); + if (!(t[i].options & TJXOPT_NOOUTPUT)) { + jpeg_write_coefficients(cinfo, dstcoefs); + jcopy_markers_execute(dinfo, cinfo, t[i].options & TJXOPT_COPYNONE ? + JCOPYOPT_NONE : JCOPYOPT_ALL); + } else + jinit_c_master_control(cinfo, TRUE); + jtransform_execute_transformation(dinfo, cinfo, srccoefs, &xinfo[i]); + if (t[i].customFilter) { + int ci, y; + JDIMENSION by; + + for (ci = 0; ci < cinfo->num_components; ci++) { + jpeg_component_info *compptr = &cinfo->comp_info[ci]; + tjregion arrayRegion = { + 0, 0, compptr->width_in_blocks * DCTSIZE, DCTSIZE + }; + tjregion planeRegion = { + 0, 0, compptr->width_in_blocks * DCTSIZE, + compptr->height_in_blocks * DCTSIZE + }; + + for (by = 0; by < compptr->height_in_blocks; + by += compptr->v_samp_factor) { + JBLOCKARRAY barray = (dinfo->mem->access_virt_barray) + ((j_common_ptr)dinfo, dstcoefs[ci], by, compptr->v_samp_factor, + TRUE); + + for (y = 0; y < compptr->v_samp_factor; y++) { + if (t[i].customFilter(barray[y][0], arrayRegion, planeRegion, ci, + i, &t[i]) == -1) + _throw("tjTransform(): Error in custom filter"); + arrayRegion.y += DCTSIZE; + } + } + } + } + if (!(t[i].options & TJXOPT_NOOUTPUT)) jpeg_finish_compress(cinfo); + } + + jpeg_finish_decompress(dinfo); + +bailout: + if (cinfo->global_state > CSTATE_START) jpeg_abort_compress(cinfo); + if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo); + if (xinfo) free(xinfo); + if (this->jerr.warning) retval = -1; + this->jerr.stopOnWarning = FALSE; + return retval; +} + + +DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width, + int align, int *height, int *pixelFormat, + int flags) { - tjinstance *this=NULL; tjhandle handle=NULL; - if((this=(tjinstance *)malloc(sizeof(tjinstance)))==NULL) - { - snprintf(errStr, JMSG_LENGTH_MAX, - "tjInitTransform(): Memory allocation failure"); - return NULL; - } - MEMZERO(this, sizeof(tjinstance)); 
- handle=_tjInitCompress(this); - if(!handle) return NULL; - handle=_tjInitDecompress(this); - return handle; + int retval = 0, tempc, pitch; + tjhandle handle = NULL; + tjinstance *this; + j_compress_ptr cinfo = NULL; + cjpeg_source_ptr src; + unsigned char *dstBuf = NULL; + FILE *file = NULL; + boolean invert; + + if (!filename || !width || align < 1 || !height || !pixelFormat || + *pixelFormat < TJPF_UNKNOWN || *pixelFormat >= TJ_NUMPF) + _throwg("tjLoadImage(): Invalid argument"); + if ((align & (align - 1)) != 0) + _throwg("tjLoadImage(): Alignment must be a power of 2"); + + if ((handle = tjInitCompress()) == NULL) return NULL; + this = (tjinstance *)handle; + cinfo = &this->cinfo; + + if ((file = fopen(filename, "rb")) == NULL) + _throwunix("tjLoadImage(): Cannot open input file"); + + if ((tempc = getc(file)) < 0 || ungetc(tempc, file) == EOF) + _throwunix("tjLoadImage(): Could not read input file") + else if (tempc == EOF) + _throwg("tjLoadImage(): Input file contains no data"); + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + retval = -1; goto bailout; + } + + if (*pixelFormat == TJPF_UNKNOWN) cinfo->in_color_space = JCS_UNKNOWN; + else cinfo->in_color_space = pf2cs[*pixelFormat]; + if (tempc == 'B') { + if ((src = jinit_read_bmp(cinfo, FALSE)) == NULL) + _throwg("tjLoadImage(): Could not initialize bitmap loader"); + invert = (flags & TJFLAG_BOTTOMUP) == 0; + } else if (tempc == 'P') { + if ((src = jinit_read_ppm(cinfo)) == NULL) + _throwg("tjLoadImage(): Could not initialize bitmap loader"); + invert = (flags & TJFLAG_BOTTOMUP) != 0; + } else + _throwg("tjLoadImage(): Unsupported file type"); + + src->input_file = file; + (*src->start_input) (cinfo, src); + (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo); + + *width = cinfo->image_width; *height = cinfo->image_height; + *pixelFormat = cs2pf[cinfo->in_color_space]; + + pitch = PAD((*width) * tjPixelSize[*pixelFormat], align); + if ((dstBuf = (unsigned char *)malloc(pitch * (*height))) == NULL) + _throwg("tjLoadImage(): Memory allocation failure"); + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. 
*/ + retval = -1; goto bailout; + } + + while (cinfo->next_scanline < cinfo->image_height) { + int i, nlines = (*src->get_pixel_rows) (cinfo, src); + + for (i = 0; i < nlines; i++) { + unsigned char *dstptr; + int row; + + row = cinfo->next_scanline + i; + if (invert) dstptr = &dstBuf[((*height) - row - 1) * pitch]; + else dstptr = &dstBuf[row * pitch]; + memcpy(dstptr, src->buffer[i], (*width) * tjPixelSize[*pixelFormat]); + } + cinfo->next_scanline += nlines; + } + + (*src->finish_input) (cinfo, src); + +bailout: + if (handle) tjDestroy(handle); + if (file) fclose(file); + if (retval < 0 && dstBuf) { free(dstBuf); dstBuf = NULL; } + return dstBuf; } -DLLEXPORT int DLLCALL tjTransform(tjhandle handle, - const unsigned char *jpegBuf, unsigned long jpegSize, int n, - unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *t, int flags) +DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer, + int width, int pitch, int height, int pixelFormat, + int flags) { - jpeg_transform_info *xinfo=NULL; - jvirt_barray_ptr *srccoefs, *dstcoefs; - int retval=0, i, jpegSubsamp; - - getinstance(handle); - if((this->init&COMPRESS)==0 || (this->init&DECOMPRESS)==0) - _throw("tjTransform(): Instance has not been initialized for transformation"); - - if(jpegBuf==NULL || jpegSize<=0 || n<1 || dstBufs==NULL || dstSizes==NULL - || t==NULL || flags<0) - _throw("tjTransform(): Invalid argument"); - - if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); - else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1"); - else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); - - if((xinfo=(jpeg_transform_info *)malloc(sizeof(jpeg_transform_info)*n)) - ==NULL) - _throw("tjTransform(): Memory allocation failure"); - MEMZERO(xinfo, sizeof(jpeg_transform_info)*n); - - if(setjmp(this->jerr.setjmp_buffer)) - { - /* If we get here, the JPEG code has signaled an error. 
*/ - retval=-1; goto bailout; - } - - jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize); - - for(i=0; iimage_width; h=dinfo->image_height; - } - else - { - w=xinfo[i].crop_width; h=xinfo[i].crop_height; - } - if(flags&TJFLAG_NOREALLOC) - { - alloc=0; dstSizes[i]=tjBufSize(w, h, jpegSubsamp); - } - if(!(t[i].options&TJXOPT_NOOUTPUT)) - jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc); - jpeg_copy_critical_parameters(dinfo, cinfo); - dstcoefs=jtransform_adjust_parameters(dinfo, cinfo, srccoefs, - &xinfo[i]); - if(!(t[i].options&TJXOPT_NOOUTPUT)) - { - jpeg_write_coefficients(cinfo, dstcoefs); - jcopy_markers_execute(dinfo, cinfo, JCOPYOPT_ALL); - } - else jinit_c_master_control(cinfo, TRUE); - jtransform_execute_transformation(dinfo, cinfo, srccoefs, - &xinfo[i]); - if(t[i].customFilter) - { - int ci, y; JDIMENSION by; - for(ci=0; cinum_components; ci++) - { - jpeg_component_info *compptr=&cinfo->comp_info[ci]; - tjregion arrayRegion={0, 0, compptr->width_in_blocks*DCTSIZE, - DCTSIZE}; - tjregion planeRegion={0, 0, compptr->width_in_blocks*DCTSIZE, - compptr->height_in_blocks*DCTSIZE}; - for(by=0; byheight_in_blocks; by+=compptr->v_samp_factor) - { - JBLOCKARRAY barray=(dinfo->mem->access_virt_barray) - ((j_common_ptr)dinfo, dstcoefs[ci], by, compptr->v_samp_factor, - TRUE); - for(y=0; yv_samp_factor; y++) - { - if(t[i].customFilter(barray[y][0], arrayRegion, planeRegion, - ci, i, &t[i])==-1) - _throw("tjTransform(): Error in custom filter"); - arrayRegion.y+=DCTSIZE; - } - } - } - } - if(!(t[i].options&TJXOPT_NOOUTPUT)) jpeg_finish_compress(cinfo); - } - - jpeg_finish_decompress(dinfo); - - bailout: - if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo); - if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo); - if(xinfo) free(xinfo); - if(this->jerr.warning) retval=-1; - return retval; + int retval = 0; + tjhandle handle = NULL; + tjinstance *this; + j_decompress_ptr dinfo = NULL; + djpeg_dest_ptr dst; + FILE *file = NULL; + char *ptr = 
NULL; + boolean invert; + + if (!filename || !buffer || width < 1 || pitch < 0 || height < 1 || + pixelFormat < 0 || pixelFormat >= TJ_NUMPF) + _throwg("tjSaveImage(): Invalid argument"); + + if ((handle = tjInitDecompress()) == NULL) + return -1; + this = (tjinstance *)handle; + dinfo = &this->dinfo; + + if ((file = fopen(filename, "wb")) == NULL) + _throwunix("tjSaveImage(): Cannot open output file"); + + if (setjmp(this->jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. */ + retval = -1; goto bailout; + } + + this->dinfo.out_color_space = pf2cs[pixelFormat]; + dinfo->image_width = width; dinfo->image_height = height; + dinfo->global_state = DSTATE_READY; + dinfo->scale_num = dinfo->scale_denom = 1; + + ptr = strrchr(filename, '.'); + if (ptr && !strcasecmp(ptr, ".bmp")) { + if ((dst = jinit_write_bmp(dinfo, FALSE, FALSE)) == NULL) + _throwg("tjSaveImage(): Could not initialize bitmap writer"); + invert = (flags & TJFLAG_BOTTOMUP) == 0; + } else { + if ((dst = jinit_write_ppm(dinfo)) == NULL) + _throwg("tjSaveImage(): Could not initialize PPM writer"); + invert = (flags & TJFLAG_BOTTOMUP) != 0; + } + + dst->output_file = file; + (*dst->start_output) (dinfo, dst); + (*dinfo->mem->realize_virt_arrays) ((j_common_ptr)dinfo); + + if (pitch == 0) pitch = width * tjPixelSize[pixelFormat]; + + while (dinfo->output_scanline < dinfo->output_height) { + unsigned char *rowptr; + + if (invert) + rowptr = &buffer[(height - dinfo->output_scanline - 1) * pitch]; + else + rowptr = &buffer[dinfo->output_scanline * pitch]; + memcpy(dst->buffer[0], rowptr, width * tjPixelSize[pixelFormat]); + (*dst->put_pixel_rows) (dinfo, dst, 1); + dinfo->output_scanline++; + } + + (*dst->finish_output) (dinfo, dst); + +bailout: + if (handle) tjDestroy(handle); + if (file) fclose(file); + return retval; } diff --git a/turbojpeg.h b/turbojpeg.h index 307dc6f..9c0a371 100644 --- a/turbojpeg.h +++ b/turbojpeg.h @@ -30,7 +30,7 @@ #define __TURBOJPEG_H__ #if 
defined(_WIN32) && defined(DLLDEFINE) -#define DLLEXPORT __declspec(dllexport) +#define DLLEXPORT __declspec(dllexport) #else #define DLLEXPORT #endif @@ -78,7 +78,7 @@ /** * The number of chrominance subsampling options */ -#define TJ_NUMSAMP 6 +#define TJ_NUMSAMP 6 /** * Chrominance subsampling options. @@ -89,14 +89,13 @@ * (the human eye is more sensitive to small changes in brightness than to * small changes in color.) This is called "chrominance subsampling". */ -enum TJSAMP -{ +enum TJSAMP { /** * 4:4:4 chrominance subsampling (no chrominance subsampling). The JPEG or * YUV image will contain one chrominance component for every pixel in the * source image. */ - TJSAMP_444=0, + TJSAMP_444 = 0, /** * 4:2:2 chrominance subsampling. The JPEG or YUV image will contain one * chrominance component for every 2x1 block of pixels in the source image. @@ -141,7 +140,7 @@ enum TJSAMP * - 16x16 for 4:2:0 * - 32x8 for 4:1:1 */ -static const int tjMCUWidth[TJ_NUMSAMP] = {8, 16, 16, 8, 8, 32}; +static const int tjMCUWidth[TJ_NUMSAMP] = { 8, 16, 16, 8, 8, 32 }; /** * MCU block height (in pixels) for a given level of chrominance subsampling. @@ -152,25 +151,24 @@ static const int tjMCUWidth[TJ_NUMSAMP] = {8, 16, 16, 8, 8, 32}; * - 16x16 for 4:2:0 * - 32x8 for 4:1:1 */ -static const int tjMCUHeight[TJ_NUMSAMP] = {8, 8, 16, 8, 16, 8}; +static const int tjMCUHeight[TJ_NUMSAMP] = { 8, 8, 16, 8, 16, 8 }; /** * The number of pixel formats */ -#define TJ_NUMPF 12 +#define TJ_NUMPF 12 /** * Pixel formats */ -enum TJPF -{ +enum TJPF { /** * RGB pixel format. The red, green, and blue components in the image are * stored in 3-byte pixels in the order R, G, B from lowest to highest byte * address within each pixel. */ - TJPF_RGB=0, + TJPF_RGB = 0, /** * BGR pixel format. 
The red, green, and blue components in the image are * stored in 3-byte pixels in the order B, G, R from lowest to highest byte @@ -249,48 +247,72 @@ enum TJPF * CMYK pixels into a YCCK JPEG image (see #TJCS_YCCK) and decompressing YCCK * JPEG images into CMYK pixels. */ - TJPF_CMYK + TJPF_CMYK, + /** + * Unknown pixel format. Currently this is only used by #tjLoadImage(). + */ + TJPF_UNKNOWN = -1 }; - /** * Red offset (in bytes) for a given pixel format. This specifies the number * of bytes that the red component is offset from the start of the pixel. For * instance, if a pixel of format TJ_BGRX is stored in char pixel[], - * then the red component will be pixel[tjRedOffset[TJ_BGRX]]. + * then the red component will be pixel[tjRedOffset[TJ_BGRX]]. This + * will be -1 if the pixel format does not have a red component. */ -static const int tjRedOffset[TJ_NUMPF] = {0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1, -1}; +static const int tjRedOffset[TJ_NUMPF] = { + 0, 2, 0, 2, 3, 1, -1, 0, 2, 3, 1, -1 +}; /** * Green offset (in bytes) for a given pixel format. This specifies the number * of bytes that the green component is offset from the start of the pixel. * For instance, if a pixel of format TJ_BGRX is stored in * char pixel[], then the green component will be - * pixel[tjGreenOffset[TJ_BGRX]]. + * pixel[tjGreenOffset[TJ_BGRX]]. This will be -1 if the pixel format + * does not have a green component. */ -static const int tjGreenOffset[TJ_NUMPF] = {1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2, -1}; +static const int tjGreenOffset[TJ_NUMPF] = { + 1, 1, 1, 1, 2, 2, -1, 1, 1, 2, 2, -1 +}; /** * Blue offset (in bytes) for a given pixel format. This specifies the number * of bytes that the Blue component is offset from the start of the pixel. For * instance, if a pixel of format TJ_BGRX is stored in char pixel[], - * then the blue component will be pixel[tjBlueOffset[TJ_BGRX]]. + * then the blue component will be pixel[tjBlueOffset[TJ_BGRX]]. 
This + * will be -1 if the pixel format does not have a blue component. + */ +static const int tjBlueOffset[TJ_NUMPF] = { + 2, 0, 2, 0, 1, 3, -1, 2, 0, 1, 3, -1 +}; +/** + * Alpha offset (in bytes) for a given pixel format. This specifies the number + * of bytes that the Alpha component is offset from the start of the pixel. + * For instance, if a pixel of format TJ_BGRA is stored in + * char pixel[], then the alpha component will be + * pixel[tjAlphaOffset[TJ_BGRA]]. This will be -1 if the pixel format + * does not have an alpha component. */ -static const int tjBlueOffset[TJ_NUMPF] = {2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3, -1}; +static const int tjAlphaOffset[TJ_NUMPF] = { + -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1 +}; /** - * Pixel size (in bytes) for a given pixel format. + * Pixel size (in bytes) for a given pixel format */ -static const int tjPixelSize[TJ_NUMPF] = {3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4}; +static const int tjPixelSize[TJ_NUMPF] = { + 3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4 +}; /** * The number of JPEG colorspaces */ -#define TJ_NUMCS 5 +#define TJ_NUMCS 5 /** * JPEG colorspaces */ -enum TJCS -{ +enum TJCS { /** * RGB colorspace. When compressing the JPEG image, the R, G, and B * components in the source image are reordered into image planes, but no @@ -298,7 +320,7 @@ enum TJCS * decompressed to any of the extended RGB pixel formats or grayscale, but * they cannot be decompressed to YUV images. */ - TJCS_RGB=0, + TJCS_RGB = 0, /** * YCbCr colorspace. YCbCr is not an absolute colorspace but rather a * mathematical transformation of RGB designed solely for storage and @@ -347,7 +369,7 @@ enum TJCS * The uncompressed source/destination image is stored in bottom-up (Windows, * OpenGL) order, not top-down (X11) order. 
*/ -#define TJFLAG_BOTTOMUP 2 +#define TJFLAG_BOTTOMUP 2 /** * When decompressing an image that was compressed using chrominance * subsampling, use the fastest chrominance upsampling algorithm available in @@ -363,7 +385,7 @@ enum TJCS * attempting to allocate or reallocate that buffer. This reproduces the * behavior of earlier versions of TurboJPEG. */ -#define TJFLAG_NOREALLOC 1024 +#define TJFLAG_NOREALLOC 1024 /** * Use the fastest DCT/IDCT algorithm available in the underlying codec. The * default if this flag is not specified is implementation-specific. For @@ -372,7 +394,7 @@ enum TJCS * only a very slight effect on accuracy, but it uses the accurate algorithm * when decompressing, because this has been shown to have a larger effect. */ -#define TJFLAG_FASTDCT 2048 +#define TJFLAG_FASTDCT 2048 /** * Use the most accurate DCT/IDCT algorithm available in the underlying codec. * The default if this flag is not specified is implementation-specific. For @@ -381,23 +403,57 @@ enum TJCS * only a very slight effect on accuracy, but it uses the accurate algorithm * when decompressing, because this has been shown to have a larger effect. */ -#define TJFLAG_ACCURATEDCT 4096 +#define TJFLAG_ACCURATEDCT 4096 +/** + * Immediately discontinue the current compression/decompression/transform + * operation if the underlying codec throws a warning (non-fatal error). The + * default behavior is to allow the operation to complete unless a fatal error + * is encountered. + */ +#define TJFLAG_STOPONWARNING 8192 +/** + * Use progressive entropy coding in JPEG images generated by the compression + * and transform functions. Progressive entropy coding will generally improve + * compression relative to baseline entropy coding (the default), but it will + * reduce compression and decompression performance considerably. 
+ */ +#define TJFLAG_PROGRESSIVE 16384 + + +/** + * The number of error codes + */ +#define TJ_NUMERR 2 + +/** + * Error codes + */ +enum TJERR { + /** + * The error was non-fatal and recoverable, but the image may still be + * corrupt. + */ + TJERR_WARNING = 0, + /** + * The error was fatal and non-recoverable. + */ + TJERR_FATAL +}; /** * The number of transform operations */ -#define TJ_NUMXOP 8 +#define TJ_NUMXOP 8 /** * Transform operations for #tjTransform() */ -enum TJXOP -{ +enum TJXOP { /** * Do not transform the position of the image pixels */ - TJXOP_NONE=0, + TJXOP_NONE = 0, /** * Flip (mirror) image horizontally. This transform is imperfect if there * are any partial MCU blocks on the right edge (see #TJXOPT_PERFECT.) @@ -456,31 +512,44 @@ enum TJXOP * This option will cause #tjTransform() to discard any partial MCU blocks that * cannot be transformed. */ -#define TJXOPT_TRIM 2 +#define TJXOPT_TRIM 2 /** * This option will enable lossless cropping. See #tjTransform() for more * information. */ -#define TJXOPT_CROP 4 +#define TJXOPT_CROP 4 /** * This option will discard the color data in the input image and produce * a grayscale output image. */ -#define TJXOPT_GRAY 8 +#define TJXOPT_GRAY 8 /** * This option will prevent #tjTransform() from outputting a JPEG image for * this particular transform (this can be used in conjunction with a custom * filter to capture the transformed DCT coefficients without transcoding * them.) */ -#define TJXOPT_NOOUTPUT 16 +#define TJXOPT_NOOUTPUT 16 +/** + * This option will enable progressive entropy coding in the output image + * generated by this particular transform. Progressive entropy coding will + * generally improve compression relative to baseline entropy coding (the + * default), but it will reduce compression and decompression performance + * considerably. 
+ */ +#define TJXOPT_PROGRESSIVE 32 +/** + * This option will prevent #tjTransform() from copying any extra markers + * (including EXIF and ICC profile data) from the source image to the output + * image. + */ +#define TJXOPT_COPYNONE 64 /** * Scaling factor */ -typedef struct -{ +typedef struct { /** * Numerator */ @@ -494,8 +563,7 @@ typedef struct /** * Cropping region */ -typedef struct -{ +typedef struct { /** * The left boundary of the cropping region. This must be evenly divisible * by the MCU block width (see #tjMCUWidth.) @@ -521,8 +589,7 @@ typedef struct /** * Lossless transform */ -typedef struct tjtransform -{ +typedef struct tjtransform { /** * Cropping region */ @@ -573,29 +640,30 @@ typedef struct tjtransform * * @return 0 if the callback was successful, or -1 if an error occurred. */ - int (*customFilter)(short *coeffs, tjregion arrayRegion, - tjregion planeRegion, int componentIndex, int transformIndex, - struct tjtransform *transform); + int (*customFilter) (short *coeffs, tjregion arrayRegion, + tjregion planeRegion, int componentIndex, + int transformIndex, struct tjtransform *transform); } tjtransform; /** * TurboJPEG instance handle */ -typedef void* tjhandle; +typedef void *tjhandle; /** * Pad the given width to the nearest 32-bit boundary */ -#define TJPAD(width) (((width)+3)&(~3)) +#define TJPAD(width) (((width) + 3) & (~3)) /** * Compute the scaled value of dimension using the given scaling * factor. This macro performs the integer equivalent of ceil(dimension * * scalingFactor). */ -#define TJSCALED(dimension, scalingFactor) ((dimension * scalingFactor.num \ - + scalingFactor.denom - 1) / scalingFactor.denom) +#define TJSCALED(dimension, scalingFactor) \ + ((dimension * scalingFactor.num + scalingFactor.denom - 1) / \ + scalingFactor.denom) #ifdef __cplusplus @@ -607,9 +675,9 @@ extern "C" { * Create a TurboJPEG compressor instance. 
* * @return a handle to the newly-created instance, or NULL if an error - * occurred (see #tjGetErrorStr().) + * occurred (see #tjGetErrorStr2().) */ -DLLEXPORT tjhandle DLLCALL tjInitCompress(void); +DLLEXPORT tjhandle tjInitCompress(void); /** @@ -669,11 +737,13 @@ DLLEXPORT tjhandle DLLCALL tjInitCompress(void); * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) */ -DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, const unsigned char *srcBuf, - int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, - unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags); +DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, + unsigned char **jpegBuf, unsigned long *jpegSize, + int jpegSubsamp, int jpegQual, int flags); /** @@ -733,11 +803,14 @@ DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, const unsigned char *srcBuf, * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) 
*/ -DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, - const unsigned char *srcBuf, int width, int pad, int height, int subsamp, - unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags); +DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf, + int width, int pad, int height, int subsamp, + unsigned char **jpegBuf, + unsigned long *jpegSize, int jpegQual, + int flags); /** @@ -803,12 +876,16 @@ DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) */ -DLLEXPORT int DLLCALL tjCompressFromYUVPlanes(tjhandle handle, - const unsigned char **srcPlanes, int width, const int *strides, int height, - int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, - int flags); +DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, + int width, const int *strides, + int height, int subsamp, + unsigned char **jpegBuf, + unsigned long *jpegSize, int jpegQual, + int flags); /** @@ -833,8 +910,7 @@ DLLEXPORT int DLLCALL tjCompressFromYUVPlanes(tjhandle handle, * @return the maximum size of the buffer (in bytes) required to hold the * image, or -1 if the arguments are out of bounds. */ -DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height, - int jpegSubsamp); +DLLEXPORT unsigned long tjBufSize(int width, int height, int jpegSubsamp); /** @@ -854,8 +930,8 @@ DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height, * @return the size of the buffer (in bytes) required to hold the image, or * -1 if the arguments are out of bounds. 
*/ -DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height, - int subsamp); +DLLEXPORT unsigned long tjBufSizeYUV2(int width, int pad, int height, + int subsamp); /** @@ -879,8 +955,8 @@ DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height, * @return the size of the buffer (in bytes) required to hold the YUV image * plane, or -1 if the arguments are out of bounds. */ -DLLEXPORT unsigned long DLLCALL tjPlaneSizeYUV(int componentID, int width, - int stride, int height, int subsamp); +DLLEXPORT unsigned long tjPlaneSizeYUV(int componentID, int width, int stride, + int height, int subsamp); /** @@ -963,11 +1039,13 @@ DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp); * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) */ -DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle, - const unsigned char *srcBuf, int width, int pitch, int height, - int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags); +DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, int pixelFormat, + unsigned char *dstBuf, int pad, int subsamp, + int flags); /** @@ -1021,21 +1099,22 @@ DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle, * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) 
*/ -DLLEXPORT int DLLCALL tjEncodeYUVPlanes(tjhandle handle, - const unsigned char *srcBuf, int width, int pitch, int height, - int pixelFormat, unsigned char **dstPlanes, int *strides, int subsamp, - int flags); +DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf, + int width, int pitch, int height, + int pixelFormat, unsigned char **dstPlanes, + int *strides, int subsamp, int flags); /** * Create a TurboJPEG decompressor instance. * * @return a handle to the newly-created instance, or NULL if an error - * occurred (see #tjGetErrorStr().) + * occurred (see #tjGetErrorStr2().) */ -DLLEXPORT tjhandle DLLCALL tjInitDecompress(void); +DLLEXPORT tjhandle tjInitDecompress(void); /** @@ -1061,11 +1140,14 @@ DLLEXPORT tjhandle DLLCALL tjInitDecompress(void); * of the JPEG colorspace constants, indicating the colorspace of the JPEG * image (see @ref TJCS "JPEG colorspaces".) * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) */ -DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle, - const unsigned char *jpegBuf, unsigned long jpegSize, int *width, - int *height, int *jpegSubsamp, int *jpegColorspace); +DLLEXPORT int tjDecompressHeader3(tjhandle handle, + const unsigned char *jpegBuf, + unsigned long jpegSize, int *width, + int *height, int *jpegSubsamp, + int *jpegColorspace); /** @@ -1076,9 +1158,9 @@ DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle, * the number of elements in the list * * @return a pointer to a list of fractional scaling factors, or NULL if an - * error is encountered (see #tjGetErrorStr().) + * error is encountered (see #tjGetErrorStr2().) 
*/ -DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors); +DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numscalingfactors); /** @@ -1128,11 +1210,13 @@ DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors); * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) */ -DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle, - const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, - int width, int pitch, int height, int pixelFormat, int flags); +DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags); /** @@ -1178,11 +1262,12 @@ DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle, * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) */ -DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle, - const unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, - int width, int pad, int height, int flags); +DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int width, int pad, int height, int flags); /** @@ -1234,11 +1319,14 @@ DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle, * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) 
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) */ -DLLEXPORT int DLLCALL tjDecompressToYUVPlanes(tjhandle handle, - const unsigned char *jpegBuf, unsigned long jpegSize, - unsigned char **dstPlanes, int width, int *strides, int height, int flags); +DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle, + const unsigned char *jpegBuf, + unsigned long jpegSize, + unsigned char **dstPlanes, int width, + int *strides, int height, int flags); /** @@ -1286,11 +1374,13 @@ DLLEXPORT int DLLCALL tjDecompressToYUVPlanes(tjhandle handle, * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) */ -DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, - int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, - int height, int pixelFormat, int flags); +DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, + int pad, int subsamp, unsigned char *dstBuf, + int width, int pitch, int height, int pixelFormat, + int flags); /** @@ -1343,21 +1433,23 @@ DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf, * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) 
*/ -DLLEXPORT int DLLCALL tjDecodeYUVPlanes(tjhandle handle, - const unsigned char **srcPlanes, const int *strides, int subsamp, - unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, - int flags); +DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle, + const unsigned char **srcPlanes, + const int *strides, int subsamp, + unsigned char *dstBuf, int width, int pitch, + int height, int pixelFormat, int flags); /** * Create a new TurboJPEG transformer instance. * * @return a handle to the newly-created instance, or NULL if an error - * occurred (see #tjGetErrorStr().) + * occurred (see #tjGetErrorStr2().) */ -DLLEXPORT tjhandle DLLCALL tjInitTransform(void); +DLLEXPORT tjhandle tjInitTransform(void); /** @@ -1417,12 +1509,13 @@ DLLEXPORT tjhandle DLLCALL tjInitTransform(void); * @param flags the bitwise OR of one or more of the @ref TJFLAG_ACCURATEDCT * "flags" * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2() + * and #tjGetErrorCode().) */ -DLLEXPORT int DLLCALL tjTransform(tjhandle handle, - const unsigned char *jpegBuf, unsigned long jpegSize, int n, - unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, - int flags); +DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf, + unsigned long jpegSize, int n, + unsigned char **dstBufs, unsigned long *dstSizes, + tjtransform *transforms, int flags); /** @@ -1431,9 +1524,9 @@ DLLEXPORT int DLLCALL tjTransform(tjhandle handle, * @param handle a handle to a TurboJPEG compressor, decompressor or * transformer instance * - * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2().) 
*/ -DLLEXPORT int DLLCALL tjDestroy(tjhandle handle); +DLLEXPORT int tjDestroy(tjhandle handle); /** @@ -1449,7 +1542,92 @@ DLLEXPORT int DLLCALL tjDestroy(tjhandle handle); * * @sa tjFree() */ -DLLEXPORT unsigned char* DLLCALL tjAlloc(int bytes); +DLLEXPORT unsigned char *tjAlloc(int bytes); + + +/** + * Load an uncompressed image from disk into memory. + * + * @param filename name of a file containing an uncompressed image in Windows + * BMP or PBMPLUS (PPM/PGM) format + * + * @param width pointer to an integer variable that will receive the width (in + * pixels) of the uncompressed image + * + * @param align row alignment of the image buffer to be returned (must be a + * power of 2.) For instance, setting this parameter to 4 will cause all rows + * in the image buffer to be padded to the nearest 32-bit boundary, and setting + * this parameter to 1 will cause all rows in the image buffer to be unpadded. + * + * @param height pointer to an integer variable that will receive the height + * (in pixels) of the uncompressed image + * + * @param pixelFormat pointer to an integer variable that specifies or will + * receive the pixel format of the uncompressed image buffer. The behavior of + * #tjLoadImage() will vary depending on the value of *pixelFormat + * passed to the function: + * - @ref TJPF_UNKNOWN : The uncompressed image buffer returned by the function + * will use the most optimal pixel format for the file type, and + * *pixelFormat will contain the ID of this pixel format upon + * successful return from the function. + * - @ref TJPF_GRAY : Only PGM files and 8-bit BMP files with a grayscale + * colormap can be loaded. + * - @ref TJPF_CMYK : The RGB or grayscale pixels stored in the file will be + * converted using a quick & dirty algorithm that is suitable only for testing + * purposes (proper conversion between CMYK and other formats requires a color + * management system.) 
+ * - Other @ref TJPF "pixel formats" : The uncompressed image buffer will use + * the specified pixel format, and pixel format conversion will be performed if + * necessary. + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags". + * + * @return a pointer to a newly-allocated buffer containing the uncompressed + * image, converted to the chosen pixel format and with the chosen row + * alignment, or NULL if an error occurred (see #tjGetErrorStr2().) This + * buffer should be freed using #tjFree(). + */ +DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width, + int align, int *height, int *pixelFormat, + int flags); + + +/** + * Save an uncompressed image from memory to disk. + * + * @param filename name of a file to which to save the uncompressed image. + * The image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format, + * depending on the file extension. + * + * @param buffer pointer to an image buffer containing RGB, grayscale, or + * CMYK pixels to be saved + * + * @param width width (in pixels) of the uncompressed image + * + * @param pitch bytes per line in the image buffer. Setting this parameter to + * 0 is the equivalent of setting it to + * width * #tjPixelSize[pixelFormat]. + * + * @param height height (in pixels) of the uncompressed image + * + * @param pixelFormat pixel format of the image buffer (see @ref TJPF + * "Pixel formats".) If this parameter is set to @ref TJPF_GRAY, then the + * image will be stored in PGM or 8-bit (indexed color) BMP format. Otherwise, + * the image will be stored in PPM or 24-bit BMP format. If this parameter + * is set to @ref TJPF_CMYK, then the CMYK pixels will be converted to RGB + * using a quick & dirty algorithm that is suitable only for testing (proper + * conversion between CMYK and other formats requires a color management + * system.) + * + * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP + * "flags". 
+ * + * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr2().) + */ +DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer, + int width, int pitch, int height, int pixelFormat, + int flags); /** @@ -1462,76 +1640,97 @@ DLLEXPORT unsigned char* DLLCALL tjAlloc(int bytes); * * @sa tjAlloc() */ -DLLEXPORT void DLLCALL tjFree(unsigned char *buffer); +DLLEXPORT void tjFree(unsigned char *buffer); /** * Returns a descriptive error message explaining why the last command failed. * + * @param handle a handle to a TurboJPEG compressor, decompressor, or + * transformer instance, or NULL if the error was generated by a global + * function (but note that retrieving the error message for a global function + * is not thread-safe.) + * * @return a descriptive error message explaining why the last command failed. */ -DLLEXPORT char* DLLCALL tjGetErrorStr(void); +DLLEXPORT char *tjGetErrorStr2(tjhandle handle); + + +/** + * Returns a code indicating the severity of the last error. See + * @ref TJERR "Error codes". + * + * @param handle a handle to a TurboJPEG compressor, decompressor or + * transformer instance + * + * @return a code indicating the severity of the last error. See + * @ref TJERR "Error codes". 
+ */ +DLLEXPORT int tjGetErrorCode(tjhandle handle); /* Deprecated functions and macros */ -#define TJFLAG_FORCEMMX 8 -#define TJFLAG_FORCESSE 16 -#define TJFLAG_FORCESSE2 32 -#define TJFLAG_FORCESSE3 128 +#define TJFLAG_FORCEMMX 8 +#define TJFLAG_FORCESSE 16 +#define TJFLAG_FORCESSE2 32 +#define TJFLAG_FORCESSE3 128 /* Backward compatibility functions and macros (nothing to see here) */ -#define NUMSUBOPT TJ_NUMSAMP -#define TJ_444 TJSAMP_444 -#define TJ_422 TJSAMP_422 -#define TJ_420 TJSAMP_420 -#define TJ_411 TJSAMP_420 -#define TJ_GRAYSCALE TJSAMP_GRAY - -#define TJ_BGR 1 -#define TJ_BOTTOMUP TJFLAG_BOTTOMUP -#define TJ_FORCEMMX TJFLAG_FORCEMMX -#define TJ_FORCESSE TJFLAG_FORCESSE -#define TJ_FORCESSE2 TJFLAG_FORCESSE2 -#define TJ_ALPHAFIRST 64 -#define TJ_FORCESSE3 TJFLAG_FORCESSE3 -#define TJ_FASTUPSAMPLE TJFLAG_FASTUPSAMPLE -#define TJ_YUV 512 - -DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height); - -DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height, - int jpegSubsamp); - -DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height, - int subsamp); - -DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf, - int width, int pitch, int height, int pixelSize, unsigned char *dstBuf, - unsigned long *compressedSize, int jpegSubsamp, int jpegQual, int flags); - -DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle, - unsigned char *srcBuf, int width, int pitch, int height, int pixelSize, - unsigned char *dstBuf, int subsamp, int flags); - -DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, - unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, - unsigned char *dstBuf, int subsamp, int flags); - -DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle, - unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height); - -DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle, - unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, - int *jpegSubsamp); - 
-DLLEXPORT int DLLCALL tjDecompress(tjhandle handle, - unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, - int width, int pitch, int height, int pixelSize, int flags); - -DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle, - unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, - int flags); +#define NUMSUBOPT TJ_NUMSAMP +#define TJ_444 TJSAMP_444 +#define TJ_422 TJSAMP_422 +#define TJ_420 TJSAMP_420 +#define TJ_411 TJSAMP_420 +#define TJ_GRAYSCALE TJSAMP_GRAY + +#define TJ_BGR 1 +#define TJ_BOTTOMUP TJFLAG_BOTTOMUP +#define TJ_FORCEMMX TJFLAG_FORCEMMX +#define TJ_FORCESSE TJFLAG_FORCESSE +#define TJ_FORCESSE2 TJFLAG_FORCESSE2 +#define TJ_ALPHAFIRST 64 +#define TJ_FORCESSE3 TJFLAG_FORCESSE3 +#define TJ_FASTUPSAMPLE TJFLAG_FASTUPSAMPLE +#define TJ_YUV 512 + +DLLEXPORT unsigned long TJBUFSIZE(int width, int height); + +DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int jpegSubsamp); + +DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp); + +DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width, + int pitch, int height, int pixelSize, + unsigned char *dstBuf, unsigned long *compressedSize, + int jpegSubsamp, int jpegQual, int flags); + +DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width, + int pitch, int height, int pixelSize, + unsigned char *dstBuf, int subsamp, int flags); + +DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width, + int pitch, int height, int pixelFormat, + unsigned char *dstBuf, int subsamp, int flags); + +DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int *width, + int *height); + +DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, int *width, + int *height, int *jpegSubsamp); + +DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int 
width, int pitch, int height, int pixelSize, + int flags); + +DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf, + unsigned long jpegSize, unsigned char *dstBuf, + int flags); + +DLLEXPORT char *tjGetErrorStr(void); /** diff --git a/usage.txt b/usage.txt index ed97aa9..3cbdb53 100644 --- a/usage.txt +++ b/usage.txt @@ -339,7 +339,7 @@ Switches for advanced users: been compressed using lower quality levels. -dct float Use floating-point DCT method. The float method is mainly a legacy feature. It does -  not produce significantly more accurate results than + not produce significantly more accurate results than the int method, and it is much slower. The float method may also give different results on different machines due to varying roundoff behavior, whereas the diff --git a/win/jconfig.h.in b/win/jconfig.h.in index 9d35121..6db0b34 100644 --- a/win/jconfig.h.in +++ b/win/jconfig.h.in @@ -1,41 +1,30 @@ -/* jconfig.vc --- jconfig.h for Microsoft Visual C++ on Windows 95 or NT. */ -/* see jconfig.txt for explanations */ +#define JPEG_LIB_VERSION @JPEG_LIB_VERSION@ +#define LIBJPEG_TURBO_VERSION @VERSION@ +#define LIBJPEG_TURBO_VERSION_NUMBER @LIBJPEG_TURBO_VERSION_NUMBER@ -#define JPEG_LIB_VERSION @JPEG_LIB_VERSION@ -#define LIBJPEG_TURBO_VERSION @VERSION@ -#define LIBJPEG_TURBO_VERSION_NUMBER @LIBJPEG_TURBO_VERSION_NUMBER@ #cmakedefine C_ARITH_CODING_SUPPORTED #cmakedefine D_ARITH_CODING_SUPPORTED #cmakedefine MEM_SRCDST_SUPPORTED - -/* - * Define BITS_IN_JSAMPLE as either - * 8 for 8-bit sample values (the usual setting) - * 12 for 12-bit sample values - * Only 8 and 12 are legal data precisions for lossy JPEG according to the - * JPEG standard, and the IJG code does not support anything else! - * We do not support run-time selection of data precision, sorry. 
- */ +#cmakedefine WITH_SIMD #define BITS_IN_JSAMPLE @BITS_IN_JSAMPLE@ /* use 8 or 12 */ -#define HAVE_UNSIGNED_CHAR -#define HAVE_UNSIGNED_SHORT -/* #define void char */ -/* #define const */ -#undef __CHAR_UNSIGNED__ #define HAVE_STDDEF_H #define HAVE_STDLIB_H -#undef NEED_BSD_STRINGS #undef NEED_SYS_TYPES_H -#undef NEED_FAR_POINTERS /* we presume a 32-bit flat memory model */ +#undef NEED_BSD_STRINGS + +#define HAVE_UNSIGNED_CHAR +#define HAVE_UNSIGNED_SHORT #undef INCOMPLETE_TYPES_BROKEN +#undef RIGHT_SHIFT_IS_UNSIGNED +#undef __CHAR_UNSIGNED__ /* Define "boolean" as unsigned char, not int, per Windows custom */ -#ifndef __RPCNDR_H__ /* don't conflict if rpcndr.h already read */ +#ifndef __RPCNDR_H__ /* don't conflict if rpcndr.h already read */ typedef unsigned char boolean; #endif -#define HAVE_BOOLEAN /* prevent jmorecfg.h from redefining it */ +#define HAVE_BOOLEAN /* prevent jmorecfg.h from redefining it */ /* Define "INT32" as int, not long, per Windows custom */ #if !(defined(_BASETSD_H_) || defined(_BASETSD_H)) /* don't conflict if basetsd.h already read */ @@ -43,9 +32,3 @@ typedef short INT16; typedef signed int INT32; #endif #define XMD_H /* prevent jmorecfg.h from redefining it */ - -#ifdef JPEG_INTERNALS - -#undef RIGHT_SHIFT_IS_UNSIGNED - -#endif /* JPEG_INTERNALS */ diff --git a/win/jconfigint.h.in b/win/jconfigint.h.in deleted file mode 100644 index 2131bf5..0000000 --- a/win/jconfigint.h.in +++ /dev/null @@ -1,13 +0,0 @@ -#define VERSION "@VERSION@" -#define BUILD "@BUILD@" -#define PACKAGE_NAME "@CMAKE_PROJECT_NAME@" - -#ifndef INLINE -#if defined(__GNUC__) -#define INLINE inline __attribute__((always_inline)) -#elif defined(_MSC_VER) -#define INLINE __forceinline -#else -#define INLINE -#endif -#endif diff --git a/win/jpeg62-memsrcdst.def b/win/jpeg62-memsrcdst.def index 6499316..4d24a14 100755 --- a/win/jpeg62-memsrcdst.def +++ b/win/jpeg62-memsrcdst.def @@ -1,106 +1,108 @@ EXPORTS - jcopy_block_row @ 1 ; - jcopy_sample_rows @ 2 ; - 
jdiv_round_up @ 3 ; - jinit_1pass_quantizer @ 4 ; - jinit_2pass_quantizer @ 5 ; - jinit_c_coef_controller @ 6 ; - jinit_c_main_controller @ 7 ; - jinit_c_master_control @ 8 ; - jinit_c_prep_controller @ 9 ; - jinit_color_converter @ 10 ; - jinit_color_deconverter @ 11 ; - jinit_compress_master @ 12 ; - jinit_d_coef_controller @ 13 ; - jinit_d_main_controller @ 14 ; - jinit_d_post_controller @ 15 ; - jinit_downsampler @ 16 ; - jinit_forward_dct @ 17 ; - jinit_huff_decoder @ 18 ; - jinit_huff_encoder @ 19 ; - jinit_input_controller @ 20 ; - jinit_inverse_dct @ 21 ; - jinit_marker_reader @ 22 ; - jinit_marker_writer @ 23 ; - jinit_master_decompress @ 24 ; - jinit_memory_mgr @ 25 ; - jinit_merged_upsampler @ 26 ; - jinit_phuff_decoder @ 27 ; - jinit_phuff_encoder @ 28 ; - jinit_upsampler @ 29 ; - jpeg_CreateCompress @ 30 ; - jpeg_CreateDecompress @ 31 ; - jpeg_abort @ 32 ; - jpeg_abort_compress @ 33 ; - jpeg_abort_decompress @ 34 ; - jpeg_add_quant_table @ 35 ; - jpeg_alloc_huff_table @ 36 ; - jpeg_alloc_quant_table @ 37 ; - jpeg_calc_output_dimensions @ 38 ; - jpeg_consume_input @ 39 ; - jpeg_copy_critical_parameters @ 40 ; - jpeg_default_colorspace @ 41 ; - jpeg_destroy @ 42 ; - jpeg_destroy_compress @ 43 ; - jpeg_destroy_decompress @ 44 ; - jpeg_fdct_float @ 45 ; - jpeg_fdct_ifast @ 46 ; - jpeg_fdct_islow @ 47 ; - jpeg_fill_bit_buffer @ 48 ; - jpeg_finish_compress @ 49 ; - jpeg_finish_decompress @ 50 ; - jpeg_finish_output @ 51 ; - jpeg_free_large @ 52 ; - jpeg_free_small @ 53 ; - jpeg_gen_optimal_table @ 54 ; - jpeg_get_large @ 55 ; - jpeg_get_small @ 56 ; - jpeg_has_multiple_scans @ 57 ; - jpeg_huff_decode @ 58 ; - jpeg_idct_1x1 @ 59 ; - jpeg_idct_2x2 @ 60 ; - jpeg_idct_4x4 @ 61 ; - jpeg_idct_float @ 62 ; - jpeg_idct_ifast @ 63 ; - jpeg_idct_islow @ 64 ; - jpeg_input_complete @ 65 ; - jpeg_make_c_derived_tbl @ 66 ; - jpeg_make_d_derived_tbl @ 67 ; - jpeg_mem_available @ 68 ; - jpeg_mem_init @ 69 ; - jpeg_mem_term @ 70 ; - jpeg_new_colormap @ 71 ; - 
jpeg_open_backing_store @ 72 ; - jpeg_quality_scaling @ 73 ; - jpeg_read_coefficients @ 74 ; - jpeg_read_header @ 75 ; - jpeg_read_raw_data @ 76 ; - jpeg_read_scanlines @ 77 ; - jpeg_resync_to_restart @ 78 ; - jpeg_save_markers @ 79 ; - jpeg_set_colorspace @ 80 ; - jpeg_set_defaults @ 81 ; - jpeg_set_linear_quality @ 82 ; - jpeg_set_marker_processor @ 83 ; - jpeg_set_quality @ 84 ; - jpeg_simple_progression @ 85 ; - jpeg_start_compress @ 86 ; - jpeg_start_decompress @ 87 ; - jpeg_start_output @ 88 ; - jpeg_std_error @ 89 ; - jpeg_stdio_dest @ 90 ; - jpeg_stdio_src @ 91 ; - jpeg_suppress_tables @ 92 ; - jpeg_write_coefficients @ 93 ; - jpeg_write_m_byte @ 94 ; - jpeg_write_m_header @ 95 ; - jpeg_write_marker @ 96 ; - jpeg_write_raw_data @ 97 ; - jpeg_write_scanlines @ 98 ; - jpeg_write_tables @ 99 ; - jround_up @ 100 ; - jzero_far @ 101 ; - jpeg_mem_dest @ 102 ; - jpeg_mem_src @ 103 ; - jpeg_skip_scanlines @ 104 ; - jpeg_crop_scanline @ 105 ; + jcopy_block_row @ 1 ; + jcopy_sample_rows @ 2 ; + jdiv_round_up @ 3 ; + jinit_1pass_quantizer @ 4 ; + jinit_2pass_quantizer @ 5 ; + jinit_c_coef_controller @ 6 ; + jinit_c_main_controller @ 7 ; + jinit_c_master_control @ 8 ; + jinit_c_prep_controller @ 9 ; + jinit_color_converter @ 10 ; + jinit_color_deconverter @ 11 ; + jinit_compress_master @ 12 ; + jinit_d_coef_controller @ 13 ; + jinit_d_main_controller @ 14 ; + jinit_d_post_controller @ 15 ; + jinit_downsampler @ 16 ; + jinit_forward_dct @ 17 ; + jinit_huff_decoder @ 18 ; + jinit_huff_encoder @ 19 ; + jinit_input_controller @ 20 ; + jinit_inverse_dct @ 21 ; + jinit_marker_reader @ 22 ; + jinit_marker_writer @ 23 ; + jinit_master_decompress @ 24 ; + jinit_memory_mgr @ 25 ; + jinit_merged_upsampler @ 26 ; + jinit_phuff_decoder @ 27 ; + jinit_phuff_encoder @ 28 ; + jinit_upsampler @ 29 ; + jpeg_CreateCompress @ 30 ; + jpeg_CreateDecompress @ 31 ; + jpeg_abort @ 32 ; + jpeg_abort_compress @ 33 ; + jpeg_abort_decompress @ 34 ; + jpeg_add_quant_table @ 35 ; + 
jpeg_alloc_huff_table @ 36 ; + jpeg_alloc_quant_table @ 37 ; + jpeg_calc_output_dimensions @ 38 ; + jpeg_consume_input @ 39 ; + jpeg_copy_critical_parameters @ 40 ; + jpeg_default_colorspace @ 41 ; + jpeg_destroy @ 42 ; + jpeg_destroy_compress @ 43 ; + jpeg_destroy_decompress @ 44 ; + jpeg_fdct_float @ 45 ; + jpeg_fdct_ifast @ 46 ; + jpeg_fdct_islow @ 47 ; + jpeg_fill_bit_buffer @ 48 ; + jpeg_finish_compress @ 49 ; + jpeg_finish_decompress @ 50 ; + jpeg_finish_output @ 51 ; + jpeg_free_large @ 52 ; + jpeg_free_small @ 53 ; + jpeg_gen_optimal_table @ 54 ; + jpeg_get_large @ 55 ; + jpeg_get_small @ 56 ; + jpeg_has_multiple_scans @ 57 ; + jpeg_huff_decode @ 58 ; + jpeg_idct_1x1 @ 59 ; + jpeg_idct_2x2 @ 60 ; + jpeg_idct_4x4 @ 61 ; + jpeg_idct_float @ 62 ; + jpeg_idct_ifast @ 63 ; + jpeg_idct_islow @ 64 ; + jpeg_input_complete @ 65 ; + jpeg_make_c_derived_tbl @ 66 ; + jpeg_make_d_derived_tbl @ 67 ; + jpeg_mem_available @ 68 ; + jpeg_mem_init @ 69 ; + jpeg_mem_term @ 70 ; + jpeg_new_colormap @ 71 ; + jpeg_open_backing_store @ 72 ; + jpeg_quality_scaling @ 73 ; + jpeg_read_coefficients @ 74 ; + jpeg_read_header @ 75 ; + jpeg_read_raw_data @ 76 ; + jpeg_read_scanlines @ 77 ; + jpeg_resync_to_restart @ 78 ; + jpeg_save_markers @ 79 ; + jpeg_set_colorspace @ 80 ; + jpeg_set_defaults @ 81 ; + jpeg_set_linear_quality @ 82 ; + jpeg_set_marker_processor @ 83 ; + jpeg_set_quality @ 84 ; + jpeg_simple_progression @ 85 ; + jpeg_start_compress @ 86 ; + jpeg_start_decompress @ 87 ; + jpeg_start_output @ 88 ; + jpeg_std_error @ 89 ; + jpeg_stdio_dest @ 90 ; + jpeg_stdio_src @ 91 ; + jpeg_suppress_tables @ 92 ; + jpeg_write_coefficients @ 93 ; + jpeg_write_m_byte @ 94 ; + jpeg_write_m_header @ 95 ; + jpeg_write_marker @ 96 ; + jpeg_write_raw_data @ 97 ; + jpeg_write_scanlines @ 98 ; + jpeg_write_tables @ 99 ; + jround_up @ 100 ; + jzero_far @ 101 ; + jpeg_mem_dest @ 102 ; + jpeg_mem_src @ 103 ; + jpeg_skip_scanlines @ 104 ; + jpeg_crop_scanline @ 105 ; + jpeg_read_icc_profile @ 106 ; + 
jpeg_write_icc_profile @ 107 ; diff --git a/win/jpeg62.def b/win/jpeg62.def index 9f30b1a..f3c69b2 100755 --- a/win/jpeg62.def +++ b/win/jpeg62.def @@ -1,104 +1,106 @@ EXPORTS - jcopy_block_row @ 1 ; - jcopy_sample_rows @ 2 ; - jdiv_round_up @ 3 ; - jinit_1pass_quantizer @ 4 ; - jinit_2pass_quantizer @ 5 ; - jinit_c_coef_controller @ 6 ; - jinit_c_main_controller @ 7 ; - jinit_c_master_control @ 8 ; - jinit_c_prep_controller @ 9 ; - jinit_color_converter @ 10 ; - jinit_color_deconverter @ 11 ; - jinit_compress_master @ 12 ; - jinit_d_coef_controller @ 13 ; - jinit_d_main_controller @ 14 ; - jinit_d_post_controller @ 15 ; - jinit_downsampler @ 16 ; - jinit_forward_dct @ 17 ; - jinit_huff_decoder @ 18 ; - jinit_huff_encoder @ 19 ; - jinit_input_controller @ 20 ; - jinit_inverse_dct @ 21 ; - jinit_marker_reader @ 22 ; - jinit_marker_writer @ 23 ; - jinit_master_decompress @ 24 ; - jinit_memory_mgr @ 25 ; - jinit_merged_upsampler @ 26 ; - jinit_phuff_decoder @ 27 ; - jinit_phuff_encoder @ 28 ; - jinit_upsampler @ 29 ; - jpeg_CreateCompress @ 30 ; - jpeg_CreateDecompress @ 31 ; - jpeg_abort @ 32 ; - jpeg_abort_compress @ 33 ; - jpeg_abort_decompress @ 34 ; - jpeg_add_quant_table @ 35 ; - jpeg_alloc_huff_table @ 36 ; - jpeg_alloc_quant_table @ 37 ; - jpeg_calc_output_dimensions @ 38 ; - jpeg_consume_input @ 39 ; - jpeg_copy_critical_parameters @ 40 ; - jpeg_default_colorspace @ 41 ; - jpeg_destroy @ 42 ; - jpeg_destroy_compress @ 43 ; - jpeg_destroy_decompress @ 44 ; - jpeg_fdct_float @ 45 ; - jpeg_fdct_ifast @ 46 ; - jpeg_fdct_islow @ 47 ; - jpeg_fill_bit_buffer @ 48 ; - jpeg_finish_compress @ 49 ; - jpeg_finish_decompress @ 50 ; - jpeg_finish_output @ 51 ; - jpeg_free_large @ 52 ; - jpeg_free_small @ 53 ; - jpeg_gen_optimal_table @ 54 ; - jpeg_get_large @ 55 ; - jpeg_get_small @ 56 ; - jpeg_has_multiple_scans @ 57 ; - jpeg_huff_decode @ 58 ; - jpeg_idct_1x1 @ 59 ; - jpeg_idct_2x2 @ 60 ; - jpeg_idct_4x4 @ 61 ; - jpeg_idct_float @ 62 ; - jpeg_idct_ifast @ 63 ; - 
jpeg_idct_islow @ 64 ; - jpeg_input_complete @ 65 ; - jpeg_make_c_derived_tbl @ 66 ; - jpeg_make_d_derived_tbl @ 67 ; - jpeg_mem_available @ 68 ; - jpeg_mem_init @ 69 ; - jpeg_mem_term @ 70 ; - jpeg_new_colormap @ 71 ; - jpeg_open_backing_store @ 72 ; - jpeg_quality_scaling @ 73 ; - jpeg_read_coefficients @ 74 ; - jpeg_read_header @ 75 ; - jpeg_read_raw_data @ 76 ; - jpeg_read_scanlines @ 77 ; - jpeg_resync_to_restart @ 78 ; - jpeg_save_markers @ 79 ; - jpeg_set_colorspace @ 80 ; - jpeg_set_defaults @ 81 ; - jpeg_set_linear_quality @ 82 ; - jpeg_set_marker_processor @ 83 ; - jpeg_set_quality @ 84 ; - jpeg_simple_progression @ 85 ; - jpeg_start_compress @ 86 ; - jpeg_start_decompress @ 87 ; - jpeg_start_output @ 88 ; - jpeg_std_error @ 89 ; - jpeg_stdio_dest @ 90 ; - jpeg_stdio_src @ 91 ; - jpeg_suppress_tables @ 92 ; - jpeg_write_coefficients @ 93 ; - jpeg_write_m_byte @ 94 ; - jpeg_write_m_header @ 95 ; - jpeg_write_marker @ 96 ; - jpeg_write_raw_data @ 97 ; - jpeg_write_scanlines @ 98 ; - jpeg_write_tables @ 99 ; - jround_up @ 100 ; - jzero_far @ 101 ; - jpeg_skip_scanlines @ 102 ; - jpeg_crop_scanline @ 103 ; + jcopy_block_row @ 1 ; + jcopy_sample_rows @ 2 ; + jdiv_round_up @ 3 ; + jinit_1pass_quantizer @ 4 ; + jinit_2pass_quantizer @ 5 ; + jinit_c_coef_controller @ 6 ; + jinit_c_main_controller @ 7 ; + jinit_c_master_control @ 8 ; + jinit_c_prep_controller @ 9 ; + jinit_color_converter @ 10 ; + jinit_color_deconverter @ 11 ; + jinit_compress_master @ 12 ; + jinit_d_coef_controller @ 13 ; + jinit_d_main_controller @ 14 ; + jinit_d_post_controller @ 15 ; + jinit_downsampler @ 16 ; + jinit_forward_dct @ 17 ; + jinit_huff_decoder @ 18 ; + jinit_huff_encoder @ 19 ; + jinit_input_controller @ 20 ; + jinit_inverse_dct @ 21 ; + jinit_marker_reader @ 22 ; + jinit_marker_writer @ 23 ; + jinit_master_decompress @ 24 ; + jinit_memory_mgr @ 25 ; + jinit_merged_upsampler @ 26 ; + jinit_phuff_decoder @ 27 ; + jinit_phuff_encoder @ 28 ; + jinit_upsampler @ 29 ; + 
jpeg_CreateCompress @ 30 ; + jpeg_CreateDecompress @ 31 ; + jpeg_abort @ 32 ; + jpeg_abort_compress @ 33 ; + jpeg_abort_decompress @ 34 ; + jpeg_add_quant_table @ 35 ; + jpeg_alloc_huff_table @ 36 ; + jpeg_alloc_quant_table @ 37 ; + jpeg_calc_output_dimensions @ 38 ; + jpeg_consume_input @ 39 ; + jpeg_copy_critical_parameters @ 40 ; + jpeg_default_colorspace @ 41 ; + jpeg_destroy @ 42 ; + jpeg_destroy_compress @ 43 ; + jpeg_destroy_decompress @ 44 ; + jpeg_fdct_float @ 45 ; + jpeg_fdct_ifast @ 46 ; + jpeg_fdct_islow @ 47 ; + jpeg_fill_bit_buffer @ 48 ; + jpeg_finish_compress @ 49 ; + jpeg_finish_decompress @ 50 ; + jpeg_finish_output @ 51 ; + jpeg_free_large @ 52 ; + jpeg_free_small @ 53 ; + jpeg_gen_optimal_table @ 54 ; + jpeg_get_large @ 55 ; + jpeg_get_small @ 56 ; + jpeg_has_multiple_scans @ 57 ; + jpeg_huff_decode @ 58 ; + jpeg_idct_1x1 @ 59 ; + jpeg_idct_2x2 @ 60 ; + jpeg_idct_4x4 @ 61 ; + jpeg_idct_float @ 62 ; + jpeg_idct_ifast @ 63 ; + jpeg_idct_islow @ 64 ; + jpeg_input_complete @ 65 ; + jpeg_make_c_derived_tbl @ 66 ; + jpeg_make_d_derived_tbl @ 67 ; + jpeg_mem_available @ 68 ; + jpeg_mem_init @ 69 ; + jpeg_mem_term @ 70 ; + jpeg_new_colormap @ 71 ; + jpeg_open_backing_store @ 72 ; + jpeg_quality_scaling @ 73 ; + jpeg_read_coefficients @ 74 ; + jpeg_read_header @ 75 ; + jpeg_read_raw_data @ 76 ; + jpeg_read_scanlines @ 77 ; + jpeg_resync_to_restart @ 78 ; + jpeg_save_markers @ 79 ; + jpeg_set_colorspace @ 80 ; + jpeg_set_defaults @ 81 ; + jpeg_set_linear_quality @ 82 ; + jpeg_set_marker_processor @ 83 ; + jpeg_set_quality @ 84 ; + jpeg_simple_progression @ 85 ; + jpeg_start_compress @ 86 ; + jpeg_start_decompress @ 87 ; + jpeg_start_output @ 88 ; + jpeg_std_error @ 89 ; + jpeg_stdio_dest @ 90 ; + jpeg_stdio_src @ 91 ; + jpeg_suppress_tables @ 92 ; + jpeg_write_coefficients @ 93 ; + jpeg_write_m_byte @ 94 ; + jpeg_write_m_header @ 95 ; + jpeg_write_marker @ 96 ; + jpeg_write_raw_data @ 97 ; + jpeg_write_scanlines @ 98 ; + jpeg_write_tables @ 99 ; + 
jround_up @ 100 ; + jzero_far @ 101 ; + jpeg_skip_scanlines @ 102 ; + jpeg_crop_scanline @ 103 ; + jpeg_read_icc_profile @ 104 ; + jpeg_write_icc_profile @ 105 ; diff --git a/win/jpeg7-memsrcdst.def b/win/jpeg7-memsrcdst.def index 37a4777..a005aff 100644 --- a/win/jpeg7-memsrcdst.def +++ b/win/jpeg7-memsrcdst.def @@ -1,108 +1,110 @@ EXPORTS - jcopy_block_row @ 1 ; - jcopy_sample_rows @ 2 ; - jdiv_round_up @ 3 ; - jinit_1pass_quantizer @ 4 ; - jinit_2pass_quantizer @ 5 ; - jinit_c_coef_controller @ 6 ; - jinit_c_main_controller @ 7 ; - jinit_c_master_control @ 8 ; - jinit_c_prep_controller @ 9 ; - jinit_color_converter @ 10 ; - jinit_color_deconverter @ 11 ; - jinit_compress_master @ 12 ; - jinit_d_coef_controller @ 13 ; - jinit_d_main_controller @ 14 ; - jinit_d_post_controller @ 15 ; - jinit_downsampler @ 16 ; - jinit_forward_dct @ 17 ; - jinit_huff_decoder @ 18 ; - jinit_huff_encoder @ 19 ; - jinit_input_controller @ 20 ; - jinit_inverse_dct @ 21 ; - jinit_marker_reader @ 22 ; - jinit_marker_writer @ 23 ; - jinit_master_decompress @ 24 ; - jinit_memory_mgr @ 25 ; - jinit_merged_upsampler @ 26 ; - jinit_phuff_decoder @ 27 ; - jinit_phuff_encoder @ 28 ; - jinit_upsampler @ 29 ; - jpeg_CreateCompress @ 30 ; - jpeg_CreateDecompress @ 31 ; - jpeg_abort @ 32 ; - jpeg_abort_compress @ 33 ; - jpeg_abort_decompress @ 34 ; - jpeg_add_quant_table @ 35 ; - jpeg_alloc_huff_table @ 36 ; - jpeg_alloc_quant_table @ 37 ; - jpeg_calc_jpeg_dimensions @ 38 ; - jpeg_calc_output_dimensions @ 39 ; - jpeg_consume_input @ 40 ; - jpeg_copy_critical_parameters @ 41 ; - jpeg_default_colorspace @ 42 ; - jpeg_default_qtables @ 43 ; - jpeg_destroy @ 44 ; - jpeg_destroy_compress @ 45 ; - jpeg_destroy_decompress @ 46 ; - jpeg_fdct_float @ 47 ; - jpeg_fdct_ifast @ 48 ; - jpeg_fdct_islow @ 49 ; - jpeg_fill_bit_buffer @ 50 ; - jpeg_finish_compress @ 51 ; - jpeg_finish_decompress @ 52 ; - jpeg_finish_output @ 53 ; - jpeg_free_large @ 54 ; - jpeg_free_small @ 55 ; - jpeg_gen_optimal_table @ 56 ; - 
jpeg_get_large @ 57 ; - jpeg_get_small @ 58 ; - jpeg_has_multiple_scans @ 59 ; - jpeg_huff_decode @ 60 ; - jpeg_idct_1x1 @ 61 ; - jpeg_idct_2x2 @ 62 ; - jpeg_idct_4x4 @ 63 ; - jpeg_idct_float @ 64 ; - jpeg_idct_ifast @ 65 ; - jpeg_idct_islow @ 66 ; - jpeg_input_complete @ 67 ; - jpeg_make_c_derived_tbl @ 68 ; - jpeg_make_d_derived_tbl @ 69 ; - jpeg_mem_available @ 70 ; - jpeg_mem_init @ 71 ; - jpeg_mem_term @ 72 ; - jpeg_new_colormap @ 73 ; - jpeg_open_backing_store @ 74 ; - jpeg_quality_scaling @ 75 ; - jpeg_read_coefficients @ 76 ; - jpeg_read_header @ 77 ; - jpeg_read_raw_data @ 78 ; - jpeg_read_scanlines @ 79 ; - jpeg_resync_to_restart @ 80 ; - jpeg_save_markers @ 81 ; - jpeg_set_colorspace @ 82 ; - jpeg_set_defaults @ 83 ; - jpeg_set_linear_quality @ 84 ; - jpeg_set_marker_processor @ 85 ; - jpeg_set_quality @ 86 ; - jpeg_simple_progression @ 87 ; - jpeg_start_compress @ 88 ; - jpeg_start_decompress @ 89 ; - jpeg_start_output @ 90 ; - jpeg_std_error @ 91 ; - jpeg_stdio_dest @ 92 ; - jpeg_stdio_src @ 93 ; - jpeg_suppress_tables @ 94 ; - jpeg_write_coefficients @ 95 ; - jpeg_write_m_byte @ 96 ; - jpeg_write_m_header @ 97 ; - jpeg_write_marker @ 98 ; - jpeg_write_raw_data @ 99 ; - jpeg_write_scanlines @ 100 ; - jpeg_write_tables @ 101 ; - jround_up @ 102 ; - jzero_far @ 103 ; - jpeg_mem_dest @ 104 ; - jpeg_mem_src @ 105 ; - jpeg_skip_scanlines @ 106 ; - jpeg_crop_scanline @ 107 ; + jcopy_block_row @ 1 ; + jcopy_sample_rows @ 2 ; + jdiv_round_up @ 3 ; + jinit_1pass_quantizer @ 4 ; + jinit_2pass_quantizer @ 5 ; + jinit_c_coef_controller @ 6 ; + jinit_c_main_controller @ 7 ; + jinit_c_master_control @ 8 ; + jinit_c_prep_controller @ 9 ; + jinit_color_converter @ 10 ; + jinit_color_deconverter @ 11 ; + jinit_compress_master @ 12 ; + jinit_d_coef_controller @ 13 ; + jinit_d_main_controller @ 14 ; + jinit_d_post_controller @ 15 ; + jinit_downsampler @ 16 ; + jinit_forward_dct @ 17 ; + jinit_huff_decoder @ 18 ; + jinit_huff_encoder @ 19 ; + jinit_input_controller @ 20 ; 
+ jinit_inverse_dct @ 21 ; + jinit_marker_reader @ 22 ; + jinit_marker_writer @ 23 ; + jinit_master_decompress @ 24 ; + jinit_memory_mgr @ 25 ; + jinit_merged_upsampler @ 26 ; + jinit_phuff_decoder @ 27 ; + jinit_phuff_encoder @ 28 ; + jinit_upsampler @ 29 ; + jpeg_CreateCompress @ 30 ; + jpeg_CreateDecompress @ 31 ; + jpeg_abort @ 32 ; + jpeg_abort_compress @ 33 ; + jpeg_abort_decompress @ 34 ; + jpeg_add_quant_table @ 35 ; + jpeg_alloc_huff_table @ 36 ; + jpeg_alloc_quant_table @ 37 ; + jpeg_calc_jpeg_dimensions @ 38 ; + jpeg_calc_output_dimensions @ 39 ; + jpeg_consume_input @ 40 ; + jpeg_copy_critical_parameters @ 41 ; + jpeg_default_colorspace @ 42 ; + jpeg_default_qtables @ 43 ; + jpeg_destroy @ 44 ; + jpeg_destroy_compress @ 45 ; + jpeg_destroy_decompress @ 46 ; + jpeg_fdct_float @ 47 ; + jpeg_fdct_ifast @ 48 ; + jpeg_fdct_islow @ 49 ; + jpeg_fill_bit_buffer @ 50 ; + jpeg_finish_compress @ 51 ; + jpeg_finish_decompress @ 52 ; + jpeg_finish_output @ 53 ; + jpeg_free_large @ 54 ; + jpeg_free_small @ 55 ; + jpeg_gen_optimal_table @ 56 ; + jpeg_get_large @ 57 ; + jpeg_get_small @ 58 ; + jpeg_has_multiple_scans @ 59 ; + jpeg_huff_decode @ 60 ; + jpeg_idct_1x1 @ 61 ; + jpeg_idct_2x2 @ 62 ; + jpeg_idct_4x4 @ 63 ; + jpeg_idct_float @ 64 ; + jpeg_idct_ifast @ 65 ; + jpeg_idct_islow @ 66 ; + jpeg_input_complete @ 67 ; + jpeg_make_c_derived_tbl @ 68 ; + jpeg_make_d_derived_tbl @ 69 ; + jpeg_mem_available @ 70 ; + jpeg_mem_init @ 71 ; + jpeg_mem_term @ 72 ; + jpeg_new_colormap @ 73 ; + jpeg_open_backing_store @ 74 ; + jpeg_quality_scaling @ 75 ; + jpeg_read_coefficients @ 76 ; + jpeg_read_header @ 77 ; + jpeg_read_raw_data @ 78 ; + jpeg_read_scanlines @ 79 ; + jpeg_resync_to_restart @ 80 ; + jpeg_save_markers @ 81 ; + jpeg_set_colorspace @ 82 ; + jpeg_set_defaults @ 83 ; + jpeg_set_linear_quality @ 84 ; + jpeg_set_marker_processor @ 85 ; + jpeg_set_quality @ 86 ; + jpeg_simple_progression @ 87 ; + jpeg_start_compress @ 88 ; + jpeg_start_decompress @ 89 ; + 
jpeg_start_output @ 90 ; + jpeg_std_error @ 91 ; + jpeg_stdio_dest @ 92 ; + jpeg_stdio_src @ 93 ; + jpeg_suppress_tables @ 94 ; + jpeg_write_coefficients @ 95 ; + jpeg_write_m_byte @ 96 ; + jpeg_write_m_header @ 97 ; + jpeg_write_marker @ 98 ; + jpeg_write_raw_data @ 99 ; + jpeg_write_scanlines @ 100 ; + jpeg_write_tables @ 101 ; + jround_up @ 102 ; + jzero_far @ 103 ; + jpeg_mem_dest @ 104 ; + jpeg_mem_src @ 105 ; + jpeg_skip_scanlines @ 106 ; + jpeg_crop_scanline @ 107 ; + jpeg_read_icc_profile @ 108 ; + jpeg_write_icc_profile @ 109 ; diff --git a/win/jpeg7.def b/win/jpeg7.def index 92463c5..49f4c02 100644 --- a/win/jpeg7.def +++ b/win/jpeg7.def @@ -1,106 +1,108 @@ EXPORTS - jcopy_block_row @ 1 ; - jcopy_sample_rows @ 2 ; - jdiv_round_up @ 3 ; - jinit_1pass_quantizer @ 4 ; - jinit_2pass_quantizer @ 5 ; - jinit_c_coef_controller @ 6 ; - jinit_c_main_controller @ 7 ; - jinit_c_master_control @ 8 ; - jinit_c_prep_controller @ 9 ; - jinit_color_converter @ 10 ; - jinit_color_deconverter @ 11 ; - jinit_compress_master @ 12 ; - jinit_d_coef_controller @ 13 ; - jinit_d_main_controller @ 14 ; - jinit_d_post_controller @ 15 ; - jinit_downsampler @ 16 ; - jinit_forward_dct @ 17 ; - jinit_huff_decoder @ 18 ; - jinit_huff_encoder @ 19 ; - jinit_input_controller @ 20 ; - jinit_inverse_dct @ 21 ; - jinit_marker_reader @ 22 ; - jinit_marker_writer @ 23 ; - jinit_master_decompress @ 24 ; - jinit_memory_mgr @ 25 ; - jinit_merged_upsampler @ 26 ; - jinit_phuff_decoder @ 27 ; - jinit_phuff_encoder @ 28 ; - jinit_upsampler @ 29 ; - jpeg_CreateCompress @ 30 ; - jpeg_CreateDecompress @ 31 ; - jpeg_abort @ 32 ; - jpeg_abort_compress @ 33 ; - jpeg_abort_decompress @ 34 ; - jpeg_add_quant_table @ 35 ; - jpeg_alloc_huff_table @ 36 ; - jpeg_alloc_quant_table @ 37 ; - jpeg_calc_jpeg_dimensions @ 38 ; - jpeg_calc_output_dimensions @ 39 ; - jpeg_consume_input @ 40 ; - jpeg_copy_critical_parameters @ 41 ; - jpeg_default_colorspace @ 42 ; - jpeg_default_qtables @ 43 ; - jpeg_destroy @ 44 ; - 
jpeg_destroy_compress @ 45 ; - jpeg_destroy_decompress @ 46 ; - jpeg_fdct_float @ 47 ; - jpeg_fdct_ifast @ 48 ; - jpeg_fdct_islow @ 49 ; - jpeg_fill_bit_buffer @ 50 ; - jpeg_finish_compress @ 51 ; - jpeg_finish_decompress @ 52 ; - jpeg_finish_output @ 53 ; - jpeg_free_large @ 54 ; - jpeg_free_small @ 55 ; - jpeg_gen_optimal_table @ 56 ; - jpeg_get_large @ 57 ; - jpeg_get_small @ 58 ; - jpeg_has_multiple_scans @ 59 ; - jpeg_huff_decode @ 60 ; - jpeg_idct_1x1 @ 61 ; - jpeg_idct_2x2 @ 62 ; - jpeg_idct_4x4 @ 63 ; - jpeg_idct_float @ 64 ; - jpeg_idct_ifast @ 65 ; - jpeg_idct_islow @ 66 ; - jpeg_input_complete @ 67 ; - jpeg_make_c_derived_tbl @ 68 ; - jpeg_make_d_derived_tbl @ 69 ; - jpeg_mem_available @ 70 ; - jpeg_mem_init @ 71 ; - jpeg_mem_term @ 72 ; - jpeg_new_colormap @ 73 ; - jpeg_open_backing_store @ 74 ; - jpeg_quality_scaling @ 75 ; - jpeg_read_coefficients @ 76 ; - jpeg_read_header @ 77 ; - jpeg_read_raw_data @ 78 ; - jpeg_read_scanlines @ 79 ; - jpeg_resync_to_restart @ 80 ; - jpeg_save_markers @ 81 ; - jpeg_set_colorspace @ 82 ; - jpeg_set_defaults @ 83 ; - jpeg_set_linear_quality @ 84 ; - jpeg_set_marker_processor @ 85 ; - jpeg_set_quality @ 86 ; - jpeg_simple_progression @ 87 ; - jpeg_start_compress @ 88 ; - jpeg_start_decompress @ 89 ; - jpeg_start_output @ 90 ; - jpeg_std_error @ 91 ; - jpeg_stdio_dest @ 92 ; - jpeg_stdio_src @ 93 ; - jpeg_suppress_tables @ 94 ; - jpeg_write_coefficients @ 95 ; - jpeg_write_m_byte @ 96 ; - jpeg_write_m_header @ 97 ; - jpeg_write_marker @ 98 ; - jpeg_write_raw_data @ 99 ; - jpeg_write_scanlines @ 100 ; - jpeg_write_tables @ 101 ; - jround_up @ 102 ; - jzero_far @ 103 ; - jpeg_skip_scanlines @ 104 ; - jpeg_crop_scanline @ 105 ; + jcopy_block_row @ 1 ; + jcopy_sample_rows @ 2 ; + jdiv_round_up @ 3 ; + jinit_1pass_quantizer @ 4 ; + jinit_2pass_quantizer @ 5 ; + jinit_c_coef_controller @ 6 ; + jinit_c_main_controller @ 7 ; + jinit_c_master_control @ 8 ; + jinit_c_prep_controller @ 9 ; + jinit_color_converter @ 10 ; + 
jinit_color_deconverter @ 11 ; + jinit_compress_master @ 12 ; + jinit_d_coef_controller @ 13 ; + jinit_d_main_controller @ 14 ; + jinit_d_post_controller @ 15 ; + jinit_downsampler @ 16 ; + jinit_forward_dct @ 17 ; + jinit_huff_decoder @ 18 ; + jinit_huff_encoder @ 19 ; + jinit_input_controller @ 20 ; + jinit_inverse_dct @ 21 ; + jinit_marker_reader @ 22 ; + jinit_marker_writer @ 23 ; + jinit_master_decompress @ 24 ; + jinit_memory_mgr @ 25 ; + jinit_merged_upsampler @ 26 ; + jinit_phuff_decoder @ 27 ; + jinit_phuff_encoder @ 28 ; + jinit_upsampler @ 29 ; + jpeg_CreateCompress @ 30 ; + jpeg_CreateDecompress @ 31 ; + jpeg_abort @ 32 ; + jpeg_abort_compress @ 33 ; + jpeg_abort_decompress @ 34 ; + jpeg_add_quant_table @ 35 ; + jpeg_alloc_huff_table @ 36 ; + jpeg_alloc_quant_table @ 37 ; + jpeg_calc_jpeg_dimensions @ 38 ; + jpeg_calc_output_dimensions @ 39 ; + jpeg_consume_input @ 40 ; + jpeg_copy_critical_parameters @ 41 ; + jpeg_default_colorspace @ 42 ; + jpeg_default_qtables @ 43 ; + jpeg_destroy @ 44 ; + jpeg_destroy_compress @ 45 ; + jpeg_destroy_decompress @ 46 ; + jpeg_fdct_float @ 47 ; + jpeg_fdct_ifast @ 48 ; + jpeg_fdct_islow @ 49 ; + jpeg_fill_bit_buffer @ 50 ; + jpeg_finish_compress @ 51 ; + jpeg_finish_decompress @ 52 ; + jpeg_finish_output @ 53 ; + jpeg_free_large @ 54 ; + jpeg_free_small @ 55 ; + jpeg_gen_optimal_table @ 56 ; + jpeg_get_large @ 57 ; + jpeg_get_small @ 58 ; + jpeg_has_multiple_scans @ 59 ; + jpeg_huff_decode @ 60 ; + jpeg_idct_1x1 @ 61 ; + jpeg_idct_2x2 @ 62 ; + jpeg_idct_4x4 @ 63 ; + jpeg_idct_float @ 64 ; + jpeg_idct_ifast @ 65 ; + jpeg_idct_islow @ 66 ; + jpeg_input_complete @ 67 ; + jpeg_make_c_derived_tbl @ 68 ; + jpeg_make_d_derived_tbl @ 69 ; + jpeg_mem_available @ 70 ; + jpeg_mem_init @ 71 ; + jpeg_mem_term @ 72 ; + jpeg_new_colormap @ 73 ; + jpeg_open_backing_store @ 74 ; + jpeg_quality_scaling @ 75 ; + jpeg_read_coefficients @ 76 ; + jpeg_read_header @ 77 ; + jpeg_read_raw_data @ 78 ; + jpeg_read_scanlines @ 79 ; + 
jpeg_resync_to_restart @ 80 ; + jpeg_save_markers @ 81 ; + jpeg_set_colorspace @ 82 ; + jpeg_set_defaults @ 83 ; + jpeg_set_linear_quality @ 84 ; + jpeg_set_marker_processor @ 85 ; + jpeg_set_quality @ 86 ; + jpeg_simple_progression @ 87 ; + jpeg_start_compress @ 88 ; + jpeg_start_decompress @ 89 ; + jpeg_start_output @ 90 ; + jpeg_std_error @ 91 ; + jpeg_stdio_dest @ 92 ; + jpeg_stdio_src @ 93 ; + jpeg_suppress_tables @ 94 ; + jpeg_write_coefficients @ 95 ; + jpeg_write_m_byte @ 96 ; + jpeg_write_m_header @ 97 ; + jpeg_write_marker @ 98 ; + jpeg_write_raw_data @ 99 ; + jpeg_write_scanlines @ 100 ; + jpeg_write_tables @ 101 ; + jround_up @ 102 ; + jzero_far @ 103 ; + jpeg_skip_scanlines @ 104 ; + jpeg_crop_scanline @ 105 ; + jpeg_read_icc_profile @ 106 ; + jpeg_write_icc_profile @ 107 ; diff --git a/win/jpeg8.def b/win/jpeg8.def index 19246ac..0a53125 100644 --- a/win/jpeg8.def +++ b/win/jpeg8.def @@ -1,109 +1,111 @@ EXPORTS - jcopy_block_row @ 1 ; - jcopy_sample_rows @ 2 ; - jdiv_round_up @ 3 ; - jinit_1pass_quantizer @ 4 ; - jinit_2pass_quantizer @ 5 ; - jinit_c_coef_controller @ 6 ; - jinit_c_main_controller @ 7 ; - jinit_c_master_control @ 8 ; - jinit_c_prep_controller @ 9 ; - jinit_color_converter @ 10 ; - jinit_color_deconverter @ 11 ; - jinit_compress_master @ 12 ; - jinit_d_coef_controller @ 13 ; - jinit_d_main_controller @ 14 ; - jinit_d_post_controller @ 15 ; - jinit_downsampler @ 16 ; - jinit_forward_dct @ 17 ; - jinit_huff_decoder @ 18 ; - jinit_huff_encoder @ 19 ; - jinit_input_controller @ 20 ; - jinit_inverse_dct @ 21 ; - jinit_marker_reader @ 22 ; - jinit_marker_writer @ 23 ; - jinit_master_decompress @ 24 ; - jinit_memory_mgr @ 25 ; - jinit_merged_upsampler @ 26 ; - jinit_phuff_decoder @ 27 ; - jinit_phuff_encoder @ 28 ; - jinit_upsampler @ 29 ; - jpeg_CreateCompress @ 30 ; - jpeg_CreateDecompress @ 31 ; - jpeg_abort @ 32 ; - jpeg_abort_compress @ 33 ; - jpeg_abort_decompress @ 34 ; - jpeg_add_quant_table @ 35 ; - jpeg_alloc_huff_table @ 36 ; - 
jpeg_alloc_quant_table @ 37 ; - jpeg_calc_jpeg_dimensions @ 38 ; - jpeg_calc_output_dimensions @ 39 ; - jpeg_consume_input @ 40 ; - jpeg_copy_critical_parameters @ 41 ; - jpeg_core_output_dimensions @ 42 ; - jpeg_default_colorspace @ 43 ; - jpeg_default_qtables @ 44 ; - jpeg_destroy @ 45 ; - jpeg_destroy_compress @ 46 ; - jpeg_destroy_decompress @ 47 ; - jpeg_fdct_float @ 48 ; - jpeg_fdct_ifast @ 49 ; - jpeg_fdct_islow @ 50 ; - jpeg_fill_bit_buffer @ 51 ; - jpeg_finish_compress @ 52 ; - jpeg_finish_decompress @ 53 ; - jpeg_finish_output @ 54 ; - jpeg_free_large @ 55 ; - jpeg_free_small @ 56 ; - jpeg_gen_optimal_table @ 57 ; - jpeg_get_large @ 58 ; - jpeg_get_small @ 59 ; - jpeg_has_multiple_scans @ 60 ; - jpeg_huff_decode @ 61 ; - jpeg_idct_1x1 @ 62 ; - jpeg_idct_2x2 @ 63 ; - jpeg_idct_4x4 @ 64 ; - jpeg_idct_float @ 65 ; - jpeg_idct_ifast @ 66 ; - jpeg_idct_islow @ 67 ; - jpeg_input_complete @ 68 ; - jpeg_make_c_derived_tbl @ 69 ; - jpeg_make_d_derived_tbl @ 70 ; - jpeg_mem_available @ 71 ; - jpeg_mem_dest @ 72 ; - jpeg_mem_init @ 73 ; - jpeg_mem_src @ 74 ; - jpeg_mem_term @ 75 ; - jpeg_new_colormap @ 76 ; - jpeg_open_backing_store @ 77 ; - jpeg_quality_scaling @ 78 ; - jpeg_read_coefficients @ 79 ; - jpeg_read_header @ 80 ; - jpeg_read_raw_data @ 81 ; - jpeg_read_scanlines @ 82 ; - jpeg_resync_to_restart @ 83 ; - jpeg_save_markers @ 84 ; - jpeg_set_colorspace @ 85 ; - jpeg_set_defaults @ 86 ; - jpeg_set_linear_quality @ 87 ; - jpeg_set_marker_processor @ 88 ; - jpeg_set_quality @ 89 ; - jpeg_simple_progression @ 90 ; - jpeg_start_compress @ 91 ; - jpeg_start_decompress @ 92 ; - jpeg_start_output @ 93 ; - jpeg_std_error @ 94 ; - jpeg_stdio_dest @ 95 ; - jpeg_stdio_src @ 96 ; - jpeg_suppress_tables @ 97 ; - jpeg_write_coefficients @ 98 ; - jpeg_write_m_byte @ 99 ; - jpeg_write_m_header @ 100 ; - jpeg_write_marker @ 101 ; - jpeg_write_raw_data @ 102 ; - jpeg_write_scanlines @ 103 ; - jpeg_write_tables @ 104 ; - jround_up @ 105 ; - jzero_far @ 106 ; - 
jpeg_skip_scanlines @ 107 ; - jpeg_crop_scanline @ 108 ; + jcopy_block_row @ 1 ; + jcopy_sample_rows @ 2 ; + jdiv_round_up @ 3 ; + jinit_1pass_quantizer @ 4 ; + jinit_2pass_quantizer @ 5 ; + jinit_c_coef_controller @ 6 ; + jinit_c_main_controller @ 7 ; + jinit_c_master_control @ 8 ; + jinit_c_prep_controller @ 9 ; + jinit_color_converter @ 10 ; + jinit_color_deconverter @ 11 ; + jinit_compress_master @ 12 ; + jinit_d_coef_controller @ 13 ; + jinit_d_main_controller @ 14 ; + jinit_d_post_controller @ 15 ; + jinit_downsampler @ 16 ; + jinit_forward_dct @ 17 ; + jinit_huff_decoder @ 18 ; + jinit_huff_encoder @ 19 ; + jinit_input_controller @ 20 ; + jinit_inverse_dct @ 21 ; + jinit_marker_reader @ 22 ; + jinit_marker_writer @ 23 ; + jinit_master_decompress @ 24 ; + jinit_memory_mgr @ 25 ; + jinit_merged_upsampler @ 26 ; + jinit_phuff_decoder @ 27 ; + jinit_phuff_encoder @ 28 ; + jinit_upsampler @ 29 ; + jpeg_CreateCompress @ 30 ; + jpeg_CreateDecompress @ 31 ; + jpeg_abort @ 32 ; + jpeg_abort_compress @ 33 ; + jpeg_abort_decompress @ 34 ; + jpeg_add_quant_table @ 35 ; + jpeg_alloc_huff_table @ 36 ; + jpeg_alloc_quant_table @ 37 ; + jpeg_calc_jpeg_dimensions @ 38 ; + jpeg_calc_output_dimensions @ 39 ; + jpeg_consume_input @ 40 ; + jpeg_copy_critical_parameters @ 41 ; + jpeg_core_output_dimensions @ 42 ; + jpeg_default_colorspace @ 43 ; + jpeg_default_qtables @ 44 ; + jpeg_destroy @ 45 ; + jpeg_destroy_compress @ 46 ; + jpeg_destroy_decompress @ 47 ; + jpeg_fdct_float @ 48 ; + jpeg_fdct_ifast @ 49 ; + jpeg_fdct_islow @ 50 ; + jpeg_fill_bit_buffer @ 51 ; + jpeg_finish_compress @ 52 ; + jpeg_finish_decompress @ 53 ; + jpeg_finish_output @ 54 ; + jpeg_free_large @ 55 ; + jpeg_free_small @ 56 ; + jpeg_gen_optimal_table @ 57 ; + jpeg_get_large @ 58 ; + jpeg_get_small @ 59 ; + jpeg_has_multiple_scans @ 60 ; + jpeg_huff_decode @ 61 ; + jpeg_idct_1x1 @ 62 ; + jpeg_idct_2x2 @ 63 ; + jpeg_idct_4x4 @ 64 ; + jpeg_idct_float @ 65 ; + jpeg_idct_ifast @ 66 ; + jpeg_idct_islow @ 67 ; + 
jpeg_input_complete @ 68 ; + jpeg_make_c_derived_tbl @ 69 ; + jpeg_make_d_derived_tbl @ 70 ; + jpeg_mem_available @ 71 ; + jpeg_mem_dest @ 72 ; + jpeg_mem_init @ 73 ; + jpeg_mem_src @ 74 ; + jpeg_mem_term @ 75 ; + jpeg_new_colormap @ 76 ; + jpeg_open_backing_store @ 77 ; + jpeg_quality_scaling @ 78 ; + jpeg_read_coefficients @ 79 ; + jpeg_read_header @ 80 ; + jpeg_read_raw_data @ 81 ; + jpeg_read_scanlines @ 82 ; + jpeg_resync_to_restart @ 83 ; + jpeg_save_markers @ 84 ; + jpeg_set_colorspace @ 85 ; + jpeg_set_defaults @ 86 ; + jpeg_set_linear_quality @ 87 ; + jpeg_set_marker_processor @ 88 ; + jpeg_set_quality @ 89 ; + jpeg_simple_progression @ 90 ; + jpeg_start_compress @ 91 ; + jpeg_start_decompress @ 92 ; + jpeg_start_output @ 93 ; + jpeg_std_error @ 94 ; + jpeg_stdio_dest @ 95 ; + jpeg_stdio_src @ 96 ; + jpeg_suppress_tables @ 97 ; + jpeg_write_coefficients @ 98 ; + jpeg_write_m_byte @ 99 ; + jpeg_write_m_header @ 100 ; + jpeg_write_marker @ 101 ; + jpeg_write_raw_data @ 102 ; + jpeg_write_scanlines @ 103 ; + jpeg_write_tables @ 104 ; + jround_up @ 105 ; + jzero_far @ 106 ; + jpeg_skip_scanlines @ 107 ; + jpeg_crop_scanline @ 108 ; + jpeg_read_icc_profile @ 109 ; + jpeg_write_icc_profile @ 110 ; diff --git a/wizard.txt b/wizard.txt index ede721e..c57fe38 100644 --- a/wizard.txt +++ b/wizard.txt @@ -43,7 +43,8 @@ appear between numbers. Also, comments can be included: a comment starts with '#' and extends to the end of the line. Here is an example file that duplicates the default quantization tables: - # Quantization tables given in JPEG spec, section K.1 + # Quantization tables given in Annex K (Clause K.1) of + # Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994. # This is table 0 (the luminance table): 16 11 10 16 24 40 51 61 diff --git a/wrbmp.c b/wrbmp.c index 728bbad..38a64e8 100644 --- a/wrbmp.c +++ b/wrbmp.c @@ -21,6 +21,7 @@ * This code contributed by James Arthur Boucher. 
*/ +#include "cmyk.h" #include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ #include "jconfigint.h" @@ -56,15 +57,26 @@ typedef struct { JDIMENSION row_width; /* physical width of one row in the BMP file */ int pad_bytes; /* number of padding bytes needed per row */ JDIMENSION cur_output_row; /* next row# to write to virtual array */ + + boolean use_inversion_array; /* TRUE = buffer the whole image, which is + stored to disk in bottom-up order, and + receive rows from the calling program in + top-down order + + FALSE = the calling program will maintain + its own image buffer and write the rows in + bottom-up order */ + + JSAMPLE *iobuffer; /* I/O buffer (used to buffer a single row to + disk if use_inversion_array == FALSE) */ } bmp_dest_struct; typedef bmp_dest_struct *bmp_dest_ptr; /* Forward declarations */ -LOCAL(void) write_colormap - (j_decompress_ptr cinfo, bmp_dest_ptr dest, int map_colors, - int map_entry_size); +LOCAL(void) write_colormap(j_decompress_ptr cinfo, bmp_dest_ptr dest, + int map_colors, int map_entry_size); static INLINE boolean is_big_endian(void) @@ -82,29 +94,36 @@ static INLINE boolean is_big_endian(void) */ METHODDEF(void) -put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +put_pixel_rows(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) /* This version is for writing 24-bit pixels */ { - bmp_dest_ptr dest = (bmp_dest_ptr) dinfo; + bmp_dest_ptr dest = (bmp_dest_ptr)dinfo; JSAMPARRAY image_ptr; register JSAMPROW inptr, outptr; register JDIMENSION col; int pad; - /* Access next row in virtual array */ - image_ptr = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, dest->whole_image, - dest->cur_output_row, (JDIMENSION) 1, TRUE); - dest->cur_output_row++; + if (dest->use_inversion_array) { + /* Access next row in virtual array */ + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr)cinfo, dest->whole_image, + dest->cur_output_row, 
(JDIMENSION)1, TRUE); + dest->cur_output_row++; + outptr = image_ptr[0]; + } else { + outptr = dest->iobuffer; + } /* Transfer data. Note destination values must be in BGR order * (even though Microsoft's own documents say the opposite). */ inptr = dest->pub.buffer[0]; - outptr = image_ptr[0]; - if (cinfo->out_color_space == JCS_RGB565) { + if (cinfo->out_color_space == JCS_EXT_BGR) { + MEMCOPY(outptr, inptr, dest->row_width); + outptr += cinfo->output_width * 3; + } else if (cinfo->out_color_space == JCS_RGB565) { boolean big_endian = is_big_endian(); unsigned short *inptr2 = (unsigned short *)inptr; for (col = cinfo->output_width; col > 0; col--) { @@ -120,61 +139,70 @@ put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, outptr += 3; inptr2++; } - } else { + } else if (cinfo->out_color_space == JCS_CMYK) { for (col = cinfo->output_width; col > 0; col--) { - outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */ - outptr[1] = *inptr++; - outptr[0] = *inptr++; + /* can omit GETJSAMPLE() safely */ + JSAMPLE c = *inptr++, m = *inptr++, y = *inptr++, k = *inptr++; + cmyk_to_rgb(c, m, y, k, outptr + 2, outptr + 1, outptr); outptr += 3; } + } else { + register int rindex = rgb_red[cinfo->out_color_space]; + register int gindex = rgb_green[cinfo->out_color_space]; + register int bindex = rgb_blue[cinfo->out_color_space]; + register int ps = rgb_pixelsize[cinfo->out_color_space]; + + for (col = cinfo->output_width; col > 0; col--) { + /* can omit GETJSAMPLE() safely */ + outptr[0] = inptr[bindex]; + outptr[1] = inptr[gindex]; + outptr[2] = inptr[rindex]; + outptr += 3; inptr += ps; + } } /* Zero out the pad bytes. 
*/ pad = dest->pad_bytes; while (--pad >= 0) *outptr++ = 0; + + if (!dest->use_inversion_array) + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->row_width); } METHODDEF(void) -put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +put_gray_rows(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) /* This version is for grayscale OR quantized color output */ { - bmp_dest_ptr dest = (bmp_dest_ptr) dinfo; + bmp_dest_ptr dest = (bmp_dest_ptr)dinfo; JSAMPARRAY image_ptr; register JSAMPROW inptr, outptr; - register JDIMENSION col; int pad; - /* Access next row in virtual array */ - image_ptr = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, dest->whole_image, - dest->cur_output_row, (JDIMENSION) 1, TRUE); - dest->cur_output_row++; + if (dest->use_inversion_array) { + /* Access next row in virtual array */ + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr)cinfo, dest->whole_image, + dest->cur_output_row, (JDIMENSION)1, TRUE); + dest->cur_output_row++; + outptr = image_ptr[0]; + } else { + outptr = dest->iobuffer; + } /* Transfer data. */ inptr = dest->pub.buffer[0]; - outptr = image_ptr[0]; - for (col = cinfo->output_width; col > 0; col--) { - *outptr++ = *inptr++; /* can omit GETJSAMPLE() safely */ - } + MEMCOPY(outptr, inptr, cinfo->output_width); + outptr += cinfo->output_width; /* Zero out the pad bytes. */ pad = dest->pad_bytes; while (--pad >= 0) *outptr++ = 0; -} - -/* - * Startup: normally writes the file header. - * In this module we may as well postpone everything until finish_output. 
- */ - -METHODDEF(void) -start_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) -{ - /* no work here */ + if (!dest->use_inversion_array) + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->row_width); } @@ -187,24 +215,26 @@ start_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ LOCAL(void) -write_bmp_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) +write_bmp_header(j_decompress_ptr cinfo, bmp_dest_ptr dest) /* Write a Windows-style BMP file header, including colormap if needed */ { char bmpfileheader[14]; char bmpinfoheader[40]; -#define PUT_2B(array,offset,value) \ - (array[offset] = (char) ((value) & 0xFF), \ - array[offset+1] = (char) (((value) >> 8) & 0xFF)) -#define PUT_4B(array,offset,value) \ - (array[offset] = (char) ((value) & 0xFF), \ - array[offset+1] = (char) (((value) >> 8) & 0xFF), \ - array[offset+2] = (char) (((value) >> 16) & 0xFF), \ - array[offset+3] = (char) (((value) >> 24) & 0xFF)) + +#define PUT_2B(array, offset, value) \ + (array[offset] = (char)((value) & 0xFF), \ + array[offset + 1] = (char)(((value) >> 8) & 0xFF)) +#define PUT_4B(array, offset, value) \ + (array[offset] = (char)((value) & 0xFF), \ + array[offset + 1] = (char)(((value) >> 8) & 0xFF), \ + array[offset + 2] = (char)(((value) >> 16) & 0xFF), \ + array[offset + 3] = (char)(((value) >> 24) & 0xFF)) + long headersize, bfSize; int bits_per_pixel, cmap_entries; /* Compute colormap size and total file size */ - if (cinfo->out_color_space == JCS_RGB) { + if (IsExtRGB(cinfo->out_color_space)) { if (cinfo->quantize_colors) { /* Colormapped RGB */ bits_per_pixel = 8; @@ -214,7 +244,8 @@ write_bmp_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) bits_per_pixel = 24; cmap_entries = 0; } - } else if (cinfo->out_color_space == JCS_RGB565) { + } else if (cinfo->out_color_space == JCS_RGB565 || + cinfo->out_color_space == JCS_CMYK) { bits_per_pixel = 24; cmap_entries = 0; } else { @@ -224,7 +255,7 @@ write_bmp_header (j_decompress_ptr cinfo, bmp_dest_ptr 
dest) } /* File size */ headersize = 14 + 40 + cmap_entries * 4; /* Header and colormap */ - bfSize = headersize + (long) dest->row_width * (long) cinfo->output_height; + bfSize = headersize + (long)dest->row_width * (long)cinfo->output_height; /* Set unused fields of header to 0 */ MEMZERO(bmpfileheader, sizeof(bmpfileheader)); @@ -246,15 +277,15 @@ write_bmp_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) /* we leave biCompression = 0, for none */ /* we leave biSizeImage = 0; this is correct for uncompressed data */ if (cinfo->density_unit == 2) { /* if have density in dots/cm, then */ - PUT_4B(bmpinfoheader, 24, (long) (cinfo->X_density*100)); /* XPels/M */ - PUT_4B(bmpinfoheader, 28, (long) (cinfo->Y_density*100)); /* XPels/M */ + PUT_4B(bmpinfoheader, 24, (long)(cinfo->X_density * 100)); /* XPels/M */ + PUT_4B(bmpinfoheader, 28, (long)(cinfo->Y_density * 100)); /* XPels/M */ } PUT_2B(bmpinfoheader, 32, cmap_entries); /* biClrUsed */ /* we leave biClrImportant = 0 */ - if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t) 14) + if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t)14) ERREXIT(cinfo, JERR_FILE_WRITE); - if (JFWRITE(dest->pub.output_file, bmpinfoheader, 40) != (size_t) 40) + if (JFWRITE(dest->pub.output_file, bmpinfoheader, 40) != (size_t)40) ERREXIT(cinfo, JERR_FILE_WRITE); if (cmap_entries > 0) @@ -263,7 +294,7 @@ write_bmp_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) LOCAL(void) -write_os2_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) +write_os2_header(j_decompress_ptr cinfo, bmp_dest_ptr dest) /* Write an OS2-style BMP file header, including colormap if needed */ { char bmpfileheader[14]; @@ -272,7 +303,9 @@ write_os2_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) int bits_per_pixel, cmap_entries; /* Compute colormap size and total file size */ - if (cinfo->out_color_space == JCS_RGB) { + if (cinfo->out_color_space == JCS_RGB || + (cinfo->out_color_space >= JCS_EXT_RGB && + cinfo->out_color_space <= 
JCS_EXT_ARGB)) { if (cinfo->quantize_colors) { /* Colormapped RGB */ bits_per_pixel = 8; @@ -282,7 +315,8 @@ write_os2_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) bits_per_pixel = 24; cmap_entries = 0; } - } else if (cinfo->out_color_space == JCS_RGB565) { + } else if (cinfo->out_color_space == JCS_RGB565 || + cinfo->out_color_space == JCS_CMYK) { bits_per_pixel = 24; cmap_entries = 0; } else { @@ -292,7 +326,7 @@ write_os2_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) } /* File size */ headersize = 14 + 12 + cmap_entries * 3; /* Header and colormap */ - bfSize = headersize + (long) dest->row_width * (long) cinfo->output_height; + bfSize = headersize + (long)dest->row_width * (long)cinfo->output_height; /* Set unused fields of header to 0 */ MEMZERO(bmpfileheader, sizeof(bmpfileheader)); @@ -312,9 +346,9 @@ write_os2_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) PUT_2B(bmpcoreheader, 8, 1); /* bcPlanes - must be 1 */ PUT_2B(bmpcoreheader, 10, bits_per_pixel); /* bcBitCount */ - if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t) 14) + if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t)14) ERREXIT(cinfo, JERR_FILE_WRITE); - if (JFWRITE(dest->pub.output_file, bmpcoreheader, 12) != (size_t) 12) + if (JFWRITE(dest->pub.output_file, bmpcoreheader, 12) != (size_t)12) ERREXIT(cinfo, JERR_FILE_WRITE); if (cmap_entries > 0) @@ -328,8 +362,8 @@ write_os2_header (j_decompress_ptr cinfo, bmp_dest_ptr dest) */ LOCAL(void) -write_colormap (j_decompress_ptr cinfo, bmp_dest_ptr dest, - int map_colors, int map_entry_size) +write_colormap(j_decompress_ptr cinfo, bmp_dest_ptr dest, int map_colors, + int map_entry_size) { JSAMPARRAY colormap = cinfo->colormap; int num_colors = cinfo->actual_number_of_colors; @@ -379,40 +413,62 @@ write_colormap (j_decompress_ptr cinfo, bmp_dest_ptr dest, } +/* + * Startup: write the file header unless the inversion array is being used. 
+ */ + METHODDEF(void) -finish_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +start_output_bmp(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { - bmp_dest_ptr dest = (bmp_dest_ptr) dinfo; + bmp_dest_ptr dest = (bmp_dest_ptr)dinfo; + + if (!dest->use_inversion_array) { + /* Write the header and colormap */ + if (dest->is_os2) + write_os2_header(cinfo, dest); + else + write_bmp_header(cinfo, dest); + } +} + + +METHODDEF(void) +finish_output_bmp(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +{ + bmp_dest_ptr dest = (bmp_dest_ptr)dinfo; register FILE *outfile = dest->pub.output_file; JSAMPARRAY image_ptr; register JSAMPROW data_ptr; JDIMENSION row; register JDIMENSION col; - cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; - - /* Write the header and colormap */ - if (dest->is_os2) - write_os2_header(cinfo, dest); - else - write_bmp_header(cinfo, dest); - - /* Write the file body from our virtual array */ - for (row = cinfo->output_height; row > 0; row--) { - if (progress != NULL) { - progress->pub.pass_counter = (long) (cinfo->output_height - row); - progress->pub.pass_limit = (long) cinfo->output_height; - (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); - } - image_ptr = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, dest->whole_image, row-1, (JDIMENSION) 1, FALSE); - data_ptr = image_ptr[0]; - for (col = dest->row_width; col > 0; col--) { - putc(GETJSAMPLE(*data_ptr), outfile); - data_ptr++; + cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress; + + if (dest->use_inversion_array) { + /* Write the header and colormap */ + if (dest->is_os2) + write_os2_header(cinfo, dest); + else + write_bmp_header(cinfo, dest); + + /* Write the file body from our virtual array */ + for (row = cinfo->output_height; row > 0; row--) { + if (progress != NULL) { + progress->pub.pass_counter = (long)(cinfo->output_height - row); + progress->pub.pass_limit = (long)cinfo->output_height; + (*progress->pub.progress_monitor) 
((j_common_ptr)cinfo); + } + image_ptr = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr)cinfo, dest->whole_image, row - 1, (JDIMENSION)1, + FALSE); + data_ptr = image_ptr[0]; + for (col = dest->row_width; col > 0; col--) { + putc(GETJSAMPLE(*data_ptr), outfile); + data_ptr++; + } } + if (progress != NULL) + progress->completed_extra_passes++; } - if (progress != NULL) - progress->completed_extra_passes++; /* Make sure we wrote the output file OK */ fflush(outfile); @@ -426,15 +482,16 @@ finish_output_bmp (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ GLOBAL(djpeg_dest_ptr) -jinit_write_bmp (j_decompress_ptr cinfo, boolean is_os2) +jinit_write_bmp(j_decompress_ptr cinfo, boolean is_os2, + boolean use_inversion_array) { bmp_dest_ptr dest; JDIMENSION row_width; /* Create module interface object, fill in method pointers */ dest = (bmp_dest_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - sizeof(bmp_dest_struct)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + sizeof(bmp_dest_struct)); dest->pub.start_output = start_output_bmp; dest->pub.finish_output = finish_output_bmp; dest->pub.calc_buffer_dimensions = NULL; @@ -442,13 +499,16 @@ jinit_write_bmp (j_decompress_ptr cinfo, boolean is_os2) if (cinfo->out_color_space == JCS_GRAYSCALE) { dest->pub.put_pixel_rows = put_gray_rows; - } else if (cinfo->out_color_space == JCS_RGB) { + } else if (cinfo->out_color_space == JCS_RGB || + (cinfo->out_color_space >= JCS_EXT_RGB && + cinfo->out_color_space <= JCS_EXT_ARGB)) { if (cinfo->quantize_colors) dest->pub.put_pixel_rows = put_gray_rows; else dest->pub.put_pixel_rows = put_pixel_rows; - } else if (cinfo->out_color_space == JCS_RGB565) { - dest->pub.put_pixel_rows = put_pixel_rows; + } else if (cinfo->out_color_space == JCS_RGB565 || + cinfo->out_color_space == JCS_CMYK) { + dest->pub.put_pixel_rows = put_pixel_rows; } else { ERREXIT(cinfo, JERR_BMP_COLORSPACE); } @@ -460,35 +520,42 @@ jinit_write_bmp (j_decompress_ptr cinfo, 
boolean is_os2) if (cinfo->out_color_space == JCS_RGB565) { row_width = cinfo->output_width * 2; dest->row_width = dest->data_width = cinfo->output_width * 3; + while ((row_width & 3) != 0) row_width++; + } else if (!cinfo->quantize_colors && + (IsExtRGB(cinfo->out_color_space) || + cinfo->out_color_space == JCS_CMYK)) { + row_width = cinfo->output_width * cinfo->output_components; + dest->row_width = dest->data_width = cinfo->output_width * 3; } else { row_width = cinfo->output_width * cinfo->output_components; dest->row_width = dest->data_width = row_width; } while ((dest->row_width & 3) != 0) dest->row_width++; - dest->pad_bytes = (int) (dest->row_width - dest->data_width); - if (cinfo->out_color_space == JCS_RGB565) { - while ((row_width & 3) != 0) row_width++; + dest->pad_bytes = (int)(dest->row_width - dest->data_width); + + + if (use_inversion_array) { + /* Allocate space for inversion array, prepare for write pass */ + dest->whole_image = (*cinfo->mem->request_virt_sarray) + ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE, + dest->row_width, cinfo->output_height, (JDIMENSION)1); + dest->cur_output_row = 0; + if (cinfo->progress != NULL) { + cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress; + progress->total_extra_passes++; /* count file input as separate pass */ + } } else { - row_width = dest->row_width; - } - - - /* Allocate space for inversion array, prepare for write pass */ - dest->whole_image = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - dest->row_width, cinfo->output_height, (JDIMENSION) 1); - dest->cur_output_row = 0; - if (cinfo->progress != NULL) { - cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; - progress->total_extra_passes++; /* count file input as separate pass */ + dest->iobuffer = (JSAMPLE *)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, JPOOL_IMAGE, dest->row_width); } + dest->use_inversion_array = use_inversion_array; /* Create decompressor output buffer. 
*/ dest->pub.buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, row_width, (JDIMENSION) 1); + ((j_common_ptr)cinfo, JPOOL_IMAGE, row_width, (JDIMENSION)1); dest->pub.buffer_height = 1; - return (djpeg_dest_ptr) dest; + return (djpeg_dest_ptr)dest; } #endif /* BMP_SUPPORTED */ diff --git a/wrgif.c b/wrgif.c index 8d2050f..1804e0b 100644 --- a/wrgif.c +++ b/wrgif.c @@ -72,7 +72,7 @@ typedef struct { typedef gif_dest_struct *gif_dest_ptr; /* Largest value that will fit in N bits */ -#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) +#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) /* @@ -81,13 +81,13 @@ typedef gif_dest_struct *gif_dest_ptr; */ LOCAL(void) -flush_packet (gif_dest_ptr dinfo) +flush_packet(gif_dest_ptr dinfo) /* flush any accumulated data */ { if (dinfo->bytesinpkt > 0) { /* never write zero-length packet */ - dinfo->packetbuf[0] = (char) dinfo->bytesinpkt++; - if (JFWRITE(dinfo->pub.output_file, dinfo->packetbuf, dinfo->bytesinpkt) - != (size_t) dinfo->bytesinpkt) + dinfo->packetbuf[0] = (char)dinfo->bytesinpkt++; + if (JFWRITE(dinfo->pub.output_file, dinfo->packetbuf, dinfo->bytesinpkt) != + (size_t)dinfo->bytesinpkt) ERREXIT(dinfo->cinfo, JERR_FILE_WRITE); dinfo->bytesinpkt = 0; } @@ -95,21 +95,21 @@ flush_packet (gif_dest_ptr dinfo) /* Add a character to current packet; flush to disk if necessary */ -#define CHAR_OUT(dinfo,c) \ - { (dinfo)->packetbuf[++(dinfo)->bytesinpkt] = (char) (c); \ - if ((dinfo)->bytesinpkt >= 255) \ - flush_packet(dinfo); \ - } +#define CHAR_OUT(dinfo, c) { \ + (dinfo)->packetbuf[++(dinfo)->bytesinpkt] = (char)(c); \ + if ((dinfo)->bytesinpkt >= 255) \ + flush_packet(dinfo); \ +} /* Routine to convert variable-width codes into a byte stream */ LOCAL(void) -output (gif_dest_ptr dinfo, int code) +output(gif_dest_ptr dinfo, int code) /* Emit a code of n_bits bits */ /* Uses cur_accum and cur_bits to reblock into 8-bit bytes */ { - dinfo->cur_accum |= ((long) code) << dinfo->cur_bits; + dinfo->cur_accum |= 
((long)code) << dinfo->cur_bits; dinfo->cur_bits += dinfo->n_bits; while (dinfo->cur_bits >= 8) { @@ -143,7 +143,7 @@ output (gif_dest_ptr dinfo, int code) */ LOCAL(void) -compress_init (gif_dest_ptr dinfo, int i_bits) +compress_init(gif_dest_ptr dinfo, int i_bits) /* Initialize pseudo-compressor */ { /* init all the state variables */ @@ -162,7 +162,7 @@ compress_init (gif_dest_ptr dinfo, int i_bits) LOCAL(void) -compress_pixel (gif_dest_ptr dinfo, int c) +compress_pixel(gif_dest_ptr dinfo, int c) /* Accept and "compress" one pixel value. * The given value must be less than n_bits wide. */ @@ -182,7 +182,7 @@ compress_pixel (gif_dest_ptr dinfo, int c) LOCAL(void) -compress_term (gif_dest_ptr dinfo) +compress_term(gif_dest_ptr dinfo) /* Clean up at end */ { /* Send an EOF code */ @@ -200,7 +200,7 @@ compress_term (gif_dest_ptr dinfo) LOCAL(void) -put_word (gif_dest_ptr dinfo, unsigned int w) +put_word(gif_dest_ptr dinfo, unsigned int w) /* Emit a 16-bit word, LSB first */ { putc(w & 0xFF, dinfo->pub.output_file); @@ -209,7 +209,7 @@ put_word (gif_dest_ptr dinfo, unsigned int w) LOCAL(void) -put_3bytes (gif_dest_ptr dinfo, int val) +put_3bytes(gif_dest_ptr dinfo, int val) /* Emit 3 copies of same byte value --- handy subr for colormap construction */ { putc(val, dinfo->pub.output_file); @@ -219,7 +219,7 @@ put_3bytes (gif_dest_ptr dinfo, int val) LOCAL(void) -emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap) +emit_header(gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap) /* Output the GIF file header, including color map */ /* If colormap==NULL, synthesize a grayscale colormap */ { @@ -249,18 +249,18 @@ emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap) putc('7', dinfo->pub.output_file); putc('a', dinfo->pub.output_file); /* Write the Logical Screen Descriptor */ - put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); - put_word(dinfo, (unsigned int) dinfo->cinfo->output_height); + put_word(dinfo, (unsigned 
int)dinfo->cinfo->output_width); + put_word(dinfo, (unsigned int)dinfo->cinfo->output_height); FlagByte = 0x80; /* Yes, there is a global color table */ - FlagByte |= (BitsPerPixel-1) << 4; /* color resolution */ - FlagByte |= (BitsPerPixel-1); /* size of global color table */ + FlagByte |= (BitsPerPixel - 1) << 4; /* color resolution */ + FlagByte |= (BitsPerPixel - 1); /* size of global color table */ putc(FlagByte, dinfo->pub.output_file); putc(0, dinfo->pub.output_file); /* Background color index */ putc(0, dinfo->pub.output_file); /* Reserved (aspect ratio in GIF89) */ /* Write the Global Color Map */ /* If the color map is more than 8 bits precision, */ /* we reduce it to 8 bits by shifting */ - for (i=0; i < ColorMapSize; i++) { + for (i = 0; i < ColorMapSize; i++) { if (i < num_colors) { if (colormap != NULL) { if (dinfo->cinfo->out_color_space == JCS_RGB) { @@ -274,7 +274,7 @@ emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap) } } else { /* Create a grayscale map of num_colors values, range 0..255 */ - put_3bytes(dinfo, (i * 255 + (num_colors-1)/2) / (num_colors-1)); + put_3bytes(dinfo, (i * 255 + (num_colors - 1) / 2) / (num_colors - 1)); } } else { /* fill out the map to a power of 2 */ @@ -285,15 +285,15 @@ emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap) putc(',', dinfo->pub.output_file); /* separator */ put_word(dinfo, 0); /* left/top offset */ put_word(dinfo, 0); - put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); /* image size */ - put_word(dinfo, (unsigned int) dinfo->cinfo->output_height); + put_word(dinfo, (unsigned int)dinfo->cinfo->output_width); /* image size */ + put_word(dinfo, (unsigned int)dinfo->cinfo->output_height); /* flag byte: not interlaced, no local color map */ putc(0x00, dinfo->pub.output_file); /* Write Initial Code Size byte */ putc(InitCodeSize, dinfo->pub.output_file); /* Initialize for "compression" of image data */ - compress_init(dinfo, InitCodeSize+1); + 
compress_init(dinfo, InitCodeSize + 1); } @@ -302,14 +302,14 @@ emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap) */ METHODDEF(void) -start_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +start_output_gif(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { - gif_dest_ptr dest = (gif_dest_ptr) dinfo; + gif_dest_ptr dest = (gif_dest_ptr)dinfo; if (cinfo->quantize_colors) emit_header(dest, cinfo->actual_number_of_colors, cinfo->colormap); else - emit_header(dest, 256, (JSAMPARRAY) NULL); + emit_header(dest, 256, (JSAMPARRAY)NULL); } @@ -319,10 +319,10 @@ start_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ METHODDEF(void) -put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +put_pixel_rows(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) { - gif_dest_ptr dest = (gif_dest_ptr) dinfo; + gif_dest_ptr dest = (gif_dest_ptr)dinfo; register JSAMPROW ptr; register JDIMENSION col; @@ -338,9 +338,9 @@ put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, */ METHODDEF(void) -finish_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +finish_output_gif(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { - gif_dest_ptr dest = (gif_dest_ptr) dinfo; + gif_dest_ptr dest = (gif_dest_ptr)dinfo; /* Flush "compression" mechanism */ compress_term(dest); @@ -360,7 +360,7 @@ finish_output_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ METHODDEF(void) -calc_buffer_dimensions_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +calc_buffer_dimensions_gif(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { } @@ -370,14 +370,14 @@ calc_buffer_dimensions_gif (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ GLOBAL(djpeg_dest_ptr) -jinit_write_gif (j_decompress_ptr cinfo) +jinit_write_gif(j_decompress_ptr cinfo) { gif_dest_ptr dest; /* Create module interface object, fill in method pointers */ dest = (gif_dest_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 
sizeof(gif_dest_struct)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + sizeof(gif_dest_struct)); dest->cinfo = cinfo; /* make back link for subroutines */ dest->pub.start_output = start_output_gif; dest->pub.put_pixel_rows = put_pixel_rows; @@ -404,10 +404,10 @@ jinit_write_gif (j_decompress_ptr cinfo) /* Create decompressor output buffer. */ dest->pub.buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, cinfo->output_width, (JDIMENSION) 1); + ((j_common_ptr)cinfo, JPOOL_IMAGE, cinfo->output_width, (JDIMENSION)1); dest->pub.buffer_height = 1; - return (djpeg_dest_ptr) dest; + return (djpeg_dest_ptr)dest; } #endif /* GIF_SUPPORTED */ diff --git a/wrjpgcom.1 b/wrjpgcom.1 index d419a99..a255cab 100644 --- a/wrjpgcom.1 +++ b/wrjpgcom.1 @@ -56,7 +56,7 @@ single argument. Longer comments can be read from a text file. If you give neither .B \-comment nor -.BR \-cfile , +.BR \-cfile, then .B wrjpgcom will read the comment text from standard input. (In this case an input image diff --git a/wrjpgcom.c b/wrjpgcom.c index 531c152..8a4e741 100644 --- a/wrjpgcom.c +++ b/wrjpgcom.c @@ -18,7 +18,7 @@ #include "jinclude.h" /* get auto-config symbols, */ #ifndef HAVE_STDLIB_H /* should declare malloc() */ -extern void *malloc (); +extern void *malloc(); #endif #include /* to declare isupper(), tolower() */ #ifdef USE_SETMODE @@ -57,7 +57,7 @@ extern void *malloc (); */ #ifndef MAX_COM_LENGTH -#define MAX_COM_LENGTH 65000L /* must be <= 65533 in any case */ +#define MAX_COM_LENGTH 65000L /* must be <= 65533 in any case */ #endif @@ -83,7 +83,7 @@ static FILE *outfile; /* output JPEG file */ /* Read one byte, testing for EOF */ static int -read_1_byte (void) +read_1_byte(void) { int c; @@ -96,7 +96,7 @@ read_1_byte (void) /* Read 2 bytes, convert to unsigned int */ /* All 2-byte quantities in JPEG markers are MSB first */ static unsigned int -read_2_bytes (void) +read_2_bytes(void) { int c1, c2; @@ -106,34 +106,34 @@ read_2_bytes (void) c2 = 
NEXTBYTE(); if (c2 == EOF) ERREXIT("Premature EOF in JPEG file"); - return (((unsigned int) c1) << 8) + ((unsigned int) c2); + return (((unsigned int)c1) << 8) + ((unsigned int)c2); } /* Routines to write data to output file */ static void -write_1_byte (int c) +write_1_byte(int c) { PUTBYTE(c); } static void -write_2_bytes (unsigned int val) +write_2_bytes(unsigned int val) { PUTBYTE((val >> 8) & 0xFF); PUTBYTE(val & 0xFF); } static void -write_marker (int marker) +write_marker(int marker) { PUTBYTE(0xFF); PUTBYTE(marker); } static void -copy_rest_of_file (void) +copy_rest_of_file(void) { int c; @@ -148,23 +148,23 @@ copy_rest_of_file (void) * in this program. (See jdmarker.c for a more complete list.) */ -#define M_SOF0 0xC0 /* Start Of Frame N */ -#define M_SOF1 0xC1 /* N indicates which compression process */ -#define M_SOF2 0xC2 /* Only SOF0-SOF2 are now in common use */ -#define M_SOF3 0xC3 -#define M_SOF5 0xC5 /* NB: codes C4 and CC are NOT SOF markers */ -#define M_SOF6 0xC6 -#define M_SOF7 0xC7 -#define M_SOF9 0xC9 -#define M_SOF10 0xCA -#define M_SOF11 0xCB -#define M_SOF13 0xCD -#define M_SOF14 0xCE -#define M_SOF15 0xCF -#define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ -#define M_EOI 0xD9 /* End Of Image (end of datastream) */ -#define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ -#define M_COM 0xFE /* COMment */ +#define M_SOF0 0xC0 /* Start Of Frame N */ +#define M_SOF1 0xC1 /* N indicates which compression process */ +#define M_SOF2 0xC2 /* Only SOF0-SOF2 are now in common use */ +#define M_SOF3 0xC3 +#define M_SOF5 0xC5 /* NB: codes C4 and CC are NOT SOF markers */ +#define M_SOF6 0xC6 +#define M_SOF7 0xC7 +#define M_SOF9 0xC9 +#define M_SOF10 0xCA +#define M_SOF11 0xCB +#define M_SOF13 0xCD +#define M_SOF14 0xCE +#define M_SOF15 0xCF +#define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ +#define M_EOI 0xD9 /* End Of Image (end of datastream) */ +#define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ 
+#define M_COM 0xFE /* COMment */ /* @@ -178,7 +178,7 @@ copy_rest_of_file (void) */ static int -next_marker (void) +next_marker(void) { int c; int discarded_bytes = 0; @@ -213,7 +213,7 @@ next_marker (void) */ static int -first_marker (void) +first_marker(void) { int c1, c2; @@ -235,7 +235,7 @@ first_marker (void) */ static void -copy_variable (void) +copy_variable(void) /* Copy an unknown or uninteresting variable-length marker */ { unsigned int length; @@ -255,7 +255,7 @@ copy_variable (void) } static void -skip_variable (void) +skip_variable(void) /* Skip over an unknown or uninteresting variable-length marker */ { unsigned int length; @@ -268,7 +268,7 @@ skip_variable (void) length -= 2; /* Skip over the remaining bytes */ while (length > 0) { - (void) read_1_byte(); + (void)read_1_byte(); length--; } } @@ -280,7 +280,7 @@ skip_variable (void) */ static int -scan_JPEG_header (int keep_COM) +scan_JPEG_header(int keep_COM) { int marker; @@ -342,7 +342,7 @@ static const char *progname; /* program name for error messages */ static void -usage (void) +usage(void) /* complain about bad command line */ { fprintf(stderr, "wrjpgcom inserts a textual comment in a JPEG file.\n"); @@ -364,7 +364,7 @@ usage (void) fprintf(stderr, "If you do not give either -comment or -cfile on the command line,\n"); fprintf(stderr, "then the comment text is read from standard input.\n"); fprintf(stderr, "It can be multiple lines, up to %u characters total.\n", - (unsigned int) MAX_COM_LENGTH); + (unsigned int)MAX_COM_LENGTH); #ifndef TWO_FILE_COMMANDLINE fprintf(stderr, "You must specify an input JPEG file name when supplying\n"); fprintf(stderr, "comment text from standard input.\n"); @@ -375,7 +375,7 @@ usage (void) static int -keymatch (char *arg, const char *keyword, int minchars) +keymatch(char *arg, const char *keyword, int minchars) /* Case-insensitive matching of (possibly abbreviated) keyword switches. 
*/ /* keyword is the constant keyword (must be lower case already), */ /* minchars is length of minimum legal abbreviation. */ @@ -404,7 +404,7 @@ keymatch (char *arg, const char *keyword, int minchars) */ int -main (int argc, char **argv) +main(int argc, char **argv) { int argn; char *arg; @@ -444,38 +444,38 @@ main (int argc, char **argv) * under MS-DOG and must parse out the quoted string ourselves. Sigh. */ if (comment_arg[0] == '"') { - comment_arg = (char *) malloc((size_t) MAX_COM_LENGTH); + comment_arg = (char *)malloc((size_t)MAX_COM_LENGTH); if (comment_arg == NULL) ERREXIT("Insufficient memory"); - if (strlen(argv[argn]) + 2 >= (size_t) MAX_COM_LENGTH) { + if (strlen(argv[argn]) + 2 >= (size_t)MAX_COM_LENGTH) { fprintf(stderr, "Comment text may not exceed %u bytes\n", - (unsigned int) MAX_COM_LENGTH); + (unsigned int)MAX_COM_LENGTH); exit(EXIT_FAILURE); } - strcpy(comment_arg, argv[argn]+1); + strcpy(comment_arg, argv[argn] + 1); for (;;) { - comment_length = (unsigned int) strlen(comment_arg); - if (comment_length > 0 && comment_arg[comment_length-1] == '"') { - comment_arg[comment_length-1] = '\0'; /* zap terminating quote */ + comment_length = (unsigned int)strlen(comment_arg); + if (comment_length > 0 && comment_arg[comment_length - 1] == '"') { + comment_arg[comment_length - 1] = '\0'; /* zap terminating quote */ break; } if (++argn >= argc) ERREXIT("Missing ending quote mark"); if (strlen(comment_arg) + strlen(argv[argn]) + 2 >= - (size_t) MAX_COM_LENGTH) { + (size_t)MAX_COM_LENGTH) { fprintf(stderr, "Comment text may not exceed %u bytes\n", - (unsigned int) MAX_COM_LENGTH); + (unsigned int)MAX_COM_LENGTH); exit(EXIT_FAILURE); } strcat(comment_arg, " "); strcat(comment_arg, argv[argn]); } - } else if (strlen(argv[argn]) >= (size_t) MAX_COM_LENGTH) { + } else if (strlen(argv[argn]) >= (size_t)MAX_COM_LENGTH) { fprintf(stderr, "Comment text may not exceed %u bytes\n", - (unsigned int) MAX_COM_LENGTH); + (unsigned int)MAX_COM_LENGTH); 
exit(EXIT_FAILURE); } - comment_length = (unsigned int) strlen(comment_arg); + comment_length = (unsigned int)strlen(comment_arg); } else usage(); } @@ -513,18 +513,17 @@ main (int argc, char **argv) /* Open the output file. */ #ifdef TWO_FILE_COMMANDLINE /* Must have explicit output file name */ - if (argn != argc-2) { - fprintf(stderr, "%s: must name one input and one output file\n", - progname); + if (argn != argc - 2) { + fprintf(stderr, "%s: must name one input and one output file\n", progname); usage(); } - if ((outfile = fopen(argv[argn+1], WRITE_BINARY)) == NULL) { - fprintf(stderr, "%s: can't open %s\n", progname, argv[argn+1]); + if ((outfile = fopen(argv[argn + 1], WRITE_BINARY)) == NULL) { + fprintf(stderr, "%s: can't open %s\n", progname, argv[argn + 1]); exit(EXIT_FAILURE); } #else /* Unix style: expect zero or one file name */ - if (argn < argc-1) { + if (argn < argc - 1) { fprintf(stderr, "%s: only one input file\n", progname); usage(); } @@ -547,18 +546,18 @@ main (int argc, char **argv) FILE *src_file; int c; - comment_arg = (char *) malloc((size_t) MAX_COM_LENGTH); + comment_arg = (char *)malloc((size_t)MAX_COM_LENGTH); if (comment_arg == NULL) ERREXIT("Insufficient memory"); comment_length = 0; src_file = (comment_file != NULL ? comment_file : stdin); while ((c = getc(src_file)) != EOF) { - if (comment_length >= (unsigned int) MAX_COM_LENGTH) { + if (comment_length >= (unsigned int)MAX_COM_LENGTH) { fprintf(stderr, "Comment text may not exceed %u bytes\n", - (unsigned int) MAX_COM_LENGTH); + (unsigned int)MAX_COM_LENGTH); exit(EXIT_FAILURE); } - comment_arg[comment_length++] = (char) c; + comment_arg[comment_length++] = (char)c; } if (comment_file != NULL) fclose(comment_file); @@ -581,7 +580,7 @@ main (int argc, char **argv) } } /* Duplicate the remainder of the source file. - * Note that any COM markers occuring after SOF will not be touched. + * Note that any COM markers occurring after SOF will not be touched. 
*/ write_marker(marker); copy_rest_of_file(); diff --git a/wrppm.c b/wrppm.c index 91cb10b..819a0a7 100644 --- a/wrppm.c +++ b/wrppm.c @@ -19,6 +19,7 @@ * an ordinary stdio stream. */ +#include "cmyk.h" #include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ #ifdef PPM_SUPPORTED @@ -35,23 +36,23 @@ */ #if BITS_IN_JSAMPLE == 8 -#define PUTPPMSAMPLE(ptr,v) *ptr++ = (char) (v) -#define BYTESPERSAMPLE 1 -#define PPM_MAXVAL 255 +#define PUTPPMSAMPLE(ptr, v) *ptr++ = (char)(v) +#define BYTESPERSAMPLE 1 +#define PPM_MAXVAL 255 #else #ifdef PPM_NORAWWORD -#define PUTPPMSAMPLE(ptr,v) *ptr++ = (char) ((v) >> (BITS_IN_JSAMPLE-8)) -#define BYTESPERSAMPLE 1 -#define PPM_MAXVAL 255 +#define PUTPPMSAMPLE(ptr, v) *ptr++ = (char)((v) >> (BITS_IN_JSAMPLE - 8)) +#define BYTESPERSAMPLE 1 +#define PPM_MAXVAL 255 #else /* The word-per-sample format always puts the MSB first. */ -#define PUTPPMSAMPLE(ptr,v) \ - { register int val_ = v; \ - *ptr++ = (char) ((val_ >> 8) & 0xFF); \ - *ptr++ = (char) (val_ & 0xFF); \ - } -#define BYTESPERSAMPLE 2 -#define PPM_MAXVAL ((1<<BITS_IN_JSAMPLE)-1) +#define PUTPPMSAMPLE(ptr, v) { \ + register int val_ = v; \ + *ptr++ = (char)((val_ >> 8) & 0xFF); \ + *ptr++ = (char)(val_ & 0xFF); \ +} +#define BYTESPERSAMPLE 2 +#define PPM_MAXVAL ((1 << BITS_IN_JSAMPLE) - 1) #endif #endif @@ -86,12 +87,12 @@ typedef ppm_dest_struct *ppm_dest_ptr; */ METHODDEF(void) -put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +put_pixel_rows(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) { - ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + ppm_dest_ptr dest = (ppm_dest_ptr)dinfo; - (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); } @@ -101,20 +102,80 @@ put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, */ METHODDEF(void) -copy_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +copy_pixel_rows(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION
rows_supplied) { - ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + ppm_dest_ptr dest = (ppm_dest_ptr)dinfo; register char *bufferptr; register JSAMPROW ptr; +#if BITS_IN_JSAMPLE != 8 || (!defined(HAVE_UNSIGNED_CHAR) && !defined(__CHAR_UNSIGNED__)) register JDIMENSION col; +#endif ptr = dest->pub.buffer[0]; bufferptr = dest->iobuffer; +#if BITS_IN_JSAMPLE == 8 && (defined(HAVE_UNSIGNED_CHAR) || defined(__CHAR_UNSIGNED__)) + MEMCOPY(bufferptr, ptr, dest->samples_per_row); +#else for (col = dest->samples_per_row; col > 0; col--) { PUTPPMSAMPLE(bufferptr, GETJSAMPLE(*ptr++)); } - (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +#endif + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * Convert extended RGB to RGB. + */ + +METHODDEF(void) +put_rgb(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, JDIMENSION rows_supplied) +{ + ppm_dest_ptr dest = (ppm_dest_ptr)dinfo; + register char *bufferptr; + register JSAMPROW ptr; + register JDIMENSION col; + register int rindex = rgb_red[cinfo->out_color_space]; + register int gindex = rgb_green[cinfo->out_color_space]; + register int bindex = rgb_blue[cinfo->out_color_space]; + register int ps = rgb_pixelsize[cinfo->out_color_space]; + + ptr = dest->pub.buffer[0]; + bufferptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + PUTPPMSAMPLE(bufferptr, ptr[rindex]); + PUTPPMSAMPLE(bufferptr, ptr[gindex]); + PUTPPMSAMPLE(bufferptr, ptr[bindex]); + ptr += ps; + } + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); +} + + +/* + * Convert CMYK to RGB. 
+ */ + +METHODDEF(void) +put_cmyk(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) +{ + ppm_dest_ptr dest = (ppm_dest_ptr)dinfo; + register char *bufferptr; + register JSAMPROW ptr; + register JDIMENSION col; + + ptr = dest->pub.buffer[0]; + bufferptr = dest->iobuffer; + for (col = cinfo->output_width; col > 0; col--) { + JSAMPLE r, g, b, c = *ptr++, m = *ptr++, y = *ptr++, k = *ptr++; + cmyk_to_rgb(c, m, y, k, &r, &g, &b); + PUTPPMSAMPLE(bufferptr, r); + PUTPPMSAMPLE(bufferptr, g); + PUTPPMSAMPLE(bufferptr, b); + } + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); } @@ -124,10 +185,10 @@ copy_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, */ METHODDEF(void) -put_demapped_rgb (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +put_demapped_rgb(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) { - ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + ppm_dest_ptr dest = (ppm_dest_ptr)dinfo; register char *bufferptr; register int pixval; register JSAMPROW ptr; @@ -144,15 +205,15 @@ put_demapped_rgb (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, PUTPPMSAMPLE(bufferptr, GETJSAMPLE(color_map1[pixval])); PUTPPMSAMPLE(bufferptr, GETJSAMPLE(color_map2[pixval])); } - (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); } METHODDEF(void) -put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +put_demapped_gray(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) { - ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + ppm_dest_ptr dest = (ppm_dest_ptr)dinfo; register char *bufferptr; register JSAMPROW ptr; register JSAMPROW color_map = cinfo->colormap[0]; @@ -163,7 +224,7 @@ put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, for (col = cinfo->output_width; col > 0; col--) { PUTPPMSAMPLE(bufferptr, 
GETJSAMPLE(color_map[GETJSAMPLE(*ptr++)])); } - (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); } @@ -172,23 +233,32 @@ put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, */ METHODDEF(void) -start_output_ppm (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +start_output_ppm(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { - ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + ppm_dest_ptr dest = (ppm_dest_ptr)dinfo; /* Emit file header */ switch (cinfo->out_color_space) { case JCS_GRAYSCALE: /* emit header for raw PGM format */ fprintf(dest->pub.output_file, "P5\n%ld %ld\n%d\n", - (long) cinfo->output_width, (long) cinfo->output_height, - PPM_MAXVAL); + (long)cinfo->output_width, (long)cinfo->output_height, PPM_MAXVAL); break; case JCS_RGB: + case JCS_EXT_RGB: + case JCS_EXT_RGBX: + case JCS_EXT_BGR: + case JCS_EXT_BGRX: + case JCS_EXT_XBGR: + case JCS_EXT_XRGB: + case JCS_EXT_RGBA: + case JCS_EXT_BGRA: + case JCS_EXT_ABGR: + case JCS_EXT_ARGB: + case JCS_CMYK: /* emit header for raw PPM format */ fprintf(dest->pub.output_file, "P6\n%ld %ld\n%d\n", - (long) cinfo->output_width, (long) cinfo->output_height, - PPM_MAXVAL); + (long)cinfo->output_width, (long)cinfo->output_height, PPM_MAXVAL); break; default: ERREXIT(cinfo, JERR_PPM_COLORSPACE); @@ -201,7 +271,7 @@ start_output_ppm (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ METHODDEF(void) -finish_output_ppm (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +finish_output_ppm(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { /* Make sure we wrote the output file OK */ fflush(dinfo->output_file); @@ -215,11 +285,14 @@ finish_output_ppm (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ METHODDEF(void) -calc_buffer_dimensions_ppm (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +calc_buffer_dimensions_ppm(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { - ppm_dest_ptr dest = (ppm_dest_ptr) dinfo; + ppm_dest_ptr dest = 
(ppm_dest_ptr)dinfo; - dest->samples_per_row = cinfo->output_width * cinfo->out_color_components; + if (cinfo->out_color_space == JCS_GRAYSCALE) + dest->samples_per_row = cinfo->output_width * cinfo->out_color_components; + else + dest->samples_per_row = cinfo->output_width * 3; dest->buffer_width = dest->samples_per_row * (BYTESPERSAMPLE * sizeof(char)); } @@ -229,13 +302,13 @@ calc_buffer_dimensions_ppm (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ GLOBAL(djpeg_dest_ptr) -jinit_write_ppm (j_decompress_ptr cinfo) +jinit_write_ppm(j_decompress_ptr cinfo) { ppm_dest_ptr dest; /* Create module interface object, fill in method pointers */ dest = (ppm_dest_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, sizeof(ppm_dest_struct)); dest->pub.start_output = start_output_ppm; dest->pub.finish_output = finish_output_ppm; @@ -245,21 +318,30 @@ jinit_write_ppm (j_decompress_ptr cinfo) jpeg_calc_output_dimensions(cinfo); /* Create physical I/O buffer */ - dest->pub.calc_buffer_dimensions (cinfo, (djpeg_dest_ptr) dest); - dest->iobuffer = (char *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, dest->buffer_width); + dest->pub.calc_buffer_dimensions(cinfo, (djpeg_dest_ptr)dest); + dest->iobuffer = (char *)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, JPOOL_IMAGE, dest->buffer_width); if (cinfo->quantize_colors || BITS_IN_JSAMPLE != 8 || - sizeof(JSAMPLE) != sizeof(char)) { + sizeof(JSAMPLE) != sizeof(char) || + (cinfo->out_color_space != JCS_EXT_RGB +#if RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 3 + && cinfo->out_color_space != JCS_RGB +#endif + )) { /* When quantizing, we need an output buffer for colormap indexes * that's separate from the physical I/O buffer. We also need a * separate buffer if pixel format translation must take place. 
*/ dest->pub.buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->output_width * cinfo->output_components, (JDIMENSION) 1); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + cinfo->output_width * cinfo->output_components, (JDIMENSION)1); dest->pub.buffer_height = 1; - if (! cinfo->quantize_colors) + if (IsExtRGB(cinfo->out_color_space)) + dest->pub.put_pixel_rows = put_rgb; + else if (cinfo->out_color_space == JCS_CMYK) + dest->pub.put_pixel_rows = put_cmyk; + else if (!cinfo->quantize_colors) dest->pub.put_pixel_rows = copy_pixel_rows; else if (cinfo->out_color_space == JCS_GRAYSCALE) dest->pub.put_pixel_rows = put_demapped_gray; @@ -268,13 +350,13 @@ jinit_write_ppm (j_decompress_ptr cinfo) } else { /* We will fwrite() directly from decompressor output buffer. */ /* Synthesize a JSAMPARRAY pointer structure */ - dest->pixrow = (JSAMPROW) dest->iobuffer; - dest->pub.buffer = & dest->pixrow; + dest->pixrow = (JSAMPROW)dest->iobuffer; + dest->pub.buffer = &dest->pixrow; dest->pub.buffer_height = 1; dest->pub.put_pixel_rows = put_pixel_rows; } - return (djpeg_dest_ptr) dest; + return (djpeg_dest_ptr)dest; } #endif /* PPM_SUPPORTED */ diff --git a/wrrle.c b/wrrle.c index 880fadf..5c98ec0 100644 --- a/wrrle.c +++ b/wrrle.c @@ -51,7 +51,7 @@ */ #define CMAPBITS 8 -#define CMAPLENGTH (1<<(CMAPBITS)) +#define CMAPLENGTH (1 << (CMAPBITS)) typedef struct { struct djpeg_dest_struct pub; /* public fields */ @@ -65,9 +65,9 @@ typedef struct { typedef rle_dest_struct *rle_dest_ptr; /* Forward declarations */ -METHODDEF(void) rle_put_pixel_rows - (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied); +METHODDEF(void) rle_put_pixel_rows(j_decompress_ptr cinfo, + djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied); /* @@ -77,13 +77,13 @@ METHODDEF(void) rle_put_pixel_rows */ METHODDEF(void) -start_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +start_output_rle(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { - rle_dest_ptr dest = 
(rle_dest_ptr) dinfo; + rle_dest_ptr dest = (rle_dest_ptr)dinfo; size_t cmapsize; int i, ci; #ifdef PROGRESS_REPORT - cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress; #endif /* @@ -117,8 +117,8 @@ start_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) if (cinfo->quantize_colors) { /* Allocate storage for RLE-style cmap, zero any extra entries */ cmapsize = cinfo->out_color_components * CMAPLENGTH * sizeof(rle_map); - dest->colormap = (rle_map *) (*cinfo->mem->alloc_small) - ((j_common_ptr) cinfo, JPOOL_IMAGE, cmapsize); + dest->colormap = (rle_map *)(*cinfo->mem->alloc_small) + ((j_common_ptr)cinfo, JPOOL_IMAGE, cmapsize); MEMZERO(dest->colormap, cmapsize); /* Save away data in RLE format --- note 8-bit left shift! */ @@ -133,7 +133,7 @@ start_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) /* Set the output buffer to the first row */ dest->pub.buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, dest->image, (JDIMENSION) 0, (JDIMENSION) 1, TRUE); + ((j_common_ptr)cinfo, dest->image, (JDIMENSION)0, (JDIMENSION)1, TRUE); dest->pub.buffer_height = 1; dest->pub.put_pixel_rows = rle_put_pixel_rows; @@ -153,15 +153,15 @@ start_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ METHODDEF(void) -rle_put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +rle_put_pixel_rows(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) { - rle_dest_ptr dest = (rle_dest_ptr) dinfo; + rle_dest_ptr dest = (rle_dest_ptr)dinfo; if (cinfo->output_scanline < cinfo->output_height) { dest->pub.buffer = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, dest->image, - cinfo->output_scanline, (JDIMENSION) 1, TRUE); + ((j_common_ptr)cinfo, dest->image, + cinfo->output_scanline, (JDIMENSION)1, TRUE); } } @@ -172,9 +172,9 @@ rle_put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, */ METHODDEF(void) 
-finish_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +finish_output_rle(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { - rle_dest_ptr dest = (rle_dest_ptr) dinfo; + rle_dest_ptr dest = (rle_dest_ptr)dinfo; rle_hdr header; /* Output file information */ rle_pixel **rle_row, *red, *green, *blue; JSAMPROW output_row; @@ -182,7 +182,7 @@ finish_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) int row, col; int ci; #ifdef PROGRESS_REPORT - cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; + cd_progress_ptr progress = (cd_progress_ptr)cinfo->progress; #endif /* Initialize the header info */ @@ -202,7 +202,8 @@ finish_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) header.cmaplen = CMAPBITS; header.cmap = dest->colormap; /* Add a comment to the output image with the true colormap length. */ - sprintf(cmapcomment, "color_map_length=%d", cinfo->actual_number_of_colors); + sprintf(cmapcomment, "color_map_length=%d", + cinfo->actual_number_of_colors); rle_putcom(cmapcomment, &header); } @@ -217,29 +218,29 @@ finish_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) if (progress != NULL) { progress->pub.pass_limit = cinfo->output_height; progress->pub.pass_counter = 0; - (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + (*progress->pub.progress_monitor) ((j_common_ptr)cinfo); } #endif if (cinfo->output_components == 1) { - for (row = cinfo->output_height-1; row >= 0; row--) { - rle_row = (rle_pixel **) (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, dest->image, - (JDIMENSION) row, (JDIMENSION) 1, FALSE); - rle_putrow(rle_row, (int) cinfo->output_width, &header); + for (row = cinfo->output_height - 1; row >= 0; row--) { + rle_row = (rle_pixel **)(*cinfo->mem->access_virt_sarray) + ((j_common_ptr)cinfo, dest->image, + (JDIMENSION)row, (JDIMENSION)1, FALSE); + rle_putrow(rle_row, (int)cinfo->output_width, &header); #ifdef PROGRESS_REPORT if (progress != NULL) { progress->pub.pass_counter++; - 
(*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + (*progress->pub.progress_monitor) ((j_common_ptr)cinfo); } #endif } } else { - for (row = cinfo->output_height-1; row >= 0; row--) { - rle_row = (rle_pixel **) dest->rle_row; + for (row = cinfo->output_height - 1; row >= 0; row--) { + rle_row = (rle_pixel **)dest->rle_row; output_row = *(*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, dest->image, - (JDIMENSION) row, (JDIMENSION) 1, FALSE); + ((j_common_ptr)cinfo, dest->image, + (JDIMENSION)row, (JDIMENSION)1, FALSE); red = rle_row[0]; green = rle_row[1]; blue = rle_row[2]; @@ -248,11 +249,11 @@ finish_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) *green++ = GETJSAMPLE(*output_row++); *blue++ = GETJSAMPLE(*output_row++); } - rle_putrow(rle_row, (int) cinfo->output_width, &header); + rle_putrow(rle_row, (int)cinfo->output_width, &header); #ifdef PROGRESS_REPORT if (progress != NULL) { progress->pub.pass_counter++; - (*progress->pub.progress_monitor) ((j_common_ptr) cinfo); + (*progress->pub.progress_monitor) ((j_common_ptr)cinfo); } #endif } @@ -276,14 +277,14 @@ finish_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ GLOBAL(djpeg_dest_ptr) -jinit_write_rle (j_decompress_ptr cinfo) +jinit_write_rle(j_decompress_ptr cinfo) { rle_dest_ptr dest; /* Create module interface object, fill in method pointers */ dest = (rle_dest_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - sizeof(rle_dest_struct)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + sizeof(rle_dest_struct)); dest->pub.start_output = start_output_rle; dest->pub.finish_output = finish_output_rle; dest->pub.calc_buffer_dimensions = NULL; @@ -293,16 +294,16 @@ jinit_write_rle (j_decompress_ptr cinfo) /* Allocate a work array for output to the RLE library. 
*/ dest->rle_row = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->output_width, (JDIMENSION) cinfo->output_components); + ((j_common_ptr)cinfo, JPOOL_IMAGE, + cinfo->output_width, (JDIMENSION)cinfo->output_components); /* Allocate a virtual array to hold the image. */ dest->image = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - (JDIMENSION) (cinfo->output_width * cinfo->output_components), - cinfo->output_height, (JDIMENSION) 1); + ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE, + (JDIMENSION)(cinfo->output_width * cinfo->output_components), + cinfo->output_height, (JDIMENSION)1); - return (djpeg_dest_ptr) dest; + return (djpeg_dest_ptr)dest; } #endif /* RLE_SUPPORTED */ diff --git a/wrtarga.c b/wrtarga.c index 4db9313..9dfa920 100644 --- a/wrtarga.c +++ b/wrtarga.c @@ -45,7 +45,7 @@ typedef tga_dest_struct *tga_dest_ptr; LOCAL(void) -write_header (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, int num_colors) +write_header(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, int num_colors) /* Create and write a Targa header */ { char targaheader[18]; @@ -55,15 +55,15 @@ write_header (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, int num_colors) if (num_colors > 0) { targaheader[1] = 1; /* color map type 1 */ - targaheader[5] = (char) (num_colors & 0xFF); - targaheader[6] = (char) (num_colors >> 8); + targaheader[5] = (char)(num_colors & 0xFF); + targaheader[6] = (char)(num_colors >> 8); targaheader[7] = 24; /* 24 bits per cmap entry */ } - targaheader[12] = (char) (cinfo->output_width & 0xFF); - targaheader[13] = (char) (cinfo->output_width >> 8); - targaheader[14] = (char) (cinfo->output_height & 0xFF); - targaheader[15] = (char) (cinfo->output_height >> 8); + targaheader[12] = (char)(cinfo->output_width & 0xFF); + targaheader[13] = (char)(cinfo->output_width >> 8); + targaheader[14] = (char)(cinfo->output_height & 0xFF); + targaheader[15] = (char)(cinfo->output_height >> 8); targaheader[17] = 0x20; /* Top-down, 
non-interlaced */ if (cinfo->out_color_space == JCS_GRAYSCALE) { @@ -79,7 +79,7 @@ write_header (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, int num_colors) } } - if (JFWRITE(dinfo->output_file, targaheader, 18) != (size_t) 18) + if (JFWRITE(dinfo->output_file, targaheader, 18) != (size_t)18) ERREXIT(cinfo, JERR_FILE_WRITE); } @@ -90,11 +90,11 @@ write_header (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, int num_colors) */ METHODDEF(void) -put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +put_pixel_rows(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) /* used for unquantized full-color output */ { - tga_dest_ptr dest = (tga_dest_ptr) dinfo; + tga_dest_ptr dest = (tga_dest_ptr)dinfo; register JSAMPROW inptr; register char *outptr; register JDIMENSION col; @@ -102,20 +102,20 @@ put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, inptr = dest->pub.buffer[0]; outptr = dest->iobuffer; for (col = cinfo->output_width; col > 0; col--) { - outptr[0] = (char) GETJSAMPLE(inptr[2]); /* RGB to BGR order */ - outptr[1] = (char) GETJSAMPLE(inptr[1]); - outptr[2] = (char) GETJSAMPLE(inptr[0]); + outptr[0] = (char)GETJSAMPLE(inptr[2]); /* RGB to BGR order */ + outptr[1] = (char)GETJSAMPLE(inptr[1]); + outptr[2] = (char)GETJSAMPLE(inptr[0]); inptr += 3, outptr += 3; } - (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); } METHODDEF(void) -put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +put_gray_rows(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) /* used for grayscale OR quantized color output */ { - tga_dest_ptr dest = (tga_dest_ptr) dinfo; + tga_dest_ptr dest = (tga_dest_ptr)dinfo; register JSAMPROW inptr; register char *outptr; register JDIMENSION col; @@ -123,9 +123,9 @@ put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, inptr 
= dest->pub.buffer[0]; outptr = dest->iobuffer; for (col = cinfo->output_width; col > 0; col--) { - *outptr++ = (char) GETJSAMPLE(*inptr++); + *outptr++ = (char)GETJSAMPLE(*inptr++); } - (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); } @@ -135,10 +135,10 @@ put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, */ METHODDEF(void) -put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, - JDIMENSION rows_supplied) +put_demapped_gray(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, + JDIMENSION rows_supplied) { - tga_dest_ptr dest = (tga_dest_ptr) dinfo; + tga_dest_ptr dest = (tga_dest_ptr)dinfo; register JSAMPROW inptr; register char *outptr; register JSAMPROW color_map0 = cinfo->colormap[0]; @@ -147,9 +147,9 @@ put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, inptr = dest->pub.buffer[0]; outptr = dest->iobuffer; for (col = cinfo->output_width; col > 0; col--) { - *outptr++ = (char) GETJSAMPLE(color_map0[GETJSAMPLE(*inptr++)]); + *outptr++ = (char)GETJSAMPLE(color_map0[GETJSAMPLE(*inptr++)]); } - (void) JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); + (void)JFWRITE(dest->pub.output_file, dest->iobuffer, dest->buffer_width); } @@ -158,9 +158,9 @@ put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo, */ METHODDEF(void) -start_output_tga (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +start_output_tga(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { - tga_dest_ptr dest = (tga_dest_ptr) dinfo; + tga_dest_ptr dest = (tga_dest_ptr)dinfo; int num_colors, i; FILE *outfile; @@ -202,7 +202,7 @@ start_output_tga (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ METHODDEF(void) -finish_output_tga (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +finish_output_tga(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { /* Make sure we wrote the output file OK */ fflush(dinfo->output_file); @@ -216,9 +216,9 @@ finish_output_tga 
(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ METHODDEF(void) -calc_buffer_dimensions_tga (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) +calc_buffer_dimensions_tga(j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) { - tga_dest_ptr dest = (tga_dest_ptr) dinfo; + tga_dest_ptr dest = (tga_dest_ptr)dinfo; dest->buffer_width = cinfo->output_width * cinfo->output_components; } @@ -229,14 +229,14 @@ calc_buffer_dimensions_tga (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo) */ GLOBAL(djpeg_dest_ptr) -jinit_write_targa (j_decompress_ptr cinfo) +jinit_write_targa(j_decompress_ptr cinfo) { tga_dest_ptr dest; /* Create module interface object, fill in method pointers */ dest = (tga_dest_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - sizeof(tga_dest_struct)); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + sizeof(tga_dest_struct)); dest->pub.start_output = start_output_tga; dest->pub.finish_output = finish_output_tga; dest->pub.calc_buffer_dimensions = calc_buffer_dimensions_tga; @@ -245,17 +245,17 @@ jinit_write_targa (j_decompress_ptr cinfo) jpeg_calc_output_dimensions(cinfo); /* Create I/O buffer. */ - dest->pub.calc_buffer_dimensions (cinfo, (djpeg_dest_ptr) dest); + dest->pub.calc_buffer_dimensions(cinfo, (djpeg_dest_ptr)dest); dest->iobuffer = (char *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (size_t) (dest->buffer_width * sizeof(char))); + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + (size_t)(dest->buffer_width * sizeof(char))); /* Create decompressor output buffer. */ dest->pub.buffer = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, dest->buffer_width, (JDIMENSION) 1); + ((j_common_ptr)cinfo, JPOOL_IMAGE, dest->buffer_width, (JDIMENSION)1); dest->pub.buffer_height = 1; - return (djpeg_dest_ptr) dest; + return (djpeg_dest_ptr)dest; } #endif /* TARGA_SUPPORTED */ -- 2.7.4